# This code computes 3 bootstrap statistics for the old
# faithful data.
# 1. A 90% CI for the median
# 2. A 90% CI for the mean
# 3. P(|xbar - mu| > 5.
       
print('Old Faithful Data')
       
# Number of bootstrap samples to generate
nboot = 1000

#Make sure the working directory contains oldfaithful.txt
ofl = read.table('oldfaithful.txt');
ofl_data = ofl[[1]]
n = length(ofl_data) 
hist(ofl_data, nclass=20)       

#Sample median and mean
oflMedian = median(ofl_data)
oflMean = mean(ofl_data)

# COMMENT: Could generate one sample at a time by doing everything in the for loop.

#Generate nboot empirical bootstrap trials
# each trial consists of n samples from the data
x = sample(ofl_data, n*nboot, replace=TRUE)
bootdata = matrix(x,nrow=n, ncol=nboot)

#Compute the medians and means of the bootstrap trials
bootMedians = rep(0,nboot)
bootMeans = rep(0,nboot)
for (j in 1:nboot)
{
  bootMedians[j] = median(bootdata[,j])
  bootMeans[j] = mean(bootdata[,j])  
}
hist(bootMedians, nclass=20)       
hist(bootMeans, nclass=20)       

# Compute the .05 and .95 quantiles for the differences
# Quantile has many methods of interpolating to compute the quantiles.
# E.g for median, quantil(1:4, .5) = 2.5. We just go with the default method
d_median05 = quantile(bootMedians - oflMedian, .05);
d_median95 = quantile(bootMedians - oflMedian, .95);

d_mean05 = quantile(bootMeans - oflMean, .05);
d_mean95 = quantile(bootMeans - oflMean, .95);

# Compute the 90% bootstrap confidence intervals
CI_median = oflMedian - c(d_median95, d_median05)
CI_mean = oflMean - c(d_mean95, d_mean05)
       
# Compute the fraction of the bootstrap samples where 
#the  |xbar_star - xbar|  > 5 
probDiff5 = sum(abs(bootMeans - oflMean) > 5)/nboot 
     
s = sprintf("Mean = %.3f, median = %.3f", oflMean, oflMedian)
print(s)
s = sprintf("CI_median: [%.3f, %.3f]", CI_median[1], CI_median[2])
print(s)
s = sprintf("CI_mean: [%.3f, %.3f]", CI_mean[1], CI_mean[2])
print(s)
s = sprintf("P(|xbar_star - xbaar| > 5) = %.3f", probDiff5)
print(s)