# This code computes 3 bootstrap statistics for the old # faithful data. # 1. A 90% CI for the median # 2. A 90% CI for the mean # 3. P(|xbar - mu| > 5. print('Old Faithful Data') # Number of bootstrap samples to generate nboot = 1000 #Make sure the working directory contains oldfaithful.txt ofl = read.table('oldfaithful.txt'); ofl_data = ofl[[1]] n = length(ofl_data) hist(ofl_data, nclass=20) #Sample median and mean oflMedian = median(ofl_data) oflMean = mean(ofl_data) # COMMENT: Could generate one sample at a time by doing everything in the for loop. #Generate nboot empirical bootstrap trials # each trial consists of n samples from the data x = sample(ofl_data, n*nboot, replace=TRUE) bootdata = matrix(x,nrow=n, ncol=nboot) #Compute the medians and means of the bootstrap trials bootMedians = rep(0,nboot) bootMeans = rep(0,nboot) for (j in 1:nboot) { bootMedians[j] = median(bootdata[,j]) bootMeans[j] = mean(bootdata[,j]) } hist(bootMedians, nclass=20) hist(bootMeans, nclass=20) # Compute the .05 and .95 quantiles for the differences # Quantile has many methods of interpolating to compute the quantiles. # E.g for median, quantil(1:4, .5) = 2.5. We just go with the default method d_median05 = quantile(bootMedians - oflMedian, .05); d_median95 = quantile(bootMedians - oflMedian, .95); d_mean05 = quantile(bootMeans - oflMean, .05); d_mean95 = quantile(bootMeans - oflMean, .95); # Compute the 90% bootstrap confidence intervals CI_median = oflMedian - c(d_median95, d_median05) CI_mean = oflMean - c(d_mean95, d_mean05) # Compute the fraction of the bootstrap samples where #the |xbar_star - xbar| > 5 probDiff5 = sum(abs(bootMeans - oflMean) > 5)/nboot s = sprintf("Mean = %.3f, median = %.3f", oflMean, oflMedian) print(s) s = sprintf("CI_median: [%.3f, %.3f]", CI_median[1], CI_median[2]) print(s) s = sprintf("CI_mean: [%.3f, %.3f]", CI_mean[1], CI_mean[2]) print(s) s = sprintf("P(|xbar_star - xbaar| > 5) = %.3f", probDiff5) print(s)