# Rproject11_Tablets_TwoSampleT.r
# 1.0 Read in data ----
#       See Example 12.2A of Rice
#
#   Measurements of chlorpheniramine maleate in tablets made by seven laboratories
#     Nominal dosage equal to 4mg
#     10 measurements per laboratory
#
#     Sources of variability
#         within labs
#         between labs

if (FALSE){tablets1=read.table(file="Rice 3e Datasets/ASCII Comma/Chapter 12/tablets1.txt",
sep=",",stringsAsFactors = FALSE, quote="\'",
}
# Read in matrix and label columns
sep=",",stringsAsFactors = FALSE, skip=1,
dimnames(tablets1)[[2]]<-paste("Lab",c(1:7),sep="")

tablets1
##    Lab1 Lab2 Lab3 Lab4 Lab5 Lab6 Lab7
## 1  4.13 3.86 4.00 3.88 4.02 4.02 4.00
## 2  4.07 3.85 4.02 3.88 3.95 3.86 4.02
## 3  4.04 4.08 4.01 3.91 4.02 3.96 4.03
## 4  4.07 4.11 4.01 3.95 3.89 3.97 4.04
## 5  4.05 4.08 4.04 3.92 3.91 4.00 4.10
## 6  4.04 4.01 3.99 3.97 4.01 3.82 3.81
## 7  4.02 4.02 4.03 3.92 3.89 3.98 3.91
## 8  4.06 4.04 3.97 3.90 3.89 3.99 3.96
## 9  4.10 3.97 3.98 3.97 3.99 4.02 4.05
## 10 4.04 3.95 3.98 3.90 4.00 3.93 4.06
#   Replicate Figure 12.1 of Rice
boxplot(tablets1)

#   2.1 Define a function to implement Two-sample t test ----
fcn.TwoSampleTTest<-function(x,y, conf.level=0.95, digits0=4){
# conf.level=0.95; digits0=4
x.mean=mean(x)
y.mean=mean(y)
x.var=var(x)
y.var=var(y)
x.n=length(x)
y.n=length(y)
sigmasq.pooled=((x.n-1)*x.var + (y.n-1)*y.var)/(x.n + y.n -2)
# Print out statistics for each sample and pooled estimate of standard deviation
cat("Sample Statistics:\n")
print(t(data.frame(
x.mean,
x.n,
x.stdev=sqrt(x.var),
y.mean,
y.n,
y.stdev=sqrt(y.var),
sigma.pooled=sqrt(sigmasq.pooled)
)))

cat("\n t test Computations")
mean.diff=x.mean-y.mean
mean.diff.sterr=sqrt(sigmasq.pooled)*sqrt( 1/x.n + 1/y.n)
mean.diff.tstat=mean.diff/mean.diff.sterr
tstat.df=x.n + y.n -2
mean.diff.tstat.pvalue=2*pt(-abs(mean.diff.tstat), df=tstat.df)

print(t(data.frame(
mean.diff=mean.diff,
mean.diff.sterr=mean.diff.sterr,
tstat=mean.diff.tstat,
df=tstat.df,
pvalue=mean.diff.tstat.pvalue)))

# Confidence interval for difference
alphahalf=(1-conf.level)/2.
t.critical=qt(1-alphahalf, df=tstat.df)

mean.diff.confInterval=mean.diff +c(-1,1)*mean.diff.sterr*t.critical
cat(paste("\n", 100*conf.level, " Percent Confidence Interval:\n "))
cat(paste(c(
"[", round(mean.diff.confInterval[1],digits=digits0),
",", round(mean.diff.confInterval[2],digits=digits0),"]"), collapse=""))

#    invisible(return(NULL))
}

#  2.2 Apply function fcn.TwoSampleTTest ----

# Compare Lab7 to Lab4
fcn.TwoSampleTTest(tablets1$Lab7, tablets1$Lab4)
## Sample Statistics:
##                     [,1]
## x.mean        3.99800000
## x.n          10.00000000
## x.stdev       0.08482662
## y.mean        3.92000000
## y.n          10.00000000
## y.stdev       0.03333333
## sigma.pooled  0.06444636
##
##  t test Computations                       [,1]
## mean.diff        0.07800000
## mean.diff.sterr  0.02882129
## tstat            2.70633286
## df              18.00000000
## pvalue           0.01445587
##
##  95  Percent Confidence Interval:
##  [0.0174,0.1386]
# Compare output to t.test
t.test(tablets1$Lab7, tablets1$Lab4, var.equal=TRUE)
##
##  Two Sample t-test
##
## data:  tablets1$Lab7 and tablets1$Lab4
## t = 2.7063, df = 18, p-value = 0.01446
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01744872 0.13855128
## sample estimates:
## mean of x mean of y
##     3.998     3.920
#   3.1 Conduct One-Way ANOVA ----
#       Analysis of Variance with R function aov()

#   Create vector variables
#     yvec = Dependent variable
yvec=as.vector(data.matrix(tablets1))

#     xvec.factor.Lab = Independent variable (of factor type in R)

xvec.factor.Lab=as.factor(as.vector(col(data.matrix(tablets1))))
table(xvec.factor.Lab)
## xvec.factor.Lab
##  1  2  3  4  5  6  7
## 10 10 10 10 10 10 10
plot(as.numeric(xvec.factor.Lab), yvec)

# One-way ANOVA using r function aov()
tablets1.aov<-aov(yvec ~ xvec.factor.Lab)
tablets1.aov.summary<-summary(tablets1.aov)

# Replicate Table in Example 12.2.A, p. 483 of Rice
print(tablets1.aov.summary)
##                 Df Sum Sq  Mean Sq F value   Pr(>F)
## xvec.factor.Lab  6 0.1247 0.020790    5.66 9.45e-05 ***
## Residuals       63 0.2314 0.003673
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Construct model tables

tablets1.aov.model.tables<-model.tables(tablets1.aov, type="means",se=TRUE)
print(tablets1.aov.model.tables)
## Tables of means
## Grand mean
##
## 3.984571
##
##  xvec.factor.Lab
## xvec.factor.Lab
##     1     2     3     4     5     6     7
## 4.062 3.997 4.003 3.920 3.957 3.955 3.998
##
## Standard errors for differences of means
##         xvec.factor.Lab
##                  0.0271
## replic.              10
# Validate standard error for difference of means
ResidualsMeanSq=0.003673
J=nrow(tablets1)
sqrt(ResidualsMeanSq/J) # standard error of each mean
## [1] 0.01916507
sqrt(ResidualsMeanSq/J)*sqrt(2) # standard error of difference of two means
## [1] 0.02710351
#tablets1.aov.model.tables<-model.tables(tablets1.aov, type="effects",se=TRUE)
#print(tablets1.aov.model.tables)

#   3.2 Confidence intervals for pairwise differences ----
# Create confidence intervals on differences between means
#   Studentized range statistic
#   Tukey's 'Honest Significant Difference' method
# Apply R function TukeyHSD():
TukeyHSD(tablets1.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
##
## Fit: aov(formula = yvec ~ xvec.factor.Lab)
##
## $xvec.factor.Lab ## diff lwr upr p adj ## 2-1 -0.065 -0.147546752 0.017546752 0.2165897 ## 3-1 -0.059 -0.141546752 0.023546752 0.3226101 ## 4-1 -0.142 -0.224546752 -0.059453248 0.0000396 ## 5-1 -0.105 -0.187546752 -0.022453248 0.0045796 ## 6-1 -0.107 -0.189546752 -0.024453248 0.0036211 ## 7-1 -0.064 -0.146546752 0.018546752 0.2323813 ## 3-2 0.006 -0.076546752 0.088546752 0.9999894 ## 4-2 -0.077 -0.159546752 0.005546752 0.0830664 ## 5-2 -0.040 -0.122546752 0.042546752 0.7578129 ## 6-2 -0.042 -0.124546752 0.040546752 0.7140108 ## 7-2 0.001 -0.081546752 0.083546752 1.0000000 ## 4-3 -0.083 -0.165546752 -0.000453248 0.0478900 ## 5-3 -0.046 -0.128546752 0.036546752 0.6204148 ## 6-3 -0.048 -0.130546752 0.034546752 0.5720976 ## 7-3 -0.005 -0.087546752 0.077546752 0.9999964 ## 5-4 0.037 -0.045546752 0.119546752 0.8178759 ## 6-4 0.035 -0.047546752 0.117546752 0.8533629 ## 7-4 0.078 -0.004546752 0.160546752 0.0760155 ## 6-5 -0.002 -0.084546752 0.080546752 1.0000000 ## 7-5 0.041 -0.041546752 0.123546752 0.7362355 ## 7-6 0.043 -0.039546752 0.125546752 0.6912252 # # Compare Tukey HSD confidence interval for Lab7 vs Lab4 fcn.TwoSampleTTest(tablets1$Lab7,tablets1$Lab4) ## Sample Statistics: ## [,1] ## x.mean 3.99800000 ## x.n 10.00000000 ## x.stdev 0.08482662 ## y.mean 3.92000000 ## y.n 10.00000000 ## y.stdev 0.03333333 ## sigma.pooled 0.06444636 ## ## t test Computations [,1] ## mean.diff 0.07800000 ## mean.diff.sterr 0.02882129 ## tstat 2.70633286 ## df 18.00000000 ## pvalue 0.01445587 ## ## 95 Percent Confidence Interval: ## [0.0174,0.1386] # Note P-value from t-test is 0.0144 while TukeyHSD is 0.076 # 4. Implement One-Way Anova with lm() ---- lmfit.oneway.Lab=lm(yvec ~ xvec.factor.Lab,x=TRUE, y=TRUE) lmfit.oneway.Lab.summary<-summary(lmfit.oneway.Lab) print(lmfit.oneway.Lab.summary) ## ## Call: ## lm(formula = yvec ~ xvec.factor.Lab, x = TRUE, y = TRUE) ## ## Residuals: ## Min 1Q Median 3Q Max ## -0.1880 -0.0245 0.0060 0.0410 0.1130 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 4.06200 0.01917 211.948 < 2e-16 *** ## xvec.factor.Lab2 -0.06500 0.02710 -2.398 0.019449 * ## xvec.factor.Lab3 -0.05900 0.02710 -2.177 0.033248 * ## xvec.factor.Lab4 -0.14200 0.02710 -5.239 1.99e-06 *** ## xvec.factor.Lab5 -0.10500 0.02710 -3.874 0.000257 *** ## xvec.factor.Lab6 -0.10700 0.02710 -3.948 0.000201 *** ## xvec.factor.Lab7 -0.06400 0.02710 -2.361 0.021316 * ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 0.06061 on 63 degrees of freedom ## Multiple R-squared: 0.3503, Adjusted R-squared: 0.2884 ## F-statistic: 5.66 on 6 and 63 DF, p-value: 9.453e-05 # Compare regression output from lm() with anova output from aov() print(tablets1.aov.summary) ## Df Sum Sq Mean Sq F value Pr(>F) ## xvec.factor.Lab 6 0.1247 0.020790 5.66 9.45e-05 *** ## Residuals 63 0.2314 0.003673 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # Residual standard error equals root (Mean Sq for Residuals) print(lmfit.oneway.Lab.summary$sigma)
## [1] 0.06060541
print((lmfit.oneway.Lab.summary\$sigma)^2)
## [1] 0.003673016
# Compare to Mean Sq for Residuals in tablets1.aov.summary

# F-statistic/degrees of freedom/P-values are same