# ANOVA example for class 19-prep

#DATA ----
T1 = c(2,4,1,5,3)
T2 = c(3,4,6,1,4)
T3 = c(2,1,3,3,5)

#One way ANOVA using aov() ----
procedure = c(rep('T1',length(T1)),rep('T2',length(T2)),rep('T3',length(T3)))
pain = c(T1,T2,T3)
data.pain = data.frame(procedure,pain)
print(data.pain)
aov.data = aov(pain~procedure,data=data.pain)  #do the analysis of variance
summary(aov.data)                                    #show the summary table
print(model.tables(aov.data,"means"),digits=3)       #report the means and the number of subjects/cell
boxplot(pain~procedure,data=data.pain)        #graphical summarydatafilename

# One way ANOVA by hand ---- 
# Make sure all the groups are the same size 
if (length(T1) != length(T2) || length(T2) != length(T3))
  stop("lengths are not equal", call. = F)
m = length(T1)
n = 3
group.means = c(mean(T1),mean(T2), mean(T3))
grandmean = mean(group.means)
group.vars = c(var(T1),var(T2), var(T3))
betweenGroupVar = m*var(group.means)
aveWithinGroupVar = mean(group.vars)
fstat = betweenGroupVar/aveWithinGroupVar
p = 1-pf(fstat, n-1,n*(m-1))
print(fstat)
print(p)

# Plot F distributions ----
makepdf = FALSE
fname = '../pdftex/img/class19-fdist.pdf'
if (makepdf)
  pdf(fname, width=5.5,height= 3)
par(mar=c(3.5,2.5,.4,.1))
txtxpos = 7.5
txtwd = .5
linewd = .8
cex.txt = 1.1

x = seq(0,10,.01)
plot(c(0,10),c(0,1), type='n', mgp=c(2,1,0), xlab='x', ylab='', cex.lab=2)
df1 = 3; df2 = 4
col = 'red'
txtht = .9
lines(x,df(x,df1,df2), col=col, lwd=2)
s = sprintf('F(%d,%d)', df1, df2)
text(txtxpos, txtht, s, pos=2, cex=cex.txt)
lines(c(txtxpos+txtwd,txtxpos+txtwd+linewd),c(txtht,txtht), col=col,lwd=3)

df1 = 10; df2 = 15
col = 'blue'
txtht = .82
lines(x,df(x,df1,df2), col=col, lwd=2)
s = sprintf('F(%d,%d)', df1, df2)
text(txtxpos, txtht, s, pos=2, cex=cex.txt)
lines(c(txtxpos+txtwd,txtxpos+txtwd+linewd),c(txtht,txtht), col=col,lwd=3)

df1 = 30; df2 = 15
col = 'magenta'
txtht = .74
lines(x,df(x,df1,df2), col=col, lwd=2)
s = sprintf('F(%d,%d)', df1, df2)
text(txtxpos, txtht, s, pos=2, cex=cex.txt)
lines(c(txtxpos+txtwd,txtxpos+txtwd+linewd),c(txtht,txtht), col=col,lwd=3)

if (makepdf)
  dev.off()

# Mendel ----
n = 556
prob = c(9,3,3,1)/16
observed = c(315,108,102,31)
expected = n*prob

likRatioStat = 2*sum(observed*log(observed/expected))
p_likRatio = 1-pchisq(likRatioStat,3)
print(likRatioStat)
print(p_likRatio)

X2 = sum((observed-expected)^2/expected)
p_pearson = 1- pchisq(X2,3)
print(observed)
print(expected)
print(X2)
print(p_pearson)


# reading question ----
outcomes = 0:6
df = length(outcomes) - 1
observed = c(5,11,13,7,2,1,1)
prob = dbinom(outcomes, 6,.4)
n = sum(observed)
expectedCounts = n*prob
likratiostat = 2*sum(observed*log(observed/expectedCounts))
pval = 1-pchisq(likratiostat, df)
print(likratiostat)
print(pval)