# problem 1 ----
# 1b ----
# The following computes probabilities by hand 
# x = #tails before the 3rd head, prob of heads = .5
alpha = .05
x = 0
tot = 0
done = FALSE
p = choose(x+2,2)*.5^(x+3)
while (!done)
{
  tot = tot + choose(x+2,2)*.5^(x+3)
  cat(x, '  ', tot)
  if (tot > 1-alpha)
  {
    cat('\n********* Critical value = ',x+1, ' *********')
    criticalPoint.right = x+1
    done = TRUE
  }
  cat('\n')
  x = x+1
}
# OR we could use the negative binomial.
# Here we'll just see that they agree
testCP = qnbinom(1-alpha,3,.5)  + 1  # = 9
if (testCP != criticalPoint.right)
{
 stop('Discrepancy in the values of the critical points') 
}

# 1b Make the figure ----
makepdf = F
fname = '../pdftex/img/ps8-1b.pdf'
if (makepdf)
{
  wd = 4.5
  ht = 3
  scale = .8
  pdf(fname, width=wd*scale,height= ht*scale)
}
par(mar=c(3.5,2.5,.4,.3))
x = 0:20
y = dgeom(x,.5)
plot(x,y, pch=19, cex=.8, col='blue', ylab='', mgp=c(1.7,1,0))
#axis(1,pos=0,at=0:20)
lines(c(9,20),c(-.02,-.02), lwd=4, col='red')
if (makepdf)
  dev.off()

# problem 2 ----
x = c(1.76, -2.28, -0.56, 1.46, 0.59, 1.26, -1.94, -0.79, -0.86, -1.41,2.07, 1.30)
n = length(x)
sigma2 = 1
s2 = var(x)
X2 = (n-1)*s2/sigma2
p = 1 - pchisq(X2, n-1)

#Problem 3 ----
observed = c(7,13,12,9,9,13,11,10,16)
benford = c(.301,.176,.125,.097,.079,.067,.058,.051,.046)
n = sum(observed)
df = 8
sum(benford)
expected = benford*n
G = 2*sum(observed*log(observed/expected))
X2 = sum((observed-expected)^2/expected)
print(G)
print(X2)
1-pchisq(G,df)
1-pchisq(X2,df)

#problem 4 ----
#4a ----
x = c(-0.802,  0.457,  0.972,  0.044,  0.318, -1.380,  0.111, 
      -0.023, -0.700, -1.977, -0.497,  1.471, -1.314, -0.078, 
      -0.505,  0.583,  1.363, -1.863, -2.105,  0.488)

y = c(  9.019,  9.852,  7.947,  9.465, 10.060, 10.508,  9.506, 
        9.540, 10.218,  9.407, 11.455, 11.422,  7.698,  9.972, 
       10.928, 11.577, 10.376,  8.605,  9.347, 10.715)
var.test(x,y)

#4b ----
n = length(x)
m = length(y)
s2x = var(x)
s2y = var(y)
fstat = s2x/s2y
p = 2*min(pf(.9703, n-1,m-1))
fstat
p

#problem 5 ----
mns = c(1.32, 1.26, 1.53, 1.39)
v = c(.56, .80, .93, .82)
m = 351  # number of samples per group
n = length(mns) # number of groups
msb = m*var(mns)  # between group variance
msw = mean(v) # within group variance
fstat = msb/msw
df1 = n-1
df2 =  n*(m-1);
p = 1 - pf(fstat, df1,df2)

print(fstat)
print(p)

#Check using aov ----
#fake the data
m = 351
fd = rnorm(m,0,1)
fakedata = (fd-mean(fd))/sqrt(var(fd)) #mean 0 variance 1 data
x = c()
for (j in 1:4)
{
  x0 = c(rep(mns[j],m)+sqrt(v[j])*fakedata)
  x = c(x,x0)
  print(mean(x0))
  print(var(x0))
}
fact = c(rep('clean',m), rep('5-day',m), rep('10-day',m), rep('full',m))
d = data.frame(fact,x)
res = aov(x~fact,data=d)
summary(res)

# 10-day vs others by hand
m = 351
basej = 3
for (j in 1:4)
{
   ind = c(j,basej)
   n = length(ind) #two groups at a ti,e
   msb = m*var(mns[ind])  # between group variance
   msw = mean(v[ind]) # within group variance
   fstat = msb/msw
   df1 = n-1
   df2 =  n*(m-1);
   p_fstat = 1 - pf(fstat, df1,df2)
   
   #tstat
   sxy = sqrt((v[j]+v[basej])/m)
   tstat = (mns[basej]-mns[j])/sxy
   df = 2*m-2
   p_t_one_sided = 1-pt(abs(tstat),df)
   p_t_two_sided = 2*p_t_one_sided
   s = sprintf("%.5f, %.5f, %.5f, %.5f, %.5f, %.5f",
               tstat, p_t_one_sided, fstat, p_fstat, tstat^2, p_t_two_sided)
   print(s)
}