# Project4_Bayesian_HardyWeinberg.r

# Multinomial Counts:  Hardy Weinberg Equilibrium

# 1.0 Trinomial data from Example 8.5.1.A, p. 273 of Rice.
#
# x=(X1,X2,X3)  # counts of multinomial cells (1,2,3)

# Erythrocyte antigen blood types of n=1029 Hong Kong population in 1937.
x<-c(342,500,187)

# Two estimators:  Multinomial MLE and Binomial(X3) MLE
x.n=sum(x)
x.theta.mle=(2*x[3] + x[2])/(2*sum(x))
x.theta.binomialx3=sqrt(x[3]/sum(x))

print(x.theta.mle)
## [1] 0.4246842
print(x.theta.binomialx3)
## [1] 0.4262978
# 2.1 R functions
# function computing cell probabilities given Hardy-Weinberg theta parameter
fcn.probs.hardyweinberg<-function(theta0){
  probs=c((1-theta0)**2, 2*theta0*(1-theta0), theta0**2)
  return(probs)
}

# 3.  Bayesian inference with uniform prior

dgrid=.001
theta.grid=seq(0,1,dgrid)

# Compute likelihood of x 

# First, compute matrix with multinomial probabilities (3 columns) for each theta (row)
probs.grid=t(apply(as.matrix(theta.grid),1, fcn.probs.hardyweinberg))


plot(theta.grid, probs.grid[,1],type="l",col='red', xlab="theta",
     ylab="Cell Probability",
     main="Hardy Weinberg Cell Probabilities \n 1 (Red), 2 (Green), 3 (Blue)")


lines(theta.grid, probs.grid[,2],type="l",col='green')

lines(theta.grid, probs.grid[,3],type="l",col='blue')

#  Compute likelihood function given x at theta.grid values

# Issue: scaling of likelihood; use log scale and normalize 
loglike.grid<-( log(probs.grid) %*% as.matrix(x))
plot(theta.grid, loglike.grid)

loglike.grid.norm0<-loglike.grid - max(loglike.grid)
plot(theta.grid, loglike.grid.norm0)

#
# Convert from Log scale to Original Scale of Likelihood
like.grid.norm0 =exp(loglike.grid.norm0) 
like.grid.norm0<-(1/dgrid)*like.grid.norm0/sum(like.grid.norm0)
plot(theta.grid,like.grid.norm0, type="l")

length(like.grid.norm0)
## [1] 1001
length(theta.grid)
## [1] 1001
# For uniform prior, the posterior is the normalized likelihiood
plot(theta.grid, like.grid.norm0[,1],type="l", 
     xlab="theta",
     ylab="Density", 
     xlim=c(.3,.6),
     main="Figure 8.10 Posterior Density\nHardy-Weinberg Model theta" )

# Compute 95% posterior predictive interval (numerically)
index.quantile.025=which(cumsum(like.grid.norm0*dgrid) >.025)[1]
posterior.llimit=theta.grid[index.quantile.025]
index.quantile.975=which(cumsum(like.grid.norm0*dgrid) >.975)[1]
posterior.ulimit=theta.grid[index.quantile.975]

abline(v=c(posterior.llimit, posterior.ulimit), col='blue')
abline(v=x.theta.mle, col='green')

print(x.theta.mle)
## [1] 0.4246842
print(c(posterior.llimit, posterior.ulimit))
## [1] 0.403 0.446