# Rproject6_LRTest_Poisson.r

# Example 8.4.A:  Counts of asbestos fibers on filters
#       Steel et al. 1980

# 1.  Data ----
x=c(31,29,19,18,31,28,
34,27,34,30,16,18,
26,27,27,18,24,22,
28,24,21,17,24)

# x= rpois(n=100, lambda=24.91)
# 2.  Parameter Estimation of Poisson----

# Suppose x is a sample of size n=23 from a Poisson(lambda) distribution
#
#   2.1 Estimate of lambda (MOM and MLE are the same)

lambda.hat=mean(x)

print(lambda.hat)
## [1] 24.91304
#

par(mfcol=c(1,1))
#   2.2 Plot histograms of sample data

#     Vary argument nclass= for number of bins
hist(x, xlim=c(0,1.5*max(x)), probability=TRUE)

hist(x,nclass=15, xlim=c(0,1.5*max(x)), probability=TRUE)

hist(x,nclass=10, xlim=c(0,1.5*max(x)), probability=TRUE)
# Add plot of pmf function for fitted Poisson distribution
x.grid=seq(0,1.5*max(x),1)
x.probs=dpois(x.grid, lambda=lambda.hat)

lines(x.grid,x.probs, type="h", col='blue',lwd=4)

#
lambda.hat.sterror=sqrt(lambda.hat/length(x))

print(lambda.hat.sterror)
## [1] 1.040757
# 3.  Likelihood Ratio Test Based on Histogram

nclass0=10
hist.0<-hist(x,nclass=nclass0)

print(hist.0$counts) ## [1] 5 1 2 3 1 5 2 2 2 n.bins=length(hist.0$counts)

print(hist.0$breaks) ## [1] 16 18 20 22 24 26 28 30 32 34 # cdf values at the breaks hist.0.breaks.MLEFITTED.cdf=ppois(hist.0$breaks, lambda=lambda.hat)

# bin probabilities are the differences
hist.0.pbin=diff(hist.0.breaks.MLEFITTED.cdf)

####
#   3.1 Construct vectors of Observed and Expected counts

counts.Observed=hist.0$counts probs.mle=hist.0.pbin counts.sum=sum(counts.Observed) counts.Expected=counts.sum*probs.mle #labels.BloodType=c("M","MN","N") # 3.2 Compute LRStat # Conduct level alpha=0.05 test # Compute P-Value component.LRStat=2*counts.Observed*log(counts.Observed/counts.Expected) LRStat=sum(component.LRStat) print(LRStat) ## [1] 15.82175 # # Under Null Hypothesis, LRStat is a chi-square r.v. with # q=(m-1) -1 degrees of freedom # where m=number of bins # For level alpha test, determine critical value of LRStat alpha=.05 m=n.bins q=m-1-1 print(q) ## [1] 7 chisq.criticalValue=qchisq(p=1-alpha,df=q) print(chisq.criticalValue) ## [1] 14.06714 # Test rejected at alpha level (LRStat > chisq.criticalValue) # # Compute P-value of LRStat LRStat.pvalue=1-pchisq(LRStat, df=q) print(LRStat.pvalue) ## [1] 0.02679569 # 4. Pearson ChiSquare Test ---- # 4.1 Construct vectors of Observed and Expected counts counts.Observed=hist.0$counts

probs.mle=hist.0.pbin
counts.sum=sum(counts.Observed)

counts.Expected=counts.sum*probs.mle

#   4.2 Compute Pearson ChiSqStat
#       Conduct level alpha=0.05 test
#       Compute P-Value

component.ChiSqStat=((counts.Observed-counts.Expected)^2 )/counts.Expected
ChiSqStat=sum(component.ChiSqStat)
print(ChiSqStat)
## [1] 16.84007
#
# Under Null Hypothesis, LRStat is a chi-square r.v. with
#   q=(m-1) -1  degrees of freedom
#     where m=number of bins
# For level alpha test, determine critical value of LRStat
alpha=.05
m=n.bins
q=m-1-1
print(q)
## [1] 7
chisq.criticalValue=qchisq(p=1-alpha,df=q)
print(chisq.criticalValue)
## [1] 14.06714
#   Test accepted at alpha level (ChiSqStat < chisq.criticalValue)
#
# Compute P-value of ChiSqStat

ChiSqStat.pvalue=1-pchisq(ChiSqStat, df=q)

print(ChiSqStat.pvalue)
## [1] 0.01845707
############################
# 5.  Create Table for Both Tests ----
table.lrtests<-data.frame(
Observed=counts.Observed,
Expected=counts.Expected,
LRStat.j=component.LRStat,
ChiSqStat.j=component.ChiSqStat)

#   Add last row equal to sums of columns
#     Use r function apply(X=, MARGIN=, FUN=)
table.lrtests.0<-rbind(
table.lrtests,
t(as.matrix(apply(X=table.lrtests,MARGIN=2,FUN=sum))))
labels.tablerows<-paste("Bin_",c(1:n.bins),"[",
hist.0$breaks[1:(n.bins)],",", hist.0$breaks[2:(n.bins+1)],"]",sep="")
dimnames(table.lrtests.0)[[1]]<-c(labels.tablerows,"Total/Sum")

print(table.lrtests.0)
##              Observed   Expected   LRStat.j ChiSqStat.j
## Bin_1[16,18]        5  1.2812509 13.6160105 10.79343231
## Bin_2[18,20]        1  2.1902179 -1.5680021  0.64679349
## Bin_3[20,22]        2  3.0734068 -1.7185579  0.37489412
## Bin_4[22,24]        3  3.6030113 -1.0989460  0.10092185
## Bin_5[24,26]        1  3.5810485 -2.5513113  1.86029635
## Bin_6[26,28]        5  3.0554534  4.9250991  1.23754511
## Bin_7[28,30]        2  2.2621571 -0.4926865  0.03038089
## Bin_8[30,32]        2  1.4669015  1.2399793  0.19373762
## Bin_9[32,34]        2  0.8399652  3.4701678  1.60206703
## Total/Sum          23 21.3534126 15.8217528 16.84006878
print(data.frame(cbind(LRStat.pvalue=LRStat.pvalue,
ChiSqStat.pvalue=ChiSqStat.pvalue),
row.names=c("P-Value")))
##         LRStat.pvalue ChiSqStat.pvalue
## P-Value    0.02679569       0.01845707