# Problem_14_9_39.r

# 1.0 Read in data ----
#       See Problem 14.9.39
# Data from Knafl et al. (1984)
#

# Load the tank volume data set: comma-separated text with a header row.
# The path is relative to the current working directory.
tankvolume <- read.table(
  "Rice 3e Datasets/ASCII Comma/Chapter 14/tankvolume.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Pull the two columns used below into standalone vectors.
Pressure <- tankvolume$Pressure
Volume <- tankvolume$Volume

# (a). Scatter plot of pressure (y-axis) against volume (x-axis).
#      The relationship appears linear.

plot(x = Volume, y = Pressure)
#summary(Volume)

# (b). Simple linear regression of pressure on volume (with intercept).
#      The printed summary is reproduced in the "##" lines that follow.
lmfit1 <- lm(formula = Pressure ~ Volume)
summary(lmfit1)
## 
## Call:
## lm(formula = Pressure ~ Volume)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.429 -15.610   2.047  10.819  36.634 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -257.301      9.430  -27.29   <2e-16 ***
## Volume      2316.469      9.243  250.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.44 on 19 degrees of freedom
## Multiple R-squared:  0.9997, Adjusted R-squared:  0.9997 
## F-statistic: 6.28e+04 on 1 and 19 DF,  p-value: < 2.2e-16
# Add the fitted regression line to the current plot
# (the pressure-versus-volume scatter plot drawn earlier).
abline(lmfit1, col = "green")

#   Plot the residuals versus volume

# residuals() is the documented accessor for a fitted model object, so it is
# used instead of reaching into the lm object's components directly.
# The horizontal line at zero makes any systematic pattern in the residuals
# easy to see, matching the residual plot drawn for the quadratic fit.
plot(Volume, residuals(lmfit1), ylab = "Residuals (linear fit)")
abline(h = 0, col = "gray")

#
# The residual plot shows a systematic non-linear pattern against volume
#
# (c). Quadratic model: regress pressure on volume and volume squared.
#      The squared term is stored as its own variable so it shows up under
#      the name "VolumeSq" in the coefficient table.
VolumeSq <- Volume^2

lmfit2 <- lm(formula = Pressure ~ Volume + VolumeSq)
summary(lmfit2)
## 
## Call:
## lm(formula = Pressure ~ Volume + VolumeSq)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.645  -7.189   1.944   7.371  15.528 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -204.995      9.274 -22.104 1.70e-14 ***
## Volume      2164.032     23.052  93.877  < 2e-16 ***
## VolumeSq      83.191     12.276   6.777 2.39e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.6 on 18 degrees of freedom
## Multiple R-squared:  0.9999, Adjusted R-squared:  0.9999 
## F-statistic: 1.057e+05 on 2 and 18 DF,  p-value: < 2.2e-16
# Residuals of the quadratic fit against volume, with a zero reference line.
# residuals() is the documented accessor for a fitted model object, and the
# explicit y-axis label replaces the raw expression that plot() would
# otherwise print.
plot(Volume, residuals(lmfit2), ylab = "Residuals (quadratic fit)")
abline(h = 0, col = "gray")

# The fit looks much better, but the residuals at specific volume 
# levels tend to be all positive or all negative together.

# The variability within a given Volume level is smaller
# than the variability across Volume levels.

# There appear to be two sources of variability: across volume levels and within a volume level.