# Problem_14_9_39.r

# 1.0 Read in data ----
#       See Problem 14.9.39
# Data from Knafl et al. (1984)
#

# Load the tank calibration data from the comma-separated text file.
# header = TRUE is needed so the columns can be referenced by name below
# (assumes the first row of the file holds the names Volume and Pressure --
# confirm against the data file).
tankvolume <- read.table(
  file = "Rice 3e Datasets/ASCII Comma/Chapter 14/tankvolume.txt",
  sep = ",",
  stringsAsFactors = FALSE,
  header = TRUE
)

# Extract the two columns as plain vectors for the plots and fits that follow.
Volume <- tankvolume$Volume
Pressure <- tankvolume$Pressure

# (a). Scatter plot of pressure against volume; the trend looks linear.

plot(x = Volume, y = Pressure)
# summary(Volume)

# (b). Simple linear regression of pressure on volume.
lmfit1 <- lm(formula = Pressure ~ Volume)
summary(lmfit1)
# Recorded output of the summary call:
##
## Call:
## lm(formula = Pressure ~ Volume)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -28.429 -15.610   2.047  10.819  36.634
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -257.301      9.430  -27.29   <2e-16 ***
## Volume      2316.469      9.243  250.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.44 on 19 degrees of freedom
## Multiple R-squared:  0.9997, Adjusted R-squared:  0.9997
## F-statistic: 6.28e+04 on 1 and 19 DF,  p-value: < 2.2e-16

# Overlay the fitted regression line on the current scatter plot.
abline(reg = lmfit1, col = "green")

#   Plot the residuals of the linear fit versus volume

plot(Volume, lmfit1$residuals)
#
# The residuals plot shows a non-linear relationship with volume
#
# (c). Fit Pressure as a quadratic function of volume.
# The squared term is built as its own variable so that the coefficient is
# labelled "VolumeSq" in the summary output recorded below.
VolumeSq <- Volume * Volume
lmfit2 <- lm(Pressure ~ Volume + VolumeSq)
summary(lmfit2)
##
## Call:
## lm(formula = Pressure ~ Volume + VolumeSq)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -18.645  -7.189   1.944   7.371  15.528
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -204.995      9.274 -22.104 1.70e-14 ***
## Volume      2164.032     23.052  93.877  < 2e-16 ***
## VolumeSq      83.191     12.276   6.777 2.39e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.6 on 18 degrees of freedom
## Multiple R-squared:  0.9999, Adjusted R-squared:  0.9999
## F-statistic: 1.057e+05 on 2 and 18 DF,  p-value: < 2.2e-16

# Residuals of the quadratic fit versus volume, with a zero reference line
# to make runs of same-signed residuals easy to see.
plot(Volume, lmfit2$residuals)
abline(h = 0, col = "gray")

# The fit looks much better, but the residuals at specific volume
# levels tend to be all positive or all negative together.

# The variability within a given Volume level is smaller
# than the variability across Volume levels.

# There appear to be two sources of variability: across volume levels and within.