# fm_casestudy_1_rcode_CAPM.r

#Execute the R-script ``fm_casestudy_1_0_DownloadData.r'' 
#   creates the time-series matrix $casestudy1.data0.00$ 
#     and saves it in R-workspace ``casestudy_1_0.Rdata'.

#source("fm_casestudy_1_0.r")
# 0.1 Load libraries ----

library("zoo")
## Warning: package 'zoo' was built under R version 3.1.3
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library("quantmod")
## Warning: package 'quantmod' was built under R version 3.1.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.1.3
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.1.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
library("coefplot")
## Warning: package 'coefplot' was built under R version 3.1.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.1.3
library ("graphics")
library("ggplot2")

#####

# 0.2 Load R dataset: casestudy_1_0.Rdata ----
load("casestudy_1_0.RData")
dim(casestudy1.data0.00)
## [1] 3834   14
names(casestudy1.data0.00)
##  [1] "BAC"        "GE"         "JDSU"       "XOM"        "SP500"     
##  [6] "AEP"        "DUK"        "DGS3MO"     "DGS1"       "DGS5"      
## [11] "DGS10"      "DAAA"       "DBAA"       "DCOILWTICO"
head(casestudy1.data0.00)
##                 BAC       GE   JDSU      XOM   SP500      AEP      DUK
## 2000-01-03 15.61121 31.37894 752.00 27.45076 1455.22 14.97396 40.60250
## 2000-01-04 14.68461 30.12378 684.50 26.92497 1399.42 15.15257 41.23363
## 2000-01-05 14.84576 30.07149 633.00 28.39281 1402.11 15.71819 42.91664
## 2000-01-06 16.11480 30.47353 599.00 29.86064 1403.45 15.80750 44.07370
## 2000-01-07 15.69179 31.65351 719.75 29.77301 1441.47 16.01588 45.23077
## 2000-01-10 15.14791 31.64044 801.50 29.35676 1457.60 15.95634 45.17818
##            DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2000-01-03   5.48 6.09 6.50  6.58 7.75 8.27         NA
## 2000-01-04   5.43 6.00 6.40  6.49 7.69 8.21      25.56
## 2000-01-05   5.44 6.05 6.51  6.62 7.78 8.29      24.65
## 2000-01-06   5.41 6.03 6.46  6.57 7.72 8.24      24.79
## 2000-01-07   5.38 6.00 6.42  6.52 7.69 8.22      24.79
## 2000-01-10   5.42 6.07 6.49  6.57 7.72 8.27      24.71
tail(casestudy1.data0.00)
##              BAC    GE  JDSU   XOM   SP500   AEP   DUK DGS3MO DGS1 DGS5
## 2015-03-24 15.61 25.27 13.48 84.52 2091.50 57.25 76.06   0.02 0.24 1.37
## 2015-03-25 15.41 24.91 13.04 84.86 2061.05 55.91 74.96   0.04 0.25 1.41
## 2015-03-26 15.42 24.80 13.03 84.32 2056.15 55.33 74.35   0.03 0.28 1.47
## 2015-03-27 15.31 24.86 12.98 83.58 2061.02 55.90 75.00   0.04 0.27 1.42
## 2015-03-30 15.52 25.12 13.14 85.63 2086.24 56.58 75.90   0.04 0.27 1.41
## 2015-03-31 15.39 24.81 13.12 85.00 2067.89 56.25 76.78   0.03 0.26 1.37
##            DGS10 DAAA DBAA DCOILWTICO
## 2015-03-24  1.88 3.50 4.41      47.03
## 2015-03-25  1.93 3.52 4.46      48.75
## 2015-03-26  2.01 3.61 4.56      51.41
## 2015-03-27  1.95 3.53 4.48      48.83
## 2015-03-30  1.96 3.55 4.51      48.66
## 2015-03-31  1.94 3.52 4.49      47.72
# 0.3 Define functions ----

# function to compute daily returns of a symbol
fcn.compute.r.daily.symbol0<-function(
  symbol0="SP500",
  rdbase0=casestudy1.data0.00, scaleLog=FALSE){
  
  indexcol.symbol0<-match(symbol0, dimnames(rdbase0)[[2]],nomatch=0)
  if (indexcol.symbol0==0){ return(NULL)}
  if(scaleLog==FALSE){
  r.daily.symbol0<-zoo(
    x=exp(as.matrix(diff(log(rdbase0[,indexcol.symbol0]))))-1,
    order.by=as.Date(time(rdbase0)[-1]))
    dimnames(r.daily.symbol0)[[2]]<-paste("r.daily.",symbol0,sep="")
    return(r.daily.symbol0)
  }

  if(scaleLog==TRUE){
    r.daily.symbol0<-zoo(
      x=(as.matrix(diff(log(rdbase0[,indexcol.symbol0])))),
      #order.by=time(rdbase0)[-1])
      order.by=as.Date(time(rdbase0)[-1]))
      dimnames(r.daily.symbol0)[[2]]<-paste("dlog.daily.",symbol0,sep="")
      return(r.daily.symbol0)
  }
  return(NULL)

}

fcn.compute.r.daily.riskfree<-function(rdbase0=casestudy1.data0.00){
  r.daily.riskfree<-(.01*coredata(rdbase0[-1,"DGS3MO"]) *
                     diff(as.numeric(time(rdbase0)))/360)
  dimnames(r.daily.riskfree)[[2]]<-"r.daily.riskfree"
  r.daily.riskfree0<-zoo(
    x=as.matrix(r.daily.riskfree),
    #order.by=time(rdbase0)[-1])
    order.by=as.Date(time(rdbase0)[-1]))
  return(r.daily.riskfree0)
}

# function to compute submatrix of kperiod returns
fcn.rollksub<-function(x,kperiod=2,...){
  x.0<-filter(as.matrix(coredata(x)), f=rep(1,times=kperiod), sides=1)
  n0=floor(length(x)/kperiod)
  indexsub0<-seq(kperiod,length(x),kperiod)
  x.00<-x.0[indexsub0]
  return(x.00)
  
}

# 1.1 Define list.symbol.00 
list.symbol.00<-c("GE","BAC","XOM","JDSU")

# 2.0 Setup CAPM for symbol0 ----
symbol0<-list.symbol.00[1]
#symbol0<-list.symbol.00[2]
#symbol0<-list.symbol.00[3]
#symbol0<-list.symbol.00[4]

#   2.1 Plot time series of symbol0, SP500 and risk-free asset -----
#   symbol0, SP500, and the risk-free interest rate

opar<-par()
# set graphics parameter to 3 panels
par(mfcol=c(3,1))
plot(casestudy1.data0.00[,symbol0],ylab="Price",
     main=paste(symbol0, " Stock",sep=""))

plot(casestudy1.data0.00[,"SP500"], ylab="Value",main="S&P500 Index")

plot(casestudy1.data0.00[,"DGS3MO"], ylab="Rate" ,
     main="3-Month Treasury Rate (Constant Maturity)")

# reset graphics parameter to 1 panel
par(mfcol=c(1,1))

#   2.2 Compute the returns series ----

r.daily.symbol0<-fcn.compute.r.daily.symbol0(symbol0, casestudy1.data0.00)
r.daily.SP500<-fcn.compute.r.daily.symbol0("SP500", casestudy1.data0.00)
r.daily.riskfree<-fcn.compute.r.daily.riskfree(casestudy1.data0.00)


# Note for returns time series of riskfree asset 
#   holding periods vary from 1-3 days corresponding
#   to mid-week and over-weekend returns)
#   plot(r.daily.riskfree)

# Compute excess returns of symbol0 and SP500
r.daily.symbol0.0 <-r.daily.symbol0 - r.daily.riskfree
 dimnames(r.daily.symbol0.0)[[2]]<-paste("r.daily.",symbol0,".0",sep="")

r.daily.SP500.0<-r.daily.SP500 - r.daily.riskfree
  dimnames(r.daily.SP500.0)[[2]]<-"r.daily.SP500.0"

dim(r.daily.SP500.0)
## [1] 3833    1
#   2.3 Create r.daily.data0 = merged series ----
#     and display first and last sets of rows
dimnames(r.daily.symbol0)[[2]]
## [1] "r.daily.GE"
# Note: r.daily.symbol0 has names that use symbol0
r.daily.data0<-merge(r.daily.symbol0, r.daily.SP500, r.daily.riskfree, 
                     r.daily.symbol0.0, r.daily.SP500.0)

head(r.daily.data0)
##               r.daily.GE r.daily.SP500 r.daily.riskfree  r.daily.GE.0
## 2000-01-04 -0.0400000000  -0.038344670     0.0001508333 -0.0401508333
## 2000-01-05 -0.0017359722   0.001922189     0.0001511111 -0.0018870833
## 2000-01-06  0.0133694590   0.000955674     0.0001502778  0.0132191812
## 2000-01-07  0.0387214059   0.027090400     0.0001494444  0.0385719615
## 2000-01-10 -0.0004129203   0.011189973     0.0004516667 -0.0008645869
## 2000-01-11  0.0016527601  -0.013062514     0.0001508333  0.0015019268
##            r.daily.SP500.0
## 2000-01-04   -0.0384955037
## 2000-01-05    0.0017710780
## 2000-01-06    0.0008053962
## 2000-01-07    0.0269409552
## 2000-01-10    0.0107383063
## 2000-01-11   -0.0132133472
tail(r.daily.data0)
##              r.daily.GE r.daily.SP500 r.daily.riskfree r.daily.GE.0
## 2015-03-24 -0.007852375  -0.006139421     5.555556e-07 -0.007852931
## 2015-03-25 -0.014246142  -0.014558905     1.111111e-06 -0.014247253
## 2015-03-26 -0.004415897  -0.002377502     8.333333e-07 -0.004416731
## 2015-03-27  0.002419355   0.002368563     1.111111e-06  0.002418244
## 2015-03-30  0.010458568   0.012236645     3.333333e-06  0.010455235
## 2015-03-31 -0.012340764  -0.008795776     8.333333e-07 -0.012341598
##            r.daily.SP500.0
## 2015-03-24    -0.006139977
## 2015-03-25    -0.014560016
## 2015-03-26    -0.002378335
## 2015-03-27     0.002367452
## 2015-03-30     0.012233312
## 2015-03-31    -0.008796610
#   2.4 Excess Returns plot:  symbol0 vs SP500 ----
par(mfcol=c(1,1))
plot(r.daily.SP500.0, r.daily.symbol0.0, main=symbol0)
abline(h=0,v=0)

# 3. Linear Regression for CAPM ----

#The linear regression model is fit using the R-function lm():

options(show.signif.stars=FALSE)
# (by default the output from summary.lm() uses stars (*) 
#   to indicate significant coefficients; 
#   automated processing of the output encounters errors
#   with  asterisks so the option to not show them is necessary)

lmfit0<-lm(r.daily.symbol0.0 ~ r.daily.SP500.0, x=TRUE, y=TRUE)

#   3.1 Apply lm(), summary.lm() ----
# The components of lmfit0:
names(lmfit0)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"        
## [13] "x"             "y"
lmfit0.summary<-summary.lm(lmfit0)
# The components of lmfit0.summary:
names(lmfit0.summary)
##  [1] "call"          "terms"         "residuals"     "coefficients" 
##  [5] "aliased"       "sigma"         "df"            "r.squared"    
##  [9] "adj.r.squared" "fstatistic"    "cov.unscaled"
print(lmfit0.summary)
## 
## Call:
## lm(formula = r.daily.symbol0.0 ~ r.daily.SP500.0, x = TRUE, y = TRUE)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.159282 -0.005513 -0.000422  0.005185  0.144856 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)     -5.225e-05  2.138e-04  -0.244    0.807
## r.daily.SP500.0  1.175e+00  1.673e-02  70.238   <2e-16
## 
## Residual standard error: 0.01323 on 3831 degrees of freedom
## Multiple R-squared:  0.5629, Adjusted R-squared:  0.5628 
## F-statistic:  4933 on 1 and 3831 DF,  p-value: < 2.2e-16
# Under CAPM, the intercept should be zero
tstat.intercept<-round(lmfit0.summary$coefficients["(Intercept)", "t value"],digits=4)
pvalue.intercept<-round(lmfit0.summary$coefficients["(Intercept)", "Pr(>|t|)"],digits=4)

# The $t$-statistic for the intercept is:
print(tstat.intercept)
## [1] -0.2444
# The p-value for testing whether the intercept is zero is
print(pvalue.intercept)
## [1] 0.8069
#   3.2  R-squared plot ----
lmfit0.rsquared<-lmfit0.summary$r.squared
lmfit0.rsquared.pvalue<-pf(
  q=lmfit0.summary$fstatistic[["value"]],
  df1=lmfit0.summary$fstatistic[["numdf"]],
  df2=lmfit0.summary$fstatistic[["dendf"]],
  lower.tail=FALSE)

length(as.numeric(lmfit0$fitted.values))
## [1] 3833
length(as.numeric(lmfit0$y))
## [1] 3833
modelname0<-"CAPM"

plot(x=lmfit0$fitted.values,
     y=lmfit0$y,
     xlab="Fitted Values",
     ylab="Actual Values",
     main=paste(c(modelname0,
                  "\n Plot: Actual vs Fitted Values\n",
                  "(R-Squared = ",round(lmfit0.rsquared,digits=4),
                  ")"),collapse=""))
abline(a=0,b=1,col='green',lwd=2)

#   3.3 Coefficients Plot ----
library("coefplot")
par(mfcol=c(1,1))
coefplot(lmfit0, lwdInner=4, lwdOuter=1, 
         title=paste(modelname0,
                     "\n Coefficients Plot",sep=""),
         plot=TRUE)

#   3.4a Residuals Analysis:  Histogram/Gaussian Fits----
std.residuals<-sort(as.numeric(lmfit0$residuals))

par(mfcol=c(1,1))
hist(std.residuals, freq=FALSE, nclass=100,
     main=paste(c(modelname0 ,
                  "\n Histogram of Std. Residuals \n",
                  "Normal Fits:  MLE(Green) and Robust(Blue)"),
                collapse=""))

std.residuals.mean=mean(std.residuals)
std.residuals.stdev.mle=sqrt(mean(std.residuals^2) - (mean(std.residuals))^2)
std.residuals.stdev.robust=IQR(std.residuals)/1.3490

std.residuals.density.mle<-dnorm(std.residuals, mean=std.residuals.mean, sd=std.residuals.stdev.mle)
std.residuals.density.robust<-dnorm(std.residuals, mean=std.residuals.mean, sd=std.residuals.stdev.robust)

lines(std.residuals, std.residuals.density.mle, col='green',lwd=2)
lines(std.residuals, std.residuals.density.robust, col='blue',lwd=2)

#   3.4b Residuals Analysis:  QQPlot/Gaussian Fits----
par(mfcol=c(1,1))
qqnorm(std.residuals,
       main=paste(modelname0,
                  "\n Normal QQ Plot of Std. Residuals",sep=""))

abline(a=0,b=sqrt(var(std.residuals)), col='green', lwd=3)
abline(a=0,b=IQR(std.residuals)/1.3490, col='blue', lwd=3)
title(sub=paste("Residual Sigma Fits: MLE(Green) and Robust(Blue)"))

#   3.4c Residuals Analysis:  MLE-Percentile Histogram ----
par(mfcol=c(1,1))
#
nclass0=100
hist(100.*pnorm(std.residuals,mean=std.residuals.mean, sd=std.residuals.stdev.mle), nclass=nclass0,
     xlab="Fitted Percentile",
     main=paste(c(modelname0,
                  "\nHistogram of Residuals\nMLE-Fitted Percentiles"),
                collapse=""))
abline(h=length(std.residuals)/nclass0, col="green",lwd=2)

#   3.4c Residuals Analysis:  Robust-Fitted Percentile Histogram ----
par(mfcol=c(1,1))
hist(100.*pnorm(std.residuals,mean=std.residuals.mean, sd=std.residuals.stdev.robust), nclass=nclass0,
     xlab="Fitted Percentile",
     main=paste(c(modelname0,
                  "\nHistogram of Residuals\nRobust-Fitted Percentiles"),
                collapse=""))

abline(h=length(std.residuals)/nclass0, col="blue",lwd=2)

#   3.5 Regression Diagnostics: Leverage/Hat Values ----
# For the functions hatvalues() and influence.measures()
# the input argument lmfit0 must handle indexes properly

y00=coredata(r.daily.symbol0.0)
x00=coredata(r.daily.SP500.0)
lmfit00<-lm(y00~x00, x=TRUE, y=TRUE)
# This replacement code snippet eliminates warnings and errors returned from
#   the functon influence.measures(lmfit0) below

#
lmfit0.hat<-zoo(x=hatvalues(lmfit00), 
                           order.by=as.Date(names(lmfit00$fitted.values)))
par(mfcol=c(1,1))
plot(lmfit0.hat, ylab="Leverage/Hat Value",
     main=paste(c(modelname0,
                  "\nLeverage/Hat Values"),
                collapse=""))

# Note the cases are time points of the time series data
#   The financial crisis of 2008 is evident 


#   3.6 Regression Diagnostics Plot----
#     The R function $plot.lm()$ generates a 
#     2x2 display of plots for various regression diagnostic statistics:
oldpar=par(no.readonly=TRUE)
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page 
plot(lmfit00)

par(oldpar,no.readonly=TRUE)


#######ADDITION 
#   Some useful R functions

#   anova.lm():  conduct an Analysis of Variance for the linear regression model, detailing the computation of the F-statistic for no regression structure.
#                 #anova.lm(lmfit0)

#   influence.measures():  compute regression diagnostics evaluating case influence for the linear regression model; includes `hat' matirx, case-deletion statistics for the regression coefficients and for the residual standard deviation.



#   3.7 Apply influence.measures()  ----
# Compute influence measures (case-deletion statistics)
lmfit0.inflm<-influence.measures(lmfit00)
# Table counts of influential/non-influential cases
# as measured by the hat/leverage statistic.
table(lmfit0.inflm$is.inf[,"hat"])
## 
## FALSE  TRUE 
##  3682   151
#   3.8 Plot data, fitted model, influential cases ----
#      selective highlighting of influential cases

plot(r.daily.SP500.0, r.daily.symbol0.0,
     main=paste(symbol0,"  vs SP500 Data \n OLS Fit (Green line)\n High-Leverage Cases (red points)\n High Cooks Dist (blue Xs)",sep=""),
     cex.main=0.8)
abline(h=0,v=0)
abline(lmfit0, col=3, lwd=3)

# Plot cases with high leverage as red (col=2) "o"s
index.inf.hat<-which(lmfit0.inflm$is.inf[,"hat"]==TRUE)
points(r.daily.SP500.0[index.inf.hat], r.daily.symbol0.0[index.inf.hat], 
        col=2, pch="o")

# Plot cases with high cooks distance as big (cex=2) blue (col=4) "X"s
index.inf.cook.d<-which(lmfit0.inflm$is.inf[,"cook.d"]==TRUE)
dim(r.daily.SP500.0)
## [1] 3833    1
dim(r.daily.SP500.0)
## [1] 3833    1
points(r.daily.SP500.0[index.inf.cook.d], r.daily.symbol0.0[index.inf.cook.d], 
       col=4, pch="X", cex=2.)