# fm_casestudy_1_0_DownloadData.r
#
#   * Install/load R packages 
#   * Collect historical financial data from internet
#   * Create time series data matrix: casestudy1.data0.0
#         Closing prices on stocks (BAC, GE, JDSU, XOM)
#         Closing values of indexes (SP500)
#         Yields on constant maturity US rates/bonds (3MO, 1YR, 5YR, 10 YR)
#         Closing price on crude oil spot price
# 0. Install and load packages ----
#
# 0.1 Install packages ---
#     Set ind.install0 to TRUE if running script for first time on a computer
#     or updating the packages
ind.install0<-FALSE
#
if (ind.install0){
install.packages("quantmod") 
install.packages("tseries") 
install.packages("vars")
install.packages("fxregime")
install.packages("coefplot")
}
# 0.2 Load packages into R session

library("quantmod")  
## Warning: package 'quantmod' was built under R version 3.1.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.1.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.1.3
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.1.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
library("tseries")  
## Warning: package 'tseries' was built under R version 3.1.3
library("vars")  
## Warning: package 'vars' was built under R version 3.1.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 3.1.3
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 3.1.3
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 3.1.3
## Loading required package: urca
## Warning: package 'urca' was built under R version 3.1.3
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.1.3
library("fxregime")  
## Warning: package 'fxregime' was built under R version 3.1.3
# 1. Load data into R session ----

#   1.1  Stock Price Data from Yahoo
#         Apply quantmod(sub-package TTR)  function 
#           getYahoodata 
#
#             Returns historical data for any symbol at the website 
#              http://finance.yahoo.com
#
#     1.1.1 Set start and end date for collection in YYYYMMDD (numeric) format
date.start<-20000101
date.end<-20150331

#  format(Sys.Date(),"%Y%m%d")
#     1.1.2 Collect historical data for S&P 500 Index
SP500 <- getYahooData("^GSPC", start=date.start, end=date.end)
chartSeries(SP500[,1:5])

#     1.1.3 Collect historical data for 4 stocks
GE <- getYahooData("GE", start=date.start, end=date.end)
BAC <- getYahooData("BAC", start=date.start, end=date.end)
JDSU <- getYahooData("JDSU", start=date.start, end=date.end)
XOM <- getYahooData("XOM", start=date.start, end=date.end)
AEP <- getYahooData("AEP", start=date.start, end=date.end)
DUK <- getYahooData("DUK", start=date.start, end=date.end)

chartSeries(GE[,1:5])

chartSeries(BAC[,1:5])

chartSeries(JDSU[,1:5])

chartSeries(XOM[,1:5])

chartSeries(AEP[,1:5])

chartSeries(DUK[,1:5])

#     1.1.4 Details of data object GE from getYahoodata
#
# GE is a matrix object with 
#     row dimension equal to the number of dates 
#     column dimension equal to 9
is.matrix(GE)
## [1] TRUE
dim(GE)
## [1] 3834    9
#   Print out the first and last parts of the matrix:
head(GE)
##                Open     High      Low    Close   Volume Unadj.Close Div
## 2000-01-03 32.00652 32.15034 31.20898 31.37894 35166578    150.0000  NA
## 2000-01-04 30.80366 30.96055 30.12378 30.12378 35248799    144.0000  NA
## 2000-01-05 30.07149 30.75136 29.82306 30.07149 43489038    143.7500  NA
## 2000-01-06 29.94074 30.73829 29.83615 30.47353 31666460    145.6719  NA
## 2000-01-07 30.96055 31.77118 30.75136 31.65351 32093817    151.3125  NA
## 2000-01-10 31.94114 32.22879 31.61428 31.64044 24262291    151.2500  NA
##            Split Adj.Div
## 2000-01-03    NA      NA
## 2000-01-04    NA      NA
## 2000-01-05    NA      NA
## 2000-01-06    NA      NA
## 2000-01-07    NA      NA
## 2000-01-10    NA      NA
tail(GE)
##             Open  High   Low Close   Volume Unadj.Close Div Split Adj.Div
## 2015-03-24 25.38 25.48 25.27 25.27 25801300       25.27  NA    NA      NA
## 2015-03-25 25.23 25.33 24.91 24.91 34897400       24.91  NA    NA      NA
## 2015-03-26 24.80 24.92 24.67 24.80 32504400       24.80  NA    NA      NA
## 2015-03-27 24.92 24.92 24.71 24.86 28320600       24.86  NA    NA      NA
## 2015-03-30 24.98 25.20 24.97 25.12 27281400       25.12  NA    NA      NA
## 2015-03-31 25.09 25.09 24.81 24.81 34940900       24.81  NA    NA      NA
#   Some attributes of the object GE

mode(GE)  # storage mode of GE is "numeric"
## [1] "numeric"
class(GE) # object-oriented class(es) of GE are "xts" and "zoo"
## [1] "xts" "zoo"
          # xts is an extensible time-series object from the package xts
          # zoo is an object storing ordered observations in a vector or matrix with an index attribute 
              # Important zoo functions
              #   coredata() extracts or replaces core data
              #   index() extracts or replaces the  (sort.by) index of the object

# 1.2 Federal Reserve Economic Data (FRED) from the St. Louis Federal Reserve
#       Apply quantmod  function 
#           getSymbols( seriesname, src="FRED")
#
#             Returns historical data for any symbol at the website
#               http://research.stlouisfed.org/fred2/
#
# Series name | Description
# 
# DGS3MO      | 3-Month Treasury, constant maturity rate
# DGS1        | 1-Year Treasury, constant maturity rate
# DGS5        | 5-Year Treasury, constant maturity rate
# DGS10       | 10-Year Treasury, constant maturity rate
#
# DAAA        | Moody's Seasoned Aaa Corporate Bond Yield 
# DBAA        | Moody's Seasoned Baa Corporate Bond Yield 
#
# DCOILWTICO  | Crude Oil Prices: West Text Intermediate (WTI) - Cushing, Oklahoma
# 
#   1.2.1   Default setting collects entire series
#           and assigns to object of same name as the series
getSymbols("DGS3MO", src="FRED")
##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## [1] "DGS3MO"
getSymbols("DGS1", src="FRED")
## [1] "DGS1"
getSymbols("DGS5", src="FRED")
## [1] "DGS5"
getSymbols("DGS10", src="FRED")
## [1] "DGS10"
getSymbols("DAAA", src="FRED")
## [1] "DAAA"
getSymbols("DBAA", src="FRED")
## [1] "DBAA"
getSymbols("DCOILWTICO", src="FRED")
## [1] "DCOILWTICO"
# Each object is a 1-column matrix with time series data
#   The column-name is the same as the object name
is.matrix(DGS3MO) #
## [1] TRUE
dim(DGS3MO)
## [1] 8687    1
head(DGS3MO)
##            DGS3MO
## 1982-01-04  11.87
## 1982-01-05  12.20
## 1982-01-06  12.16
## 1982-01-07  12.17
## 1982-01-08  11.98
## 1982-01-11  12.49
tail(DGS3MO)
##            DGS3MO
## 2015-04-14   0.02
## 2015-04-15   0.02
## 2015-04-16   0.02
## 2015-04-17   0.01
## 2015-04-20   0.03
## 2015-04-21   0.03
mode(DGS3MO)
## [1] "numeric"
class(DGS3MO)
## [1] "xts" "zoo"
#
# 2.0   Merge data series together

#   2.1 Create data frame with all FRED series from 2000/01/01 on
# 
#   Useful functions/methods   for zoo objects
#     merge()
#     lag (lag.zoo)
#     diff()
#     window.zoo()
#
#     na.locf() # replace NAs by last previou non-NA
#     rollmean(), rollmax() # compute rolling functions, column-wise

fred.data0<-merge(
  DGS3MO,
  DGS1,
  DGS5,
  DGS10,
  DAAA,
  DBAA,
  DCOILWTICO)["2000::2015-03"]

tail(fred.data0)
##            DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2015-03-24   0.02 0.24 1.37  1.88 3.50 4.41      47.03
## 2015-03-25   0.04 0.25 1.41  1.93 3.52 4.46      48.75
## 2015-03-26   0.03 0.28 1.47  2.01 3.61 4.56      51.41
## 2015-03-27   0.04 0.27 1.42  1.95 3.53 4.48      48.83
## 2015-03-30   0.04 0.27 1.41  1.96 3.55 4.51      48.66
## 2015-03-31   0.03 0.26 1.37  1.94 3.52 4.49      47.72
# Determine data dimensions
dim(fred.data0)
## [1] 3977    7
class(fred.data0)
## [1] "xts" "zoo"
# Check first and last rows in object
head(fred.data0) ; tail(fred.data0)
##            DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2000-01-03   5.48 6.09 6.50  6.58 7.75 8.27         NA
## 2000-01-04   5.43 6.00 6.40  6.49 7.69 8.21      25.56
## 2000-01-05   5.44 6.05 6.51  6.62 7.78 8.29      24.65
## 2000-01-06   5.41 6.03 6.46  6.57 7.72 8.24      24.79
## 2000-01-07   5.38 6.00 6.42  6.52 7.69 8.22      24.79
## 2000-01-10   5.42 6.07 6.49  6.57 7.72 8.27      24.71
##            DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2015-03-24   0.02 0.24 1.37  1.88 3.50 4.41      47.03
## 2015-03-25   0.04 0.25 1.41  1.93 3.52 4.46      48.75
## 2015-03-26   0.03 0.28 1.47  2.01 3.61 4.56      51.41
## 2015-03-27   0.04 0.27 1.42  1.95 3.53 4.48      48.83
## 2015-03-30   0.04 0.27 1.41  1.96 3.55 4.51      48.66
## 2015-03-31   0.03 0.26 1.37  1.94 3.52 4.49      47.72
# Count the number of NAs in each column
apply(is.na(fred.data0),2,sum)
##     DGS3MO       DGS1       DGS5      DGS10       DAAA       DBAA 
##        164        164        164        164        164        164 
## DCOILWTICO 
##        150
#  Plot the rates series all togehter
opar<-par()

par(fg="blue",bg="black",
    col.axis="gray",
    col.lab="gray",
    col.main="blue",
    col.sub="blue")

ts.plot(as.ts(fred.data0[,1:6]),col=rainbow(6),bg="black",
        main="FRED Data:  Rates")
## Warning in xy.coords(x = matrix(rep.int(tx, k), ncol = k), y = x, log =
## log): NAs introduced by coercion
## Warning in xy.coords(x, y): NAs introduced by coercion
par(opar)
## Warning in par(opar): graphical parameter "cin" cannot be set
## Warning in par(opar): graphical parameter "cra" cannot be set
## Warning in par(opar): graphical parameter "csi" cannot be set
## Warning in par(opar): graphical parameter "cxy" cannot be set
## Warning in par(opar): graphical parameter "din" cannot be set
## Warning in par(opar): graphical parameter "page" cannot be set
# add legend to plot
legend(x=0,y=2, 
       legend=dimnames(fred.data0)[[2]][1:6],
       lty=rep(1,times=6), 
       col=rainbow(6), 
       cex=0.75)

# Plot the Crude Oil PRice
chartSeries(to.monthly(fred.data0[,"DCOILWTICO"]), main="FRED Data: Crude Oil (WTI)")
## Warning in to.period(x, "months", indexAt = indexAt, name = name, ...):
## missing values removed from data

chartSeries(to.monthly(XOM[,1:5]))

#   2.2 Merge the closing prices for the stock market data series
yahoo.data0<-merge(BAC$Close,
           GE$Close,
           JDSU$Close,
           XOM$Close,
           SP500$Close,
           AEP$Close,
           DUK$Close)
# Replace the index of yahoo data with date values that do not include hours/minutes

dimnames(yahoo.data0)[[2]]<-c("BAC","GE","JDSU","XOM","SP500","AEP","DUK")
yahoo.data0.0<-zoo(x=coredata(yahoo.data0), order.by=as.Date(time(yahoo.data0)))

# fred.data0 is already indexed by this scale

#   2.3 Merge the yahoo and Fred data together

#     2.3.1 merge with all dates
casestudy1.data0<-merge(yahoo.data0.0, fred.data0)

dim(casestudy1.data0)
## [1] 3977   14
head(casestudy1.data0)
##                 BAC       GE   JDSU      XOM   SP500      AEP      DUK
## 2000-01-03 15.61121 31.37894 752.00 27.45076 1455.22 14.97396 40.60250
## 2000-01-04 14.68461 30.12378 684.50 26.92497 1399.42 15.15257 41.23363
## 2000-01-05 14.84576 30.07149 633.00 28.39281 1402.11 15.71819 42.91664
## 2000-01-06 16.11480 30.47353 599.00 29.86064 1403.45 15.80750 44.07370
## 2000-01-07 15.69179 31.65351 719.75 29.77301 1441.47 16.01588 45.23077
## 2000-01-10 15.14791 31.64044 801.50 29.35676 1457.60 15.95634 45.17818
##            DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2000-01-03   5.48 6.09 6.50  6.58 7.75 8.27         NA
## 2000-01-04   5.43 6.00 6.40  6.49 7.69 8.21      25.56
## 2000-01-05   5.44 6.05 6.51  6.62 7.78 8.29      24.65
## 2000-01-06   5.41 6.03 6.46  6.57 7.72 8.24      24.79
## 2000-01-07   5.38 6.00 6.42  6.52 7.69 8.22      24.79
## 2000-01-10   5.42 6.07 6.49  6.57 7.72 8.27      24.71
tail(casestudy1.data0)
##              BAC    GE  JDSU   XOM   SP500   AEP   DUK DGS3MO DGS1 DGS5
## 2015-03-24 15.61 25.27 13.48 84.52 2091.50 57.25 76.06   0.02 0.24 1.37
## 2015-03-25 15.41 24.91 13.04 84.86 2061.05 55.91 74.96   0.04 0.25 1.41
## 2015-03-26 15.42 24.80 13.03 84.32 2056.15 55.33 74.35   0.03 0.28 1.47
## 2015-03-27 15.31 24.86 12.98 83.58 2061.02 55.90 75.00   0.04 0.27 1.42
## 2015-03-30 15.52 25.12 13.14 85.63 2086.24 56.58 75.90   0.04 0.27 1.41
## 2015-03-31 15.39 24.81 13.12 85.00 2067.89 56.25 76.78   0.03 0.26 1.37
##            DGS10 DAAA DBAA DCOILWTICO
## 2015-03-24  1.88 3.50 4.41      47.03
## 2015-03-25  1.93 3.52 4.46      48.75
## 2015-03-26  2.01 3.61 4.56      51.41
## 2015-03-27  1.95 3.53 4.48      48.83
## 2015-03-30  1.96 3.55 4.51      48.66
## 2015-03-31  1.94 3.52 4.49      47.72
apply(is.na(casestudy1.data0),2,sum)
##        BAC         GE       JDSU        XOM      SP500        AEP 
##        143        143        143        143        143        143 
##        DUK     DGS3MO       DGS1       DGS5      DGS10       DAAA 
##        143        164        164        164        164        164 
##       DBAA DCOILWTICO 
##        164        150
#     2.3.2 Subset out days when SP500 is not missing (not == NA)

index.notNA.SP500<-which(is.na(coredata(casestudy1.data0$SP500))==FALSE)
casestudy1.data0.0<-casestudy1.data0[index.notNA.SP500,]

head(casestudy1.data0.0)
##                 BAC       GE   JDSU      XOM   SP500      AEP      DUK
## 2000-01-03 15.61121 31.37894 752.00 27.45076 1455.22 14.97396 40.60250
## 2000-01-04 14.68461 30.12378 684.50 26.92497 1399.42 15.15257 41.23363
## 2000-01-05 14.84576 30.07149 633.00 28.39281 1402.11 15.71819 42.91664
## 2000-01-06 16.11480 30.47353 599.00 29.86064 1403.45 15.80750 44.07370
## 2000-01-07 15.69179 31.65351 719.75 29.77301 1441.47 16.01588 45.23077
## 2000-01-10 15.14791 31.64044 801.50 29.35676 1457.60 15.95634 45.17818
##            DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2000-01-03   5.48 6.09 6.50  6.58 7.75 8.27         NA
## 2000-01-04   5.43 6.00 6.40  6.49 7.69 8.21      25.56
## 2000-01-05   5.44 6.05 6.51  6.62 7.78 8.29      24.65
## 2000-01-06   5.41 6.03 6.46  6.57 7.72 8.24      24.79
## 2000-01-07   5.38 6.00 6.42  6.52 7.69 8.22      24.79
## 2000-01-10   5.42 6.07 6.49  6.57 7.72 8.27      24.71
tail(casestudy1.data0.0)
##              BAC    GE  JDSU   XOM   SP500   AEP   DUK DGS3MO DGS1 DGS5
## 2015-03-24 15.61 25.27 13.48 84.52 2091.50 57.25 76.06   0.02 0.24 1.37
## 2015-03-25 15.41 24.91 13.04 84.86 2061.05 55.91 74.96   0.04 0.25 1.41
## 2015-03-26 15.42 24.80 13.03 84.32 2056.15 55.33 74.35   0.03 0.28 1.47
## 2015-03-27 15.31 24.86 12.98 83.58 2061.02 55.90 75.00   0.04 0.27 1.42
## 2015-03-30 15.52 25.12 13.14 85.63 2086.24 56.58 75.90   0.04 0.27 1.41
## 2015-03-31 15.39 24.81 13.12 85.00 2067.89 56.25 76.78   0.03 0.26 1.37
##            DGS10 DAAA DBAA DCOILWTICO
## 2015-03-24  1.88 3.50 4.41      47.03
## 2015-03-25  1.93 3.52 4.46      48.75
## 2015-03-26  2.01 3.61 4.56      51.41
## 2015-03-27  1.95 3.53 4.48      48.83
## 2015-03-30  1.96 3.55 4.51      48.66
## 2015-03-31  1.94 3.52 4.49      47.72
apply(is.na(casestudy1.data0.0)==TRUE, 2,sum)
##        BAC         GE       JDSU        XOM      SP500        AEP 
##          0          0          0          0          0          0 
##        DUK     DGS3MO       DGS1       DGS5      DGS10       DAAA 
##          0         28         28         28         28         28 
##       DBAA DCOILWTICO 
##         28         14
# Remaining missing values are for interest rates and the crude oil spot price
#   There are days when the stock market is open but the bond market and/or commodities market
#   is closed 
# For the rates and commodity data, replace NAs with previoius non-NA values
casestudy1.data0.00<-na.locf(casestudy1.data0.0)

apply(is.na(casestudy1.data0.00),2,sum) # Only 1 NA left, the first DCOILWTICO value
##        BAC         GE       JDSU        XOM      SP500        AEP 
##          0          0          0          0          0          0 
##        DUK     DGS3MO       DGS1       DGS5      DGS10       DAAA 
##          0          0          0          0          0          0 
##       DBAA DCOILWTICO 
##          0          1
save(file="casestudy_1_0.RData", list=c("casestudy1.data0.00"))