# File:   mit18_05_s22_studio8-grader.r 
# Authors: Jeremy Orloff and Jennifer French
#
# MIT OpenCourseWare: https://ocw.mit.edu
# 18.05 Introduction to Probability and Statistics
# Spring 2022
# For information about citing these materials or our Terms of Use, visit:
# https://ocw.mit.edu/terms.
#
# Studio 8 grading script
# Expected output in studio8-grader.html
# If this file changes, studio8-grader.html needs to be rebuilt.

# Use 'File > Compile report...' to create an R Markdown report from this script.
# Compiling the report opens a new R session, which does not see the current environment,
# so the solutions file has to be sourced explicitly by the line below.
# Comment that line out when using this script for grading.

 # Load the reference solutions so the calls below resolve when the report is compiled.
 source("mit18_05_s22_studio8-solutions.r")  ### COMMENT OUT FOR GRADING
 # Printed unconditionally as a reminder to graders.
 cat("WARNING: make sure source('mit18_05_s22_studio*-solutions.r') is commented out before grading", "\n", sep = "")
## WARNING: make sure source('mit18_05_s22_studio*-solutions.r') is commented out before grading
# For grading, open this file and set working directory to source file location
# Problem 1: simulate the F statistic. The expected output is plots only,
# so compare against the plots in the expected-output report.
# NOTE(review): the meaning of each argument is defined in the solutions file, not here.
studio8_problem_1(7, 12, 7, 3, 10000)
## ----------------------------------
## Problem 1: Simulate F statistic

## See plots
# The grading data below was generated with the following code:
# printarray = function(x, name, rnd) {
#   if (name != '') {
#     cat(name, ' = ', sep='')
#   }
#   cat('c(', sep='')
#   cat(round(x, rnd), sep=', ')
#   cat(')\n')
# }
# n = 100
# x = rnorm(n, 5, 3)
# printarray(x, 'grade_data_problem_2', 3)
#
# n = 16
# T1 = rnorm(n, 3, 2)
# T2 = rnorm(n, 3, 2.5)
# T3 = rnorm(n, 3, 2)
# printarray(T1, 'T1', 3)
# printarray(T2, 'T2', 3)
# printarray(T3, 'T3', 3)
# Fixed sample for the Problem 2 z-test: 100 values rounded to three decimals
# (sample mean 5.52805), hard-coded so the expected output is reproducible.
grade_data_problem_2 <- c(
  12.217,  3.570, -2.703,  0.653,  5.092,  4.350,
   6.390,  4.872,  6.357,  0.891,  9.489, 13.523,
   2.446,  4.848,  5.061,  3.858, 10.368,  6.457,
   5.032,  1.283,  9.803,  4.274,  7.705,  7.908,
   1.901,  3.546,  2.906,  6.397,  7.417,  6.675,
   1.700,  3.424,  4.526,  0.296,  3.120,  6.113,
   5.228,  7.037,  6.992,  1.460,  1.334,  3.812,
   0.720,  8.160, -0.940,  4.830,  6.861, 10.365,
   2.485,  1.754,  7.712,  4.995,  2.061,  5.622,
   7.864,  5.101,  8.978,  2.970,  7.046,  4.988,
   6.619,  5.281,  2.700, 10.346,  7.425,  2.039,
   5.084,  7.623, 10.517,  4.579, 11.710,  6.949,
   0.627,  4.082,  5.864,  8.183,  3.890,  3.513,
  10.302,  3.392,  6.154,  8.054,  1.368,  6.464,
   7.770,  4.589,  5.961,  6.070,  9.400,  3.898,
   4.242,  7.963,  4.907,  4.759,  7.903,  9.101,
  10.617,  7.790,  6.306,  9.564
)

# Problem 2, z-test by hand. Judging from the expected output, the arguments
# appear to be (data, null-hypothesis mean, sigma, significance level) --
# confirm against the solutions file.
# Here: p = 0.07837955 < 0.1, so the null hypothesis is rejected.
studio8_problem_2(grade_data_problem_2, 5, 3, 0.1)
## ----------------------------------
## Problem 2: Code z-test by hand
## mean of data = 5.52805 
## z = 1.760167 
## p = 0.07837955 
## Because the p-value 0.07837955 < 0.1 , the data supports rejecting the null hypothesis in favor of the alternative.
# Problem 2 with the last argument lowered to 0.01: the p-value is still
# 0.07837955, which is now above the threshold, so the null is not rejected.
studio8_problem_2(grade_data_problem_2, 5, 3, 0.01)
## ----------------------------------
## Problem 2: Code z-test by hand
## mean of data = 5.52805 
## z = 1.760167 
## p = 0.07837955 
## Because the p-value 0.07837955 > 0.01 , the data does not support rejecting the null hypothesis in favor of the alternative.
# Problem 2 with the second argument set to 0: expected z = 18.42683 and
# p printed as 0, so the null hypothesis is rejected at 0.1.
studio8_problem_2(grade_data_problem_2, 0, 3, 0.1)
## ----------------------------------
## Problem 2: Code z-test by hand
## mean of data = 5.52805 
## z = 18.42683 
## p = 0 
## Because the p-value 0 < 0.1 , the data supports rejecting the null hypothesis in favor of the alternative.
# Problem 2 with second argument 0 and last argument 0.01: p is printed as 0,
# so the null hypothesis is still rejected at the stricter level.
studio8_problem_2(grade_data_problem_2, 0, 3, 0.01)
## ----------------------------------
## Problem 2: Code z-test by hand
## mean of data = 5.52805 
## z = 18.42683 
## p = 0 
## Because the p-value 0 < 0.01 , the data supports rejecting the null hypothesis in favor of the alternative.
# Problem 3: chi-square test for independence on the grading table at level 0.05.
# Expected p-value 0.04111503 <= 0.05, so H0 (independence) is rejected.
# NOTE(review): the table file is presumably read relative to the working
# directory -- confirm against the solutions file.
studio8_problem_3("mit18_05_s22_studio8Problem3_grade.tbl", 0.05)
## ----------------------------------
## Problem 3: Chi-square test for independence
##            Married.once Married.more
## College             500          100
## No College          600          160
## ---Using chisq.test
## 
##  Pearson's Chi-squared test
## 
## data:  contingency_tbl
## X-squared = 4.1713, df = 1, p-value = 0.04112
## 
## The p-value  0.04111503  is less than or equal to  0.05 , so at significance level  0.05  we should reject H0 in favor of the alternative that the levels of marriage and education are not independent.
## ----------
## Same calculation by hand.
## Test stat X^2 = 4.171267 
## p-value = 0.04111503
# Problem 3 on the same table file with the second argument lowered to 0.01:
# the p-value 0.04111503 is now greater than the level, so H0 is not rejected.
studio8_problem_3("mit18_05_s22_studio8Problem3_grade.tbl", 0.01)
## ----------------------------------
## Problem 3: Chi-square test for independence
##            Married.once Married.more
## College             500          100
## No College          600          160
## ---Using chisq.test
## 
##  Pearson's Chi-squared test
## 
## data:  contingency_tbl
## X-squared = 4.1713, df = 1, p-value = 0.04112
## 
## The p-value  0.04111503  is greater than  0.01 , so at significance level  0.01  the data does not support rejecting H0, that the levels of marriage and education are  independent.
## ----------
## Same calculation by hand.
## Test stat X^2 = 4.171267 
## p-value = 0.04111503
# Fixed samples for the Problem 4 ANOVA: one vector of 16 pain values per
# treatment group, rounded to three decimals.
T1 <- c(
  1.592, -0.102, 3.924, 4.333, 4.680, 4.220, 2.749, -0.016,
  0.160,  6.067, 2.152, 2.211, 2.860, 5.831, 3.941,  3.104
)
T2 <- c(
  2.052, -0.785, 2.240, 5.096, -1.119, 4.221, 2.687, -0.134,
  8.310,  2.265, 2.169, 0.957,  3.851, 3.562, 1.218,  3.670
)
T3 <- c(
  4.812, 5.025, 3.341, 1.199, 1.588,  3.911, 5.098,  2.667,
  4.152, 0.490, 3.625, 2.848, 4.150, -0.041, 2.260, -1.563
)

# Problem 4: ANOVA using aov() on the three samples at level 0.05.
# Expected p-value 0.8191381 > 0.05, so H0 (equal pain levels) is not rejected.
studio8_problem_4(T1, T2, T3, 0.05)
## ----------------------------------
## Problem 4: ANOVA using aov()
##    procedure   pain
## 1         T1  1.592
## 2         T1 -0.102
## 3         T1  3.924
## 4         T1  4.333
## 5         T1  4.680
## 6         T1  4.220
## 7         T1  2.749
## 8         T1 -0.016
## 9         T1  0.160
## 10        T1  6.067
## 11        T1  2.152
## 12        T1  2.211
## 13        T1  2.860
## 14        T1  5.831
## 15        T1  3.941
## 16        T1  3.104
## 17        T2  2.052
## 18        T2 -0.785
## 19        T2  2.240
## 20        T2  5.096
## 21        T2 -1.119
## 22        T2  4.221
## 23        T2  2.687
## 24        T2 -0.134
## 25        T2  8.310
## 26        T2  2.265
## 27        T2  2.169
## 28        T2  0.957
## 29        T2  3.851
## 30        T2  3.562
## 31        T2  1.218
## 32        T2  3.670
## 33        T3  4.812
## 34        T3  5.025
## 35        T3  3.341
## 36        T3  1.199
## 37        T3  1.588
## 38        T3  3.911
## 39        T3  5.098
## 40        T3  2.667
## 41        T3  4.152
## 42        T3  0.490
## 43        T3  3.625
## 44        T3  2.848
## 45        T3  4.150
## 46        T3 -0.041
## 47        T3  2.260
## 48        T3 -1.563
## 
## Summary:
##             Df Sum Sq Mean Sq F value Pr(>F)
## procedure    2   1.74   0.870     0.2  0.819
## Residuals   45 195.37   4.341               
## 
## p-value: 0.8191381 
## The p-value  0.8191381  is greater than  0.05 . So, at significance level  0.05  the data does not support rejecting H0, that the levels of pain in the different treatments are all the same.
# Problem 4 degenerate case: T1 is passed for all three groups, so the group
# means coincide. Expected F value 0 and p-value 1; H0 is not rejected at 0.01.
studio8_problem_4(T1, T1, T1, 0.01)
## ----------------------------------
## Problem 4: ANOVA using aov()
##    procedure   pain
## 1         T1  1.592
## 2         T1 -0.102
## 3         T1  3.924
## 4         T1  4.333
## 5         T1  4.680
## 6         T1  4.220
## 7         T1  2.749
## 8         T1 -0.016
## 9         T1  0.160
## 10        T1  6.067
## 11        T1  2.152
## 12        T1  2.211
## 13        T1  2.860
## 14        T1  5.831
## 15        T1  3.941
## 16        T1  3.104
## 17        T2  1.592
## 18        T2 -0.102
## 19        T2  3.924
## 20        T2  4.333
## 21        T2  4.680
## 22        T2  4.220
## 23        T2  2.749
## 24        T2 -0.016
## 25        T2  0.160
## 26        T2  6.067
## 27        T2  2.152
## 28        T2  2.211
## 29        T2  2.860
## 30        T2  5.831
## 31        T2  3.941
## 32        T2  3.104
## 33        T3  1.592
## 34        T3 -0.102
## 35        T3  3.924
## 36        T3  4.333
## 37        T3  4.680
## 38        T3  4.220
## 39        T3  2.749
## 40        T3 -0.016
## 41        T3  0.160
## 42        T3  6.067
## 43        T3  2.152
## 44        T3  2.211
## 45        T3  2.860
## 46        T3  5.831
## 47        T3  3.941
## 48        T3  3.104
## 
## Summary:
##             Df Sum Sq Mean Sq F value Pr(>F)
## procedure    2    0.0   0.000       0      1
## Residuals   45  166.3   3.696               
## 
## p-value: 1 
## The p-value  1  is greater than  0.01 . So, at significance level  0.01  the data does not support rejecting H0, that the levels of pain in the different treatments are all the same.