May 31, 2024





# HW2
# Yuchen Zou
# 2024-04-15
#
# Lines that begin with "##" are console output recorded in the original
# knitted report; they are kept here as comments for reference.

# Q13 (a): numerical and graphical summaries of the Weekly data ----

library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.2.3

# The spreadsheet viewer only makes sense in an interactive session.
if (interactive()) {
  View(Weekly)
}

# Columns are referenced explicitly (Weekly$..., data = Weekly) instead of
# attach()-ing the data frame, so no names are silently put on the search path.
names(Weekly)
## [1] "Year"      "Lag1"      "Lag2"      "Lag3"      "Lag4"      "Lag5"
## [7] "Volume"    "Today"     "Direction"

dim(Weekly)
## [1] 1089    9

summary(Weekly)
##       Year           Lag1               Lag2               Lag3
##  Min.   :1990   Min.   :-18.1950   Min.   :-18.1950   Min.   :-18.1950
##  1st Qu.:1995   1st Qu.: -1.1540   1st Qu.: -1.1540   1st Qu.: -1.1580
##  Median :2000   Median :  0.2410   Median :  0.2410   Median :  0.2410
##  Mean   :2000   Mean   :  0.1506   Mean   :  0.1511   Mean   :  0.1472
##  3rd Qu.:2005   3rd Qu.:  1.4050   3rd Qu.:  1.4090   3rd Qu.:  1.4090
##  Max.   :2010   Max.   : 12.0260   Max.   : 12.0260   Max.   : 12.0260
##       Lag4               Lag5              Volume            Today
##  Min.   :-18.1950   Min.   :-18.1950   Min.   :0.08747   Min.   :-18.1950
##  1st Qu.: -1.1580   1st Qu.: -1.1660   1st Qu.:0.33202   1st Qu.: -1.1540
##  Median :  0.2380   Median :  0.2340   Median :1.00268   Median :  0.2410
##  Mean   :  0.1458   Mean   :  0.1399   Mean   :1.57462   Mean   :  0.1499
##  3rd Qu.:  1.4090   3rd Qu.:  1.4050   3rd Qu.:2.05373   3rd Qu.:  1.4050
##  Max.   : 12.0260   Max.   : 12.0260   Max.   :9.32821   Max.   : 12.0260
##  Direction
##  Down:484
##  Up  :605
##
##
##
##

class(Weekly$Direction)
## [1] "factor"

# The dummy coding shows that "Up" is the level coded as 1.
contrasts(Weekly$Direction)
##      Up
## Down  0
## Up    1

pairs(Weekly)

# Correlations of every column except the factor Direction (selected by name
# rather than by the position 9).
cor(Weekly[, names(Weekly) != "Direction"])
##               Year         Lag1        Lag2        Lag3         Lag4
## Year    1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1   -0.03228927  1.000000000 -0.07485305  0.05863568 -0.071273876
## Lag2   -0.03339001 -0.074853051  1.00000000 -0.07572091  0.058381535
## Lag3   -0.03000649  0.058635682 -0.07572091  1.00000000 -0.075395865
## Lag4   -0.03112792 -0.071273876  0.05838153 -0.07539587  1.000000000
## Lag5   -0.03051910 -0.008183096 -0.07249948  0.06065717 -0.075675027
## Volume  0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today  -0.03245989 -0.075031842  0.05916672 -0.07124364 -0.007825873
##                Lag5      Volume        Today
## Year   -0.030519101  0.84194162 -0.032459894
## Lag1   -0.008183096 -0.06495131 -0.075031842
## Lag2   -0.072499482 -0.08551314  0.059166717
## Lag3    0.060657175 -0.06928771 -0.071243639
## Lag4   -0.075675027 -0.06107462 -0.007825873
## Lag5    1.000000000 -0.05851741  0.011012698
## Volume -0.058517414  1.00000000 -0.033077783
## Today   0.011012698 -0.03307778  1.000000000

boxplot(Today ~ Direction, data = Weekly)

# Q13 (b): logistic regression on the five lags plus Volume ----

logistic_model <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  family = binomial,
  data = Weekly
)
summary(logistic_model)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
##     Volume, family = binomial, data = Weekly)
##
## Deviance Residuals:
##     Min       1Q   Median       3Q      Max
## -1.6949  -1.2565   0.9913   1.0849   1.4579
##
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.26686    0.08593   3.106   0.0019 **
## Lag1        -0.04127    0.02641  -1.563   0.1181
## Lag2         0.05844    0.02686   2.175   0.0296 *
## Lag3        -0.01606    0.02666  -0.602   0.5469
## Lag4        -0.02779    0.02646  -1.050   0.2937
## Lag5        -0.01447    0.02638  -0.549   0.5833
## Volume      -0.02274    0.03690  -0.616   0.5377
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
##     Null deviance: 1496.2  on 1088  degrees of freedom
## Residual deviance: 1486.4  on 1082  degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4

# Q13 (c): confusion matrix and accuracy on the data the model was fit to ----

# Fitted probabilities of "Up" (the level coded as 1 above).
probs <- predict(logistic_model, type = "response")

# Store the predictions as a factor with the same levels as Direction: the
# table then always has both rows, even if only one class is ever predicted,
# so diag() below really picks out the correctly classified counts.
pred <- factor(
  ifelse(probs > 0.5, "Up", "Down"),
  levels = levels(Weekly$Direction)
)

# Rows are predictions, columns are the observed directions.
confusion_matrix <- table(pred, Weekly$Direction)
confusion_matrix
##
## pred   Down  Up
##   Down   54  48
##   Up    430 557

# Share of weeks on the diagonal, i.e. classified correctly.
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
accuracy
## [1] 0.5610652
# Lines that begin with "##" are console output recorded in the original
# knitted report; they are kept here as comments for reference.

# Q13 (d): logistic regression on Lag2 only, with a held-out test set ----

# Fit on the years up to and including 2008; evaluate on the later years.
train_data <- Weekly[Weekly$Year <= 2008, ]
test_data <- Weekly[Weekly$Year > 2008, ]

fit.d <- glm(Direction ~ Lag2, data = train_data, family = binomial)
fit.d.prob <- predict(fit.d, newdata = test_data, type = "response")

# Store the predictions as a factor with the same levels as Direction: the
# table then always has both rows, even if only one class is ever predicted,
# so diag() below really picks out the correctly classified counts.
fit.d.pred <- factor(
  ifelse(fit.d.prob > 0.5, "Up", "Down"),
  levels = levels(test_data$Direction)
)

# Rows are predictions, columns are the observed directions.
confusion_matrix_d <- table(fit.d.pred, test_data$Direction)
confusion_matrix_d
##
## fit.d.pred Down Up
##       Down    9  5
##       Up     34 56

accuracy_d <- sum(diag(confusion_matrix_d)) / sum(confusion_matrix_d)
accuracy_d
## [1] 0.625

# Q13 (e): linear discriminant analysis on Lag2, same train/test split ----

library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
##
##     Boston

lda.fit <- lda(Direction ~ Lag2, data = train_data)
lda.pred <- predict(lda.fit, newdata = test_data)

# The class component of the prediction is tabulated directly against the
# observed directions (rows are predictions, columns are observations).
confusion_matrix_e <- table(lda.pred$class, test_data$Direction)
confusion_matrix_e
##
##        Down Up
##   Down    9  5
##   Up     34 56

accuracy_e <- sum(diag(confusion_matrix_e)) / sum(confusion_matrix_e)
accuracy_e
## [1] 0.625
