HW2
Yuchen Zou
2024-04-15

#Q13
##a
library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.2.3
View(Weekly)
attach(Weekly)
names(Weekly)
## [1] "Year"      "Lag1"      "Lag2"      "Lag3"      "Lag4"      "Lag5"
## [7] "Volume"    "Today"     "Direction"
dim(Weekly)
## [1] 1089    9
summary(Weekly)
##       Year           Lag1               Lag2               Lag3
##  Min.   :1990   Min.   :-18.1950   Min.   :-18.1950   Min.   :-18.1950
##  1st Qu.:1995   1st Qu.: -1.1540   1st Qu.: -1.1540   1st Qu.: -1.1580
##  Median :2000   Median :  0.2410   Median :  0.2410   Median :  0.2410
##  Mean   :2000   Mean   :  0.1506   Mean   :  0.1511   Mean   :  0.1472
##  3rd Qu.:2005   3rd Qu.:  1.4050   3rd Qu.:  1.4090   3rd Qu.:  1.4090
##  Max.   :2010   Max.   : 12.0260   Max.   : 12.0260   Max.   : 12.0260
##       Lag4               Lag5              Volume            Today
##  Min.   :-18.1950   Min.   :-18.1950   Min.   :0.08747   Min.   :-18.1950
##  1st Qu.: -1.1580   1st Qu.: -1.1660   1st Qu.:0.33202   1st Qu.: -1.1540
##  Median :  0.2380   Median :  0.2340   Median :1.00268   Median :  0.2410
##  Mean   :  0.1458   Mean   :  0.1399   Mean   :1.57462   Mean   :  0.1499
##  3rd Qu.:  1.4090   3rd Qu.:  1.4050   3rd Qu.:2.05373   3rd Qu.:  1.4050
##  Max.   : 12.0260   Max.   : 12.0260   Max.   :9.32821   Max.   : 12.0260
##  Direction
##  Down:484
##  Up  :605
class(Direction)
## [1] "factor" contrasts(Direction) ## Up ## Down 0 ## Up 1 pairs(Weekly) cor(Weekly[, -9])
##              Year         Lag1        Lag2        Lag3         Lag4
## Year    1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1   -0.03228927  1.000000000 -0.07485305  0.05863568 -0.071273876
## Lag2   -0.03339001 -0.074853051  1.00000000 -0.07572091  0.058381535
## Lag3   -0.03000649  0.058635682 -0.07572091  1.00000000 -0.075395865
## Lag4   -0.03112792 -0.071273876  0.05838153 -0.07539587  1.000000000
## Lag5   -0.03051910 -0.008183096 -0.07249948  0.06065717 -0.075675027
## Volume  0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today  -0.03245989 -0.075031842  0.05916672 -0.07124364 -0.007825873
##                Lag5      Volume        Today
## Year   -0.030519101  0.84194162 -0.032459894
## Lag1   -0.008183096 -0.06495131 -0.075031842
## Lag2   -0.072499482 -0.08551314  0.059166717
## Lag3    0.060657175 -0.06928771 -0.071243639
## Lag4   -0.075675027 -0.06107462 -0.007825873
## Lag5    1.000000000 -0.05851741  0.011012698
## Volume -0.058517414  1.00000000 -0.033077783
## Today   0.011012698 -0.03307778  1.000000000
boxplot(Today ~ Direction)
##b
logistic_model <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
                      family = binomial, data = Weekly)
summary(logistic_model)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
##     Volume, family = binomial, data = Weekly)
##
## Deviance Residuals:
##     Min      1Q  Median      3Q     Max
## -1.6949 -1.2565  0.9913  1.0849  1.4579
##
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.26686    0.08593   3.106   0.0019 **
## Lag1        -0.04127    0.02641  -1.563   0.1181
## Lag2         0.05844    0.02686   2.175   0.0296 *
## Lag3        -0.01606    0.02666  -0.602   0.5469
## Lag4        -0.02779    0.02646  -1.050   0.2937
## Lag5        -0.01447    0.02638  -0.549   0.5833
## Volume      -0.02274    0.03690  -0.616   0.5377
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
##     Null deviance: 1496.2  on 1088  degrees of freedom
## Residual deviance: 1486.4  on 1082  degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
##c
probs <- predict(logistic_model, type = "response")
pred <- ifelse(probs > 0.5, "Up", "Down")
confusion_matrix <- table(pred, Weekly$Direction)
confusion_matrix
##
## pred   Down  Up
##   Down   54  48
##   Up    430 557
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
accuracy
## [1] 0.5610652
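The overall accuracy above hides a strong asymmetry between the two classes. As a quick follow-up sketch using the confusion_matrix object from part (c) (rows are predictions, columns are the true Direction; sens_up and spec_down are hypothetical names introduced here), the class-specific rates can be pulled out directly:

# Class-specific rates from the part (c) confusion matrix (rows = predicted, cols = true).
sens_up <- confusion_matrix["Up", "Up"] / sum(confusion_matrix[, "Up"])         # 557/605, about 0.92
spec_down <- confusion_matrix["Down", "Down"] / sum(confusion_matrix[, "Down"]) # 54/484, about 0.11
c(sensitivity_up = sens_up, specificity_down = spec_down)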
##d
train_data <- Weekly[Weekly$Year <= 2008, ]
test_data <- Weekly[Weekly$Year > 2008, ]
fit.d <- glm(Direction ~ Lag2, data = train_data, family = binomial)
fit.d.prob <- predict(fit.d, newdata = test_data, type = "response")
fit.d.pred <- ifelse(fit.d.prob > 0.5, "Up", "Down")
confusion_matrix_d <- table(fit.d.pred, test_data$Direction)
confusion_matrix_d
##
## fit.d.pred Down Up
##       Down    9  5
##       Up     34 56
accuracy_d <- sum(diag(confusion_matrix_d)) / sum(confusion_matrix_d)
accuracy_d
## [1] 0.625
##e
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
##
##     Boston
lda.fit <- lda(Direction ~ Lag2, data = train_data)
lda.pred <- predict(lda.fit, newdata = test_data)
confusion_matrix_e <- table(lda.pred$class, test_data$Direction)
confusion_matrix_e
##
##        Down Up
##   Down    9  5
##   Up     34 56
accuracy_e <- sum(diag(confusion_matrix_e)) / sum(confusion_matrix_e)
accuracy_e
## [1] 0.625
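Parts (d) and (e) produce identical confusion matrices, which is not surprising: with a single predictor, logistic regression and LDA tend to give very similar linear decision rules. A quick check of how often the two methods agree on the held-out weeks, as a sketch using the objects fitted above:

# Proportion of 2009-2010 test weeks where the logistic and LDA predictions coincide.
mean(fit.d.pred == as.character(lda.pred$class))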
##f
qda.fit <- qda(Direction ~ Lag2, data = train_data)
qda.pred <- predict(qda.fit, newdata = test_data)
confusion_matrix_f <- table(qda.pred$class, test_data$Direction)
confusion_matrix_f
##
##        Down Up
##   Down    0  0
##   Up     43 61
accuracy_f <- sum(diag(confusion_matrix_f)) / sum(confusion_matrix_f)
accuracy_f
## [1] 0.5865385
##g
library(class)
knn.fit <- knn(train = as.matrix(train_data$Lag2), test = as.matrix(test_data$Lag2),
               cl = train_data$Direction, k = 1)
confusion_matrix_g <- table(knn.fit, test_data$Direction)
confusion_matrix_g
##
## knn.fit Down Up
##    Down   21 29
##    Up     22 32
accuracy_g <- sum(diag(confusion_matrix_g)) / sum(confusion_matrix_g)
accuracy_g
## [1] 0.5096154
##h
library(e1071)
## Warning: package 'e1071' was built under R version 4.2.3
nb.fit <- naiveBayes(Direction ~ Lag2, data = train_data)
nb.pred <- predict(nb.fit, newdata = test_data)
confusion_matrix_h <- table(nb.pred, test_data$Direction)
confusion_matrix_h
##
## nb.pred Down Up
##    Down    0  0
##    Up     43 61
accuracy_h <- sum(diag(confusion_matrix_h)) / sum(confusion_matrix_h)
accuracy_h
## [1] 0.5865385
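Both QDA and naive Bayes predict "Up" for every held-out week here, so their 0.587 accuracy is simply the share of test weeks that went up. A quick sanity check, as a sketch assuming the test_data frame from part (d):

# QDA and naive Bayes above never predict "Down" on the test set, so their accuracy
# equals the proportion of Up weeks in 2009-2010.
mean(test_data$Direction == "Up")  # 61/104, about 0.587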
##i
results <- data.frame(
  Method = c("Logistic Regression", "LDA", "QDA", "KNN", "Naive Bayes"),
  Accuracy = c(accuracy_d, accuracy_e, accuracy_f, accuracy_g, accuracy_h))
results
##                Method  Accuracy
## 1 Logistic Regression 0.6250000
## 2                 LDA 0.6250000
## 3                 QDA 0.5865385
## 4                 KNN 0.5096154
## 5         Naive Bayes 0.5865385
##From the results, the logistic regression and LDA models give the highest test-set accuracy (0.625).
##j
glm.fit_lag1_lag2 <- glm(Direction ~ Lag1 + Lag2, data = train_data, family = binomial)
glm.probs_lag1_lag2 <- predict(glm.fit_lag1_lag2, newdata = test_data, type = "response")
glm.pred_lag1_lag2 <- ifelse(glm.probs_lag1_lag2 > 0.5, "Up", "Down")
confusion_matrix_lag1_lag2 <- table(glm.pred_lag1_lag2, test_data$Direction)
confusion_matrix_lag1_lag2
##
## glm.pred_lag1_lag2 Down Up
##               Down    7  8
##               Up     36 53
accuracy_lag1_lag2 <- sum(diag(confusion_matrix_lag1_lag2)) / sum(confusion_matrix_lag1_lag2)
accuracy_lag1_lag2
## [1] 0.5769231
##Q15
##a
Power <- function() {
  result <- 2^3
  print(result)
}
Power()
## [1] 8
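Power() prints its result rather than returning it, which is fine for part (a) but means the value cannot be reused downstream; part (d) below switches to return() for exactly that reason. A minimal sketch of the returning version (Power_return is a hypothetical name, not part of the assignment):

# Same computation, but returning the value so it can be stored or plotted later.
Power_return <- function() {
  2^3
}
Power_return()  # returns 8 invisibly usable in further computation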
##b
Power2 <- function(x, a) {
  result <- x^a
  print(result)
}
Power2(3, 8)
## [1] 6561
##c
Power2(10, 3)
## [1] 1000
Power2(8, 17)
## [1] 2.2518e+15
Power2(131, 3)
## [1] 2248091
##d
Power3 <- function(x, a) {
  result <- x^a
  return(result)
}
##e
x <- 1:10
y <- Power3(x, 2)
plot(x, y, type = "l", xlab = "x", ylab = "f(x) = x^2",
     main = "Plot of f(x) = x^2", log = "xy")
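Because both axes in part (e) are on the log scale, f(x) = x^2 should appear as a straight line with slope 2 (log y = 2 log x). A quick check using the x and y vectors created above:

# On the log-log scale the curve is linear; the fitted slope should be essentially 2.
coef(lm(log(y) ~ log(x)))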
##f
PlotPower <- function(x, a) {
  y <- x^a
  plot(x, y, type = "l", xlab = "x",
       ylab = paste("f(x) = x^", a, sep = ""),
       main = paste("Plot of f(x) = x^", a, sep = ""),
       log = "xy")
}
PlotPower(1:10, 3)
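As a usage check on PlotPower(), calling it with a = 2 should reproduce (up to the axis labels) the figure from part (e):

# Reproduces the part (e) plot via the general-purpose function.
PlotPower(1:10, 2)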
##Q16
library(caret)
## Warning: package 'caret' was built under R version 4.2.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
## Loading required package: lattice
View(Boston)
Boston <- as.data.frame(Boston)
median_crime_rate <- median(Boston$crim)
Boston$crime_binary <- ifelse(Boston$crim > median(Boston$crim), 1, 0)
set.seed(123)
trainIndex <- createDataPartition(Boston$crime_binary, p = .8, list = FALSE, times = 1)
train <- Boston[trainIndex, ]
test <- Boston[-trainIndex, ]
# Logistic Regression
logit_model <- glm(crime_binary ~ ., data = train, family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
logit_pred <- predict(logit_model, newdata = test, type = "response")
logit_pred <- ifelse(logit_pred > 0.5, 1, 0)
# LDA
lda_model <- lda(crime_binary ~ ., data = train)
lda_pred <- predict(lda_model, newdata = test)$class
# Naive Bayes
nb_model <- naiveBayes(crime_binary ~ ., data = train)
nb_pred <- predict(nb_model, newdata = test)
# KNN
train_labels <- train$crime_binary
test_labels <- test$crime_binary
train$crime_binary <- NULL
test$crime_binary <- NULL
knn_pred <- knn(train, test, train_labels, k = 10)
# Results
confusionMatrix(as.factor(logit_pred), as.factor(test_labels))
## Confusion Matrix and Statistics
##
##           Reference
## Prediction  0  1
##          0 49  2
##          1  1 48
##
##                Accuracy : 0.97
##                  95% CI : (0.9148, 0.9938)
##     No Information Rate : 0.5
##     P-Value [Acc > NIR] : <2e-16
##
##                   Kappa : 0.94
##
##  Mcnemar's Test P-Value : 1
##
##             Sensitivity : 0.9800
##             Specificity : 0.9600
##          Pos Pred Value : 0.9608
##          Neg Pred Value : 0.9796
##              Prevalence : 0.5000
##          Detection Rate : 0.4900
##    Detection Prevalence : 0.5100
##       Balanced Accuracy : 0.9700
##
##        'Positive' Class : 0
##
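The glm.fit warnings above most likely come from keeping crim among the predictors: crime_binary is defined as a threshold on crim, so crim separates the two classes perfectly and the logistic fit pushes its coefficient toward infinity. A minimal sketch of a refit that drops crim (train2 and logit_model2 are hypothetical names; this assumes the Boston data frame with crime_binary and the trainIndex split created above):

# Refit the logistic model without crim, since crime_binary is a deterministic
# function of crim and including it causes (quasi-)complete separation.
train2 <- Boston[trainIndex, ]   # fresh copy in which crime_binary is still a column
logit_model2 <- glm(crime_binary ~ . - crim, data = train2, family = "binomial")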
confusionMatrix(as.factor(lda_pred), as.factor(test_labels))
## Confusion Matrix and Statistics
##
##           Reference
## Prediction  0  1
##          0 47 12
##          1  3 38
##
##                Accuracy : 0.85
##                  95% CI : (0.7647, 0.9135)
##     No Information Rate : 0.5
##     P-Value [Acc > NIR] : 2.413e-13
##
##                   Kappa : 0.7
##
##  Mcnemar's Test P-Value : 0.03887
##
##             Sensitivity : 0.9400
##             Specificity : 0.7600
##          Pos Pred Value : 0.7966
##          Neg Pred Value : 0.9268
##              Prevalence : 0.5000
##          Detection Rate : 0.4700
##    Detection Prevalence : 0.5900
##       Balanced Accuracy : 0.8500
##
##        'Positive' Class : 0
##
confusionMatrix(as.factor(nb_pred), as.factor(test_labels))
## Confusion Matrix and Statistics
##
##           Reference
## Prediction  0  1
##          0 50  4
##          1  0 46
##
##                Accuracy : 0.96
##                  95% CI : (0.9007, 0.989)
##     No Information Rate : 0.5
##     P-Value [Acc > NIR] : <2e-16
##
##                   Kappa : 0.92
##
##  Mcnemar's Test P-Value : 0.1336
##
##             Sensitivity : 1.0000
##             Specificity : 0.9200
##          Pos Pred Value : 0.9259
##          Neg Pred Value : 1.0000
##              Prevalence : 0.5000
##          Detection Rate : 0.5000
##    Detection Prevalence : 0.5400
##       Balanced Accuracy : 0.9600
##
##        'Positive' Class : 0
##
confusionMatrix(as.factor(knn_pred), as.factor(test_labels))
## Confusion Matrix and Statistics
##
##           Reference
## Prediction  0  1
##          0 48  5
##          1  2 45
##
##                Accuracy : 0.93
##                  95% CI : (0.8611, 0.9714)
##     No Information Rate : 0.5
##     P-Value [Acc > NIR] : <2e-16
##
##                   Kappa : 0.86
##
##  Mcnemar's Test P-Value : 0.4497
##
##             Sensitivity : 0.9600
##             Specificity : 0.9000
##          Pos Pred Value : 0.9057
##          Neg Pred Value : 0.9574
##              Prevalence : 0.5000
##          Detection Rate : 0.4800
##    Detection Prevalence : 0.5300
##       Balanced Accuracy : 0.9300
##
##        'Positive' Class : 0
##
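One caveat on the KNN fit: knn() is distance-based and the Boston predictors sit on very different scales, so standardizing before computing neighbors is usually worth trying. A minimal sketch reusing the train/test frames and label vectors from above (train_std, test_std, and knn_pred_std are hypothetical names):

# Standardize predictors with the training-set means and SDs, then rerun KNN.
train_std <- scale(train)
test_std <- scale(test, center = attr(train_std, "scaled:center"),
                  scale = attr(train_std, "scaled:scale"))
knn_pred_std <- knn(train_std, test_std, train_labels, k = 10)
confusionMatrix(as.factor(knn_pred_std), as.factor(test_labels))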