Homework 4
Neftali Lemus (805105856)
11/3/2021
setwd("C:/Users/domin/Desktop/Stats 101C")
college <- read.csv("College Fall 2021.csv")
dim(college)
## [1] 2000   20
names(college)
##  [1] "X"           "School.Name" "Private"     "Apps"        "Accept"
##  [6] "Enroll"      "Top10perc"   "Top25perc"   "F.Undergrad" "P.Undergrad"
## [11] "Outstate"    "Room.Board"  "Books"       "Personal"    "PhD"
## [16] "Terminal"    "S.F.Ratio"   "perc.alumni" "Grad.Rate"   "Expend"
table(is.na(college))
# Data has no missing values, we can proceed with no issues
##
## FALSE
## 40000
college <- college[, -c(1, 2, 3)]  # Remove the index, school name, and Private columns
dim(college)
## [1] 2000   17
Question 1
# Split data into 70% training, 30% testing
set.seed(1128)
c.training.i <- sample(1:2000, 1400, replace = F)
college.train <- college[c.training.i, ]
college.test <- college[-c.training.i, ]
dim(college.train)
## [1] 1400   17
dim(college.test)
## [1] 600  17
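A quick check of the split proportions (a sanity-check sketch, not part of the original output):

length(c.training.i) / nrow(college)  # 0.7
nrow(college.test) / nrow(college)    # 0.3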
a)
m1 <- lm(Expend ~ ., data = college.train)
pred.train <- predict(m1, newdata = college.train)
# Calculating MSE Training
MSE.train <- mean((college.train$Expend - pred.train)^2)
MSE.train
## [1] 8423397
# Calculating MSE Test
pred.test <- predict(m1, newdata = college.test)
MSE.test <- mean((college.test$Expend - pred.test)^2)
MSE.test
## [1] 9658396
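The same mean-squared-error computation recurs for every model below, so a small helper would keep it consistent (a convenience sketch; the name `mse` is mine, not part of the original code):

mse <- function(y, yhat) mean((y - yhat)^2)  # hypothetical helper, equivalent to the lines above
mse(college.train$Expend, pred.train)        # reproduces MSE.train
mse(college.test$Expend, pred.test)          # reproduces MSE.test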
b)
# Ridge Regression
str(college)
## 'data.frame':    2000 obs. of  17 variables:
##  $ Apps       : int  1758 14463 838 1127 735 504 280 1373 1455 2379 ...
##  $ Accept     : int  1485 6166 651 884 423 482 143 1373 1064 2133 ...
##  $ Enroll     : int  419 1757 159 308 366 185 79 724 452 1292 ...
##  $ Top10perc  : int  27 60 11 30 20 10 5 6 1 8 ...
##  $ Top25perc  : int  58 94 25 64 48 36 27 21 16 25 ...
##  $ F.Undergrad: int  2041 8544 654 1310 2448 550 327 2754 2632 4283 ...
##  $ P.Undergrad: int  174 671 162 766 707 84 110 474 617 2973 ...
##  $ Outstate   : int  12040 6550 8640 11718 9210 9130 5590 2700 6806 4973 ...
##  $ Room.Board : int  4100 4598 3700 7398 3782 3322 2900 2660 2550 3192 ...
##  $ Books      : int  600 700 400 450 700 450 650 540 350 500 ...
##  $ Personal   : int  1100 1000 1915 1800 1000 1450 1952 1660 766 1425 ...
##  $ PhD        : int  92 83 62 73 49 46 53 60 75 56 ...
##  $ Terminal   : int  96 100 62 87 51 51 63 68 75 65 ...
##  $ S.F.Ratio  : num  13.2 18 12.2 16.4 39.8 12.6 15.1 20.3 15.1 22 ...
##  $ perc.alumni: int  17 15 13 33 15 25 4 29 10 21 ...
##  $ Grad.Rate  : int  72 80 48 76 34 54 90 52 24 38 ...
##  $ Expend     : int  9060 8055 7634 8871 6562 8686 4839 4550 6972 4078 ...
i = seq(10, -2, length = 100)
lambda.v = 10^i
length(lambda.v)
## [1] 100
# No missing values, no factors, predictors in matrix, response in vector (data prep)
X <- model.matrix(Expend ~ ., college)
Y <- college$Expend
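Note that model.matrix() prepends an "(Intercept)" column, so X has one more column than there are predictors. A quick check (not in the original output):

dim(X)  # 2000 rows and 17 columns: 16 predictors plus the intercept column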
# Fit Ridge Regression Model
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-2
c.model.ridge <- glmnet(X, Y, alpha = 0, lambda = lambda.v)  # alpha = 0 gives ridge
plot(c.model.ridge)
# All 16 coefficients survived: ridge shrinks coefficients but never zeroes them out
[Figure: ridge coefficient paths plotted against the L1 norm; the top axis reads 16 nonzero coefficients at every point along the path]
# Find the optimal lambda value via cross-validation
set.seed(1128)
cv.out.ridge <- cv.glmnet(X, Y, alpha = 0)
bestlam.cv.ridge <- cv.out.ridge$lambda.min
bestlam.cv.ridge
## [1] 355.5073
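The CV error curve behind this choice can be inspected directly, and glmnet also reports a more conservative candidate (a sketch, not in the original output):

plot(cv.out.ridge)       # CV MSE as a function of log(lambda)
cv.out.ridge$lambda.1se  # largest lambda within 1 SE of the minimum CV error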
# Compute the test error with bestlam.cv for Test and Train
ridge.pred.test <- predict(c.model.ridge, s = bestlam.cv.ridge, newx = X[-c.training.i, ])
ridge.pred.train <- predict(c.model.ridge, s = bestlam.cv.ridge, newx = X[c.training.i, ])
ridge.mse.test <- round(mean((ridge.pred.test - Y[-c.training.i])^2))
ridge.mse.train <- round(mean((ridge.pred.train - Y[c.training.i])^2))
ridge.mse.test
## [1] 9899790
ridge.mse.train
## [1] 8729050
c)
# Lasso Regression
set.seed(1128)
i = seq(10, -2, length = 100)
lambda.v = 10^i
length(lambda.v)
## [1] 100
# No missing values, no factors, predictors in matrix, response in vector (data prep)
X <- model.matrix(Expend ~ ., college)
Y <- college$Expend
# Fit Lasso Regression Model
library(glmnet)
c.model.lasso <- glmnet(X, Y, alpha = 1, lambda = lambda.v)  # alpha = 1 gives lasso
plot(c.model.lasso)
[Figure: lasso coefficient paths plotted against the L1 norm; the top axis shows the number of nonzero coefficients growing from 0 to 12 along the path]
# Finding Optimal Lambda
set.seed(1128)
cv.out.lasso <- cv.glmnet(X, Y, alpha = 1)
bestlam.cv.lasso <- cv.out.lasso$lambda.min
bestlam.cv.lasso
## [1] 6.35878
# Compute MSE of training
lasso.pred <- predict(c.model.lasso, s = bestlam.cv.lasso, newx = X[c.training.i, ])
lasso.mse.train <- round(mean((lasso.pred - Y[c.training.i])^2))
lasso.mse.train
## [1] 8468312
# Compute MSE of test
lasso.pred <- predict(c.model.lasso, s = bestlam.cv.lasso, newx = X[-c.training.i, ])
lasso.mse.test <- round(mean((lasso.pred - Y[-c.training.i])^2))
lasso.mse.test
## [1] 9448019
# Non-zero lasso coefficients
lasscoef <- predict(c.model.lasso, type = "coefficients", s = bestlam.cv.lasso)
lasscoef[lasscoef != 0]
# Only one coefficient was zeroed out
## <sparse>[ <logic> ] : .M.sub.i.logical() maybe inefficient
##  [1]  6.097158e+03  7.023811e-01 -8.942004e-01  4.689058e-01  1.233637e+02
##  [6] -6.181335e+01 -2.247071e-02  5.172989e-03  5.228978e-01  9.943960e-01
## [11]  2.108246e-01  3.287954e+00  2.960737e+01 -3.130042e+02  1.020996e+01
## [16] -2.563767e+01
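The output above prints only the surviving values; to see which predictor the lasso zeroed out by name, the sparse coefficient matrix can be queried directly (a sketch, not in the original code):

lc <- coef(c.model.lasso, s = bestlam.cv.lasso)  # sparse one-column matrix with row names
rownames(lc)[which(lc[, 1] == 0)]                # the predictor(s) shrunk exactly to zero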
Question 2
a)
# Using same college data set as above
# PCR Model
library(pls)
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
##     loadings
library(ggplot2)
library(ISLR)
library(readr)
str(college)
## 'data.frame':    2000 obs. of  17 variables:
##  $ Apps       : int  1758 14463 838 1127 735 504 280 1373 1455 2379 ...
##  $ Accept     : int  1485 6166 651 884 423 482 143 1373 1064 2133 ...
##  $ Enroll     : int  419 1757 159 308 366 185 79 724 452 1292 ...
##  $ Top10perc  : int  27 60 11 30 20 10 5 6 1 8 ...
##  $ Top25perc  : int  58 94 25 64 48 36 27 21 16 25 ...
##  $ F.Undergrad: int  2041 8544 654 1310 2448 550 327 2754 2632 4283 ...
##  $ P.Undergrad: int  174 671 162 766 707 84 110 474 617 2973 ...
##  $ Outstate   : int  12040 6550 8640 11718 9210 9130 5590 2700 6806 4973 ...
##  $ Room.Board : int  4100 4598 3700 7398 3782 3322 2900 2660 2550 3192 ...
##  $ Books      : int  600 700 400 450 700 450 650 540 350 500 ...
##  $ Personal   : int  1100 1000 1915 1800 1000 1450 1952 1660 766 1425 ...
##  $ PhD        : int  92 83 62 73 49 46 53 60 75 56 ...
##  $ Terminal   : int  96 100 62 87 51 51 63 68 75 65 ...
##  $ S.F.Ratio  : num  13.2 18 12.2 16.4 39.8 12.6 15.1 20.3 15.1 22 ...
##  $ perc.alumni: int  17 15 13 33 15 25 4 29 10 21 ...
##  $ Grad.Rate  : int  72 80 48 76 34 54 90 52 24 38 ...
##  $ Expend     : int  9060 8055 7634 8871 6562 8686 4839 4550 6972 4078 ...
# Recall that PCR and PLS only take numerical information
set.seed(1128)
# PCR fit on training set
pcr.fit <- pcr(Expend ~ ., data = college.train, scale = TRUE, validation = "CV")
# One call does the principal components, the standardizing, and the cross-validation
# Keeping the 85% threshold in the X matrix
summary(pcr.fit)
# 7 components explain 86% of the variance of the X matrix and 59% of Expend
## Data:    X dimension: 1400 16
##          Y dimension: 1400 1
## Fit method: svdpc
## Number of components considered: 16
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
##        (Intercept)  1 comps  2 comps  3 comps  4 comps  5 comps  6 comps
## CV            5155     4289     3543     3458     3460     3347     3307
## adjCV         5155     4288     3542     3457     3461     3346     3305
##        7 comps  8 comps  9 comps  10 comps  11 comps  12 comps  13 comps
## CV        3307     3219     3214      3209      3145      3128      3173
## adjCV     3306     3217     3213      3207      3144      3126      3174
##        14 comps  15 comps  16 comps
## CV         2993      2980      2962
## adjCV      2990      2977      2958
##
## TRAINING: % variance explained
##         1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps  8 comps
## X         32.10    59.47    66.61    72.39    77.80    82.81    86.64    90.05
## Expend    31.35    53.14    55.51    55.54    58.28    59.50    59.55    61.70
##         9 comps  10 comps  11 comps  12 comps  13 comps  14 comps  15 comps
## X         93.06     95.65     97.16     98.16     99.03     99.66     99.88
## Expend    61.79     62.02     63.57     63.99     64.07     67.43     67.91
##         16 comps
## X         100.00
## Expend     68.26
validationplot(pcr.fit, val.type = "MSEP")
[Figure: cross-validated MSEP for Expend versus number of components (0-16)]
# Based on the plot, CV MSE is lowest at 16 components, i.e. the full model.
# M = 7 is the smallest number of principal components keeping 85% of the information in X, so we use it below.
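The 85% cutoff can also be located programmatically with pls's explvar() (a sketch, not in the original code):

cumsum(explvar(pcr.fit))                  # cumulative % of X variance by component
which(cumsum(explvar(pcr.fit)) >= 85)[1]  # first component count reaching 85% (7 here)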
# MSE for Training
pcr.pred.train <- predict(pcr.fit, college.train, ncomp = 7)
mean((pcr.pred.train - college.train$Expend)^2)
## [1] 10734336
# MSE for Testing
pcr.pred.test <- predict(pcr.fit, college.test, ncomp = 7)
mean((pcr.pred.test - college.test$Expend)^2)
## [1] 12435645
b)
# PLS Model
library(pls)
library(ggplot2)
library(ISLR)
library(readr)
set.seed(1128)
# Fitting PLS model on training set
pls.fit <- plsr(Expend ~ ., data = college.train, scale = TRUE, validation = "CV")
# Keeping 85% threshold of X matrix
summary(pls.fit)
# 9 components explain 85% of the variance of X and account for 68% of Expend
## Data:    X dimension: 1400 16
##          Y dimension: 1400 1
## Fit method: kernelpls
## Number of components considered: 16
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
##        (Intercept)  1 comps  2 comps  3 comps  4 comps  5 comps  6 comps
## CV            5155     3438     3261     3127     3053     3002     2983
## adjCV         5155     3437     3260     3126     3050     2999     2980
##        7 comps  8 comps  9 comps  10 comps  11 comps  12 comps  13 comps
## CV        2973     2973     2970      2964      2964      2963      2963
## adjCV     2970     2970     2966      2961      2961      2960      2960
##        14 comps  15 comps  16 comps
## CV         2962      2963      2962
## adjCV      2959      2959      2958
##
## TRAINING: % variance explained
##         1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps  8 comps
## X         30.38    50.25    64.89    69.15    72.77    75.99    78.11    82.18
## Expend    55.97    60.55    63.97    65.97    67.22    67.73    67.97    68.03
##         9 comps  10 comps  11 comps  12 comps  13 comps  14 comps  15 comps
## X         84.23     86.64     90.96     92.93     95.78     97.52     99.13
## Expend    68.13     68.21     68.23     68.24     68.25     68.25     68.26
##         16 comps
## X         100.00
## Expend     68.26
validationplot(pls.fit, val.type = "MSEP")
[Figure: cross-validated MSEP for Expend versus number of components (0-16)]
# This model shows a sharper drop in MSE than PCR.
# If we wish to stick to the 85% mark, 9 components are needed, but 6 or 7 could get the job done.
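The values behind the validation plot can also be read off directly rather than eyeballed (a sketch, not in the original code):

MSEP(pls.fit, estimate = "CV")  # cross-validated MSEP at each component count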
# MSE for Training
pls.pred.train <- predict(pls.fit, college.train, ncomp = 9)
mean((pls.pred.train - college.train$Expend)^2)
## [1] 8455719
# MSE for Testing
pls.pred.test <- predict(pls.fit, college.test, ncomp = 9)
mean((pls.pred.test - college.test$Expend)^2)
## [1] 9633312
Question 3
a)
# BIC Model
m3full <- lm(Expend ~ ., data = college.train)
summary(m3full)
##
## Call:
## lm(formula = Expend ~ ., data = college.train)
##
## Residuals:
##    Min     1Q Median     3Q    Max
##  -8941  -1422   -263    819  32325
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6160.27522  814.72305   7.561 7.24e-14 ***
## Apps           0.72830    0.07592   9.593  < 2e-16 ***
## Accept        -0.95921    0.15307  -6.267 4.92e-10 ***
## Enroll         0.92317    0.43755   2.110  0.03505 *
## Top10perc    125.79354   11.61572  10.830  < 2e-16 ***
## Top25perc    -59.04195    9.37418  -6.298 4.03e-10 ***
## F.Undergrad   -0.12285    0.07404  -1.659  0.09730 .
## P.Undergrad    0.04972    0.05948   0.836  0.40333
## Outstate       0.47114    0.03638  12.951  < 2e-16 ***
## Room.Board     0.05946    0.10301   0.577  0.56388
## Books          0.87815    0.54049   1.625  0.10445
## Personal       0.25173    0.13414   1.877  0.06078 .
## PhD           11.89817    9.39622   1.266  0.20563
## Terminal      25.54205   10.29774   2.480  0.01324 *
## S.F.Ratio   -334.15330   25.53823 -13.084  < 2e-16 ***
## perc.alumni    3.71509    8.50341   0.437  0.66226
## Grad.Rate    -23.34631    6.39241  -3.652  0.00027 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2920 on 1383 degrees of freedom
## Multiple R-squared:  0.6826, Adjusted R-squared:  0.6789
## F-statistic: 185.9 on 16 and 1383 DF,  p-value: < 2.2e-16
backwardBIC <- step(m3full, direction = "backward", data = college.train,
                    k = log(nrow(college.train)))
## Start:  AIC=22448.29
## Expend ~ Apps + Accept + Enroll + Top10perc + Top25perc + F.Undergrad +
##     P.Undergrad + Outstate + Room.Board + Books + Personal +
##     PhD + Terminal + S.F.Ratio + perc.alumni + Grad.Rate
##
##               Df  Sum of Sq        RSS   AIC
## - perc.alumni  1    1627593 1.1794e+10 22441
## - Room.Board   1    2841112 1.1796e+10 22441
## - P.Undergrad  1    5958766 1.1799e+10 22442
## - PhD          1   13672457 1.1806e+10 22443
## - Books        1   22509277 1.1815e+10 22444
## - F.Undergrad  1   23474376 1.1816e+10 22444
## - Personal     1   30029069 1.1823e+10 22445
## - Enroll       1   37957696 1.1831e+10 22446
## - Terminal     1   52459134 1.1845e+10 22447
## <none>                      1.1793e+10 22448
## - Grad.Rate    1  113736655 1.1906e+10 22455
## - Accept       1  334851190 1.2128e+10 22480
## - Top25perc    1  338258037 1.2131e+10 22481
## - Apps         1  784746187 1.2578e+10 22531
## - Top10perc    1 1000040366 1.2793e+10 22555
## - Outstate     1 1430171436 1.3223e+10 22601
## - S.F.Ratio    1 1459832373 1.3253e+10 22604
##
## Step:  AIC=22441.23
## Expend ~ Apps + Accept + Enroll + Top10perc + Top25perc + F.Undergrad +
##     P.Undergrad + Outstate + Room.Board + Books + Personal +
##     PhD + Terminal + S.F.Ratio + Grad.Rate
##
##               Df  Sum of Sq        RSS   AIC
## - Room.Board   1    2523617 1.1797e+10 22434
## - P.Undergrad  1    6116451 1.1800e+10 22435
## - PhD          1   13611529 1.1808e+10 22436
## - Books        1   22366146 1.1817e+10 22437
## - F.Undergrad  1   24634351 1.1819e+10 22437
## - Personal     1   28993481 1.1823e+10 22437
## - Enroll       1   40186508 1.1835e+10 22439
## - Terminal     1   53644334 1.1848e+10 22440
## <none>                      1.1794e+10 22441
## - Grad.Rate    1  112520334 1.1907e+10 22447
## - Top25perc    1  337554910 1.2132e+10 22474
## - Accept       1  339899172 1.2134e+10 22474
## - Apps         1  783128499 1.2578e+10 22524
## - Top10perc    1 1014680805 1.2809e+10 22550
## - S.F.Ratio    1 1474494785 1.3269e+10 22599
## - Outstate     1 1541042978 1.3335e+10 22606
##
## Step:  AIC=22434.29
## Expend ~ Apps + Accept + Enroll + Top10perc + Top25perc + F.Undergrad +
##     P.Undergrad + Outstate + Books + Personal + PhD + Terminal +
##     S.F.Ratio + Grad.Rate
##
##               Df  Sum of Sq        RSS   AIC
## - P.Undergrad  1    6901836 1.1804e+10 22428
## - PhD          1   13164767 1.1810e+10 22429
## - Books        1   23024051 1.1820e+10 22430
## - F.Undergrad  1   24360553 1.1821e+10 22430
## - Personal     1   28334659 1.1825e+10 22430
## - Enroll       1   38607077 1.1836e+10 22432
## - Terminal     1   57885442 1.1855e+10 22434
## <none>                      1.1797e+10 22434
## - Grad.Rate    1  110313614 1.1907e+10 22440
## - Accept       1  338529839 1.2135e+10 22467
## - Top25perc    1  341379288 1.2138e+10 22467
## - Apps         1  800038854 1.2597e+10 22519
## - Top10perc    1 1016519256 1.2813e+10 22543
## - S.F.Ratio    1 1476960637 1.3274e+10 22592
## - Outstate     1 1893007841 1.3690e+10 22635
##
## Step:  AIC=22427.86
## Expend ~ Apps + Accept + Enroll + Top10perc + Top25perc + F.Undergrad +
##     Outstate + Books + Personal + PhD + Terminal + S.F.Ratio +
##     Grad.Rate
##
##               Df  Sum of Sq        RSS   AIC
## - PhD          1   15037633 1.1819e+10 22422
## - F.Undergrad  1   19983387 1.1824e+10 22423
## - Books        1   24844677 1.1829e+10 22424
## - Personal     1   32429170 1.1836e+10 22425
## - Enroll       1   38462741 1.1842e+10 22425
## - Terminal     1   57529394 1.1861e+10 22427
## <none>                      1.1804e+10 22428
## - Grad.Rate    1  121372989 1.1925e+10 22435
## - Top25perc    1  341480664 1.2145e+10 22461
## - Accept       1  357413950 1.2161e+10 22462
## - Apps         1  823788120 1.2628e+10 22515
## - Top10perc    1 1012778629 1.2817e+10 22536
## - S.F.Ratio    1 1485696987 1.3290e+10 22587
## - Outstate     1 1931208015 1.3735e+10 22633
##
## Step:  AIC=22422.4
## Expend ~ Apps + Accept + Enroll + Top10perc + Top25perc + F.Undergrad +
##     Outstate + Books + Personal + Terminal + S.F.Ratio + Grad.Rate
##
##               Df  Sum of Sq        RSS   AIC
## - Books        1   19452131 1.1838e+10 22418
## - F.Undergrad  1   19817037 1.1839e+10 22418
## - Personal     1   32492881 1.1851e+10 22419
## - Enroll       1   38016971 1.1857e+10 22420
## <none>                      1.1819e+10 22422
## - Grad.Rate    1  118957244 1.1938e+10 22429
## - Terminal     1  241369802 1.2060e+10 22444
## - Top25perc    1  342446852 1.2161e+10 22455
## - Accept       1  349420303 1.2168e+10 22456
## - Apps         1  817922760 1.2637e+10 22509
## - Top10perc    1 1077122885 1.2896e+10 22537
## - S.F.Ratio    1 1471350640 1.3290e+10 22579
## - Outstate     1 1943689663 1.3763e+10 22628
##
## Step:  AIC=22417.46
## Expend ~ Apps + Accept + Enroll + Top10perc + Top25perc + F.Undergrad +
##     Outstate + Personal + Terminal + S.F.Ratio + Grad.Rate
##
##               Df  Sum of Sq        RSS   AIC
## - F.Undergrad  1   19601780 1.1858e+10 22413
## - Enroll       1   37675603 1.1876e+10 22415
## - Personal     1   42999327 1.1881e+10 22415
## <none>                      1.1838e+10 22418
## - Grad.Rate    1  123307527 1.1962e+10 22425
## - Terminal     1  248427716 1.2087e+10 22439
## - Top25perc    1  336166585 1.2174e+10 22449
## - Accept       1  353799856 1.2192e+10 22451
## - Apps         1  829700495 1.2668e+10 22505
## - Top10perc    1 1089888226 1.2928e+10 22534
## - S.F.Ratio    1 1460859872 1.3299e+10 22573
## - Outstate     1 1954974285 1.3793e+10 22624
##
## Step:  AIC=22412.53
## Expend ~ Apps + Accept + Enroll + Top10perc + Top25perc + Outstate +
##     Personal + Terminal + S.F.Ratio + Grad.Rate
##
##               Df  Sum of Sq        RSS   AIC
## - Enroll       1   19013534 1.1877e+10 22408
## - Personal     1   36489170 1.1894e+10 22410
## <none>                      1.1858e+10 22413
## - Grad.Rate    1  114076513 1.1972e+10 22419
## - Terminal     1  236097589 1.2094e+10 22433
## - Accept       1  343184224 1.2201e+10 22445
## - Top25perc    1  363640759 1.2222e+10 22448
## - Apps         1  810806804 1.2669e+10 22498
## - Top10perc    1 1150513489 1.3008e+10 22535
## - S.F.Ratio    1 1486356412 1.3344e+10 22571
## - Outstate     1 2015806516 1.3874e+10 22625
##
## Step:  AIC=22407.53
## Expend ~ Apps + Accept + Top10perc + Top25perc + Outstate + Personal +
##     Terminal + S.F.Ratio + Grad.Rate
##
##               Df  Sum of Sq        RSS   AIC
## - Personal     1   44140991 1.1921e+10 22406
## <none>                      1.1877e+10 22408
## - Grad.Rate    1  119437764 1.1996e+10 22414
## - Terminal     1  244851195 1.2122e+10 22429
## - Top25perc    1  380438087 1.2257e+10 22444
## - Accept       1  437983009 1.2315e+10 22451
## - Apps         1  792219767 1.2669e+10 22491
## - Top10perc    1 1308098232 1.3185e+10 22547
## - S.F.Ratio    1 1471314303 1.3348e+10 22564
## - Outstate     1 2096806680 1.3974e+10 22628
##
## Step:  AIC=22405.48
## Expend ~ Apps + Accept + Top10perc + Top25perc + Outstate + Terminal +
##     S.F.Ratio + Grad.Rate
##
##               Df  Sum of Sq        RSS   AIC
## <none>                      1.1921e+10 22406
## - Grad.Rate    1  147988977 1.2069e+10 22416
## - Terminal     1  248745576 1.2170e+10 22427
## - Top25perc    1  390929485 1.2312e+10 22443
## - Accept       1  424613672 1.2346e+10 22447
## - Apps         1  800453200 1.2722e+10 22489
## - Top10perc    1 1347346841 1.3268e+10 22548
## - S.F.Ratio    1 1557236483 1.3478e+10 22570
## - Outstate     1 2062690562 1.3984e+10 22622
BICmodel <- lm(Expend ~ Apps + Accept + Top10perc + Top25perc + Outstate +
                 Terminal + S.F.Ratio + Grad.Rate, college.train)
# MSE for Training
pred.bic.train <- predict(BICmodel, college.train)
mse.bic.train <- mean((college.train$Expend - pred.bic.train)^2)
mse.bic.train
## [1] 8515039
# MSE for Testing
pred.bic.test <- predict(BICmodel, college.test)
mse.bic.test <- mean((college.test$Expend - pred.bic.test)^2)
mse.bic.test
## [1] 9701061
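Rather than retyping the surviving predictors, the selected formula can be pulled straight from the step object (a sketch equivalent to the BICmodel fit above):

formula(backwardBIC)
## Expend ~ Apps + Accept + Top10perc + Top25perc + Outstate + Terminal +
##     S.F.Ratio + Grad.Rate
# BICmodel <- lm(formula(backwardBIC), data = college.train)  # same fit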
b)
library(gam)
## Loading required package: splines
## Loading required package: foreach
## Loaded gam 1.20
# Fit GAM on the training set using the BIC predictors from (a), plus Enroll
set.seed(1128)
gam1 <- gam(Expend ~ Apps + Accept + Enroll + Top10perc + Top25perc + Outstate +
              Terminal + S.F.Ratio + Grad.Rate,
            family = gaussian(), data = college.train)
# MSE of Training
predict.gam.train <- predict(gam1, newdata = college.train)
mse.gam.train <- mean((college.train$Expend - predict.gam.train)^2)
mse.gam.train
## [1] 8495993
# MSE of Testing
predict.gam.test <- predict(gam1, newdata = college.test)
mse.gam.test <- mean((college.test$Expend - predict.gam.test)^2)
mse.gam.test
## [1] 9636946
# Plot the partial effect of each predictor
par(mfrow = c(3, 3))
plot.Gam(gam1, se = TRUE, col = "red")
[Figure: 3x3 grid of partial-effect plots with standard-error bands, one panel per predictor: Apps, Accept, Enroll, Top10perc, Top25perc, Outstate, Terminal, S.F.Ratio, Grad.Rate]
Based on the outcome of the GAM model, out of the 9 predictors, Accept, Enroll, Terminal, and
Grad.Rate are the most troublesome predictors when plotted against the response variable.
c)
gam1t <- gam(Expend ~ Apps + Accept + Enroll + Top10perc + Top25perc + Outstate +
               Terminal + S.F.Ratio + Grad.Rate,
             family = gaussian(), data = college.test)
par(mfrow = c(3, 3))
plot.Gam(gam1t, se = TRUE, col = "red")
# Same partial-effect patterns as on the training set
[Figure: the same 3x3 grid of partial-effect plots, refit on the test set]
d)
The most notable non-linear relationships are with Enroll, Accept, Apps, and Terminal, as their partial effects do not follow the response variable Expend linearly.
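One hedged way to act on this observation (my own sketch, not part of the submitted answer) is to refit the GAM with smoothing splines on the predictors flagged as non-linear; the name gam2 and the choice of df = 4 are illustrative assumptions:

# Sketch: smoothing splines (df = 4, an arbitrary choice) on the non-linear predictors
gam2 <- gam(Expend ~ s(Apps, 4) + s(Accept, 4) + s(Enroll, 4) + Top10perc + Top25perc +
              Outstate + s(Terminal, 4) + S.F.Ratio + Grad.Rate,
            family = gaussian(), data = college.train)
mean((college.test$Expend - predict(gam2, newdata = college.test))^2)  # compare with 9636946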
Question 4
Based on these models, it seems our fits already produce reasonable predictions, but they could be further improved with transformations. Although the MSE values are large in absolute terms, each model attained the lowest MSE it could after tuning. Their error rates were quite similar, all falling roughly in the 8,000,000 to 11,000,000 range, except for my PCR model.
# Model                          Test MSE   Train MSE
# 1. Least Squares                9658396     8423397
# 2. Ridge (best lambda)          9899790     8729050
# 3. Lasso (best lambda)          9448019     8468312
# 4. PCR (worst model)           12435645    10734336
# 5. PLS                          9633312     8455719
# 6. BIC                          9701061     8515039
# 7. GAM                          9636946     8495993
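For convenience, the same summary can be assembled as a data frame and ranked by test MSE (a sketch; the values are copied from the results above):

results <- data.frame(
  Model = c("Least Squares", "Ridge", "Lasso", "PCR", "PLS", "BIC", "GAM"),
  Test  = c(9658396, 9899790, 9448019, 12435645, 9633312, 9701061, 9636946),
  Train = c(8423397, 8729050, 8468312, 10734336, 8455719, 8515039, 8495993))
results[order(results$Test), ]  # lasso has the lowest test MSE; PCR the highest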