I am participating in a public Kaggle competition for practice, and when I submit my code I can't seem to get a better score. What can I do to get a better score? All the variables are: Id, LotFrontage, LotArea, BldgType, OverallQual, OverallCond, FullBath, HalfBath, TotRmsAbvGrd, YearBuilt, TotalBsmtSF, Heating, BedroomAbvGr, CentralAir, GrLivArea, PoolArea, YrSold, SalePrice (the test dataset doesn't have SalePrice, as that is the variable I'm predicting).

MATLAB: An Introduction with Applications
6th Edition
ISBN:9781119256830
Author:Amos Gilat
Publisher:John Wiley & Sons Inc
Chapter1: Starting With Matlab
Section: Chapter Questions
Problem 1P
icon
Related questions
Question

I am participating in a public Kaggle competition for practice, and when I submit my code I can't seem to get a better score. What can I do to get a better score?

All the variables are:

Id, LotFrontage, LotArea, BldgType, OverallQual, OverallCond, FullBath, HalfBath, TotRmsAbvGrd, YearBuilt, TotalBsmtSF, Heating, BedroomAbvGr, CentralAir, GrLivArea, PoolArea, YrSold, SalePrice (the test dataset doesn't have SalePrice, as that is the variable I'm predicting).

requirements.R

library(tidyverse)

library(ggplot2)

library(mgcv)

library(glmnet)

requirements.R x
build_features.R x
ी | B Source on Save
train_model.R x
train_match <-
train[, !colnames (train) %in% "SalePrice"]
#combine for cleaning
rbind(train_match, test)
train x
coefficients *
1 ###Reading data
2 train <- read.csv("project/volume/data/raw/Stat_380_train.csv")
3 test <- read.csv("project/volume/data/raw/Stat_380_test.csv")
4 submission <- read.csv("project/volume/data/raw/Stat_380_sample_submission.csv")
5
6 #match train and test data
7
8
9
10
11 trainAndTest <-
12
13
➡Run
trainClean >>
Source
G
14 #fill in missing values for LotFrontage
15
meanLotFrontage <- mean(train$LotFrontage, na.rm = TRUE)
16 trainAndTest$LotFrontage[is.na(trainAndTest$LotFrontage)] <- meanLotFrontage
17
18 #change to numbers
19 trainAndTest$BldgType <- as.numeric(as.factor(trainAndTest$BldgType))
20
trainAndTest$Heating <-as.numeric(as.factor(trainAndTest$Heating))
21 trainAndTest$CentralAir <- as.numeric(as.factor(trainAndTest$CentralAir))
22
23 #split cleaned data back up
24 trainCleaned <- trainAndTest [1:nrow(train), ]
25
testCleaned <- trainAndTest[(nrow(train) + 1):nrow(trainAndTest), ]
26
27
#add Sale Price Back to Train
28 trainCleaned$SalePrice <- train$SalePrice
29
30
31
32
33 #Save the cleaned test dataset
34
write.csv(testCleaned, file = "project/volume/data/raw/test_cleaned.csv", row.names =
FALSE)
35
36
# Save the cleaned training dataset
write.csv(trainCleaned, file = "project/volume/data/raw/train_cleaned.csv", row.names =
FALSE)
Transcribed Image Text:requirements.R x build_features.R x ी | B Source on Save train_model.R x train_match <- train[, !colnames (train) %in% "SalePrice"] #combine for cleaning rbind(train_match, test) train x coefficients * 1 ###Reading data 2 train <- read.csv("project/volume/data/raw/Stat_380_train.csv") 3 test <- read.csv("project/volume/data/raw/Stat_380_test.csv") 4 submission <- read.csv("project/volume/data/raw/Stat_380_sample_submission.csv") 5 6 #match train and test data 7 8 9 10 11 trainAndTest <- 12 13 ➡Run trainClean >> Source G 14 #fill in missing values for LotFrontage 15 meanLot Frontage <- mean(train$LotFrontage, na.rm = TRUE) 16 trainAndTest$Lot Frontage [is.na(trainAndTest$LotFrontage)] <- meanLotFrontage 17 18 #change to numbers 19 trainAndTest$BldgType <- as.numeric(as.factor(trainAnd Test$BldgType)) 20 trainAndTest$Heating <-as.numeric(as.factor(trainAndTest$Heating)) 21 trainAndTest$CentralAir <- as.numeric(as.factor(trainAndTest$CentralAir)) 22 23 #split cleaned data back up 24 trainCleaned <- trainAndTest [1:nrow(train), ] 25 testCleaned <- trainAndTest[(nrow(train) + 1):nrow(trainAndTest), ] 26 27 #add Sale Price Back to Train 28 trainCleaned$Sale Price <- train$Sale Price 29 30 31 32 33 #Save the cleaned test dataset 34 write.csv(testCleaned, file = "project/volume/data/raw/test_cleaned.csv", row.names = FALSE) 35 36 # Save the cleaned training dataset write.csv(trainCleaned, file "project/volume/data/raw/train_cleaned.csv", row. names FALSE)
Ⓡrequirements.R x
build_features.R X
Source on Save
| train_model.R X
data =
=
#Saving the submission file
write.csv(x = submission, file
train x
2
train <- read.csv("project/volume/data/raw/Stat_380_train.csv")
3 test <- read.csv("project/volume/data/raw/Stat_380_test.csv")
4 submission <- read.csv("project/volume/data/raw/Stat_380_sample_submission.csv")
5
6 #shrinkage method
7 X <- model.matrix(SalePrice
8
Y
trainCleaned$SalePrice
9 ridge.model <- cv.glmnet(X, Y, alpha 0, nfolds
10 plot(ridge.model)
11 bestlam <- ridge.model$lambda.min
12 bestlam
trainCleaned) [, -1]
=
100)
coefficients *
Run
13 coefficients <- predict(ridge.model, s = bestlam, type = "coefficients")
14
15 coefficients <- as.matrix(coefficients)
16 selected_vars<- rownames(coefficients)
17 selected_coeffs <- coefficients [, "s1"]
18 selected_vars
19
selected_coeffs
20
21 #select relevent variables
22
23
24
25
26
27
28
29
test_selected <- testCleaned [, selected_vars]
30 test_selected_matrix <- as.matrix(test_selected)
31
32 #Using fitted model to predict the sales price of test set
33
predictions <- predict.glmnet(object = model, newx = test_selected_matrix)
test$SalePrice <- predictions [, 1]
34
35
36
37 #Saving the predicted sales price in the submission file
38
#making sure we have the right order
39
all.equal (submission$Id, test$Id)
40
submission$SalePrice <- test$SalePrice
41
42
43
selected_vars <- c("LotFrontage", "LotArea", "OverallQual", "OverallCond",
"TotRmsAbvGrd", "YearBuilt", "BedroomAbvGr",
"CentralAir", "GrLivArea", "PoolArea")
X_selected <- model.matrix(SalePrice
model <- glmnet(X_selected, Y, alpha = 0, lambda = bestlam)
trainClean
data trainCleaned) [, selected_vars]
"FullBath",
Source
"project/volume/models/submission.csv", row. names
F)
Transcribed Image Text:Ⓡrequirements.R x build_features.R X Source on Save | train_model.R X data = = #Saving the submission file write.csv(x = submission, file train x 2 traîn <- read.csv("project/volume/data/raw/Stat_380_train.csv", 3 test <- read.csv("project/volume/data/raw/Stat_380_test.csv") 4 submission <- read.csv("project/volume/data/raw/Stat_380_sample_submission.csv") 5 6 #shrinkage method 7 X <- model.matrix(SalePrice 8 Y trainCleaned $Sale Price 9 ridge.model <- cv.glmnet(X, Y, alpha 0, nfolds 10 plot(ridge.model) 11 bestlam <- ridge.model$lambda.min 12 bestlam trainCleaned) [, -1] = 100) coefficients * Run 13 coefficients <- predict(ridge.model, s = bestlam, type "coefficients") 14 15 coefficients <- as.matrix(coefficients) 16 selected_vars<- rownames(coefficients) 17 selected_coeffs <- coefficients [, "s1"] 18 selected_vars 19 selected_coeffs 20 21 #select relevent variables 22 23 24 25 26 27 28 29 test_selected <- testCleaned [, selected_vars] 30 test_selected_matrix <- as.matrix(test_selected) 31 32 #Using fitted model to predict the sales price of test set 33 predictions <- predict.glmnet(object = model, newx = test_selected_matrix) test$SalePrice <- predictions [, 1] 34 35 36 37 #Saving the predicted sales price in the submission file 38 #making sure we have the right order 39 all.equal (submission$Id, test$Id) 40 submission $Sale Price <- test$Sale Price 41 42 43 selected_vars <- c("LotFrontage", "LotArea", "OverallQual", "OverallCond", "TotRmsAbvGrd", "YearBuilt", "BedroomAbvGr", "CentralAir", "GrLivArea", "PoolArea") X_selected <- model.matrix(SalePrice model <- glmnet(X_selected, Y, alpha 0, lambda = bestlam) trainClean data trainCleaned) [, selected_vars] "FullBath", Source "project/volume/models/submission.csv", row. names F)
Expert Solution
trending now

Trending now

This is a popular solution!

steps

Step by step

Solved in 3 steps

Blurred answer
Similar questions
Recommended textbooks for you
MATLAB: An Introduction with Applications
MATLAB: An Introduction with Applications
Statistics
ISBN:
9781119256830
Author:
Amos Gilat
Publisher:
John Wiley & Sons Inc
Probability and Statistics for Engineering and th…
Probability and Statistics for Engineering and th…
Statistics
ISBN:
9781305251809
Author:
Jay L. Devore
Publisher:
Cengage Learning
Statistics for The Behavioral Sciences (MindTap C…
Statistics for The Behavioral Sciences (MindTap C…
Statistics
ISBN:
9781305504912
Author:
Frederick J Gravetter, Larry B. Wallnau
Publisher:
Cengage Learning
Elementary Statistics: Picturing the World (7th E…
Elementary Statistics: Picturing the World (7th E…
Statistics
ISBN:
9780134683416
Author:
Ron Larson, Betsy Farber
Publisher:
PEARSON
The Basic Practice of Statistics
The Basic Practice of Statistics
Statistics
ISBN:
9781319042578
Author:
David S. Moore, William I. Notz, Michael A. Fligner
Publisher:
W. H. Freeman
Introduction to the Practice of Statistics
Introduction to the Practice of Statistics
Statistics
ISBN:
9781319013387
Author:
David S. Moore, George P. McCabe, Bruce A. Craig
Publisher:
W. H. Freeman