Supplement
Rmd
keyboard_arrow_up
School
Wake Tech *
*We aren’t endorsed by this school
Course
320
Subject
Industrial Engineering
Date
Dec 6, 2023
Type
Rmd
Pages
6
Uploaded by MateScience9335
---
title: "Supplement for Modeling 3"
author: "Mario Giacomazzo"
date: "`r format(Sys.time(), '%B %d, %Y')`"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,warning=F)
options(scipen=999)
library(tidyverse)
#Essential Functions
library(modelr)
#Helpful Functions in Modeling
library(xtable)
DATA=read_csv("AirWaterTemp.csv",col_types=cols()) #River Data
```
# Introduction
Today, we will work with daily water temperature and air temperature data observed
for `r length(unique(DATA$L))` rivers in Spain. The goal of this tutorial is to
identify the best model for predicting the maximum water temperature given the
maximum air temperature. In the preview below, `W` represents the daily maximum
water temperature and `A` represents the daily maximum air temperature. The data
contains almost a full year of data for each of the `r length(unique(DATA$L))`
different rivers.
```{r,echo=F}
head(DATA)
```
# Part 1: Examining the Relationship
## Chunk 1: Overall Relationship
```{r,echo=F,eval=T,message=F}
ggplot(data=DATA) +
geom_point(aes(x=A,y=W),alpha=0.3)+
geom_smooth(aes(x=A,y=W)) +
theme_minimal()
```
## Chunk 2: Location-Specific Relationship
```{r,echo=F,eval=T}
WAPLOT.func=function(Location){
DATA %>% filter(L == Location) %>%
ggplot()+
geom_point(aes(x=A,y=W),alpha=0.3)+
geom_smooth(aes(x=A,y=W)) +
theme_minimal()
}
WAPLOT.func(103)
WAPLOT.func(105)
WAPLOT.func(918)
```
## Chunk 3: Split Data into Train and Test Sets
```{r,eval=T}
set.seed(216)
TEST.LOCATIONS=sample(x=unique(DATA$L),size=3,replace=F)
TRAIN = anti_join(DATA,tibble(L=TEST.LOCATIONS),by="L")
TEST = semi_join(DATA,tibble(L=TEST.LOCATIONS),by="L")
```
## Chunk 4: Plots of Relationship for Train and Test Data
```{r,echo=F,eval=T}
WAPLOT2.func=function(DATA){
ggplot(data=DATA)+
geom_point(aes(x=A,y=W),alpha=0.3)+
geom_smooth(aes(x=A,y=W)) +
theme_minimal()
}
WAPLOT2.func(TRAIN)
WAPLOT2.func(TEST)
```
# Part 2: Linear Regression Model
## Chunk 1: Fitting Linear Model to Train Data
```{r,echo=F,eval=T}
linmod=lm(W~A,data=TRAIN)
summary(linmod)
```
## Chunk 2: Getting Predictions from Linear Model
```{r,eval=T}
TRAIN2 = TRAIN %>% add_predictions(linmod,var="linpred")
TEST2 = TEST %>% add_predictions(linmod,var="linpred")
```
## Chunk 3: Getting Residuals from Linear Model
```{r,eval=T}
TRAIN3 = TRAIN2 %>% add_residuals(linmod,var="linres")
TEST3 = TEST2 %>% add_residuals(linmod,var="linres")
```
# Part 3: Polynomial Regression Model
## Chunk 1: Fitting Polynomial Regression Models
```{r,eval=T}
poly2mod=lm(W~A+I(A^2),data=TRAIN)
poly3mod=lm(W~A+I(A^2)+I(A^3),data=TRAIN)
poly4mod=lm(W~A+I(A^2)+I(A^3)+I(A^4),data=TRAIN)
anova(linmod,poly2mod,poly3mod,poly4mod,test="Chisq")
```
## Chunk 2: Getting Predictions from Polynomial Models
```{r,eval=T}
TRAIN4 =TRAIN3 %>%
add_predictions(poly2mod,var="poly2pred") %>%
add_predictions(poly3mod,var="poly3pred") %>%
add_predictions(poly4mod,var="poly4pred")
TEST4 =TEST3 %>%
add_predictions(poly2mod,var="poly2pred") %>%
add_predictions(poly3mod,var="poly3pred") %>%
add_predictions(poly4mod,var="poly4pred")
```
## Chunk 3: Getting Residuals from Polynomial Models
```{r,eval=T}
TRAIN5 =TRAIN4 %>%
add_residuals(poly2mod,var="poly2res") %>%
add_residuals(poly3mod,var="poly3res") %>%
add_residuals(poly4mod,var="poly4res")
TEST5 =TEST4 %>%
add_residuals(poly2mod,var="poly2res") %>%
add_residuals(poly3mod,var="poly3res") %>%
add_residuals(poly4mod,var="poly4res")
```
# Part 4: Nonlinear Logistic Model
## Chunk 1: Logistic Model Investigation
```{r,echo=F,eval=F}
set.seed(216)
EXAMPLE=tibble(
x=rnorm(10000,mean=0,sd=5),
y=7+12/(1+exp(4-1*x))
)
ggplot(data=EXAMPLE) +
geom_point(aes(x=x,y=y)) +
theme_minimal()
```
## Chunk 2: Essential Functions for Nonlinear Logistic Model
```{r,eval=F}
logistic.model=function(COEF,DATA){
pred=COEF[1]+COEF[2]/(1+exp(COEF[3]-COEF[4]*DATA$A))
}
MSE.logistic=function(COEF,DATA){
error=DATA$W-logistic.model(DATA=DATA,COEF=COEF)
sq.error=error^2
mse=mean(sq.error,na.rm=T)
return(mse)
}
```
## Chunk 3: Estimate Parameters for Nonlinear Logistic Model
```{r,eval=F}
logistic.mod=optim(
par=c(min(TRAIN$W,na.rm=T),
max(TRAIN$W,na.rm=T)-min(TRAIN$W,na.rm=T),
median(TRAIN$A,na.rm=T),
1),
#Smart Starting Values
fn=MSE.logistic,
#Function to Minimize
DATA=TRAIN
#Required Argument
)
print(logistic.mod)
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
```
## Chunk 4: Obtain Predictions and Residuals
```{r,eval=F}
TRAIN6=TRAIN5 %>% mutate(logpred=logistic.model(COEF=logistic.mod$par,DATA=TRAIN5),
logres=W-logpred)
TEST6=TEST5 %>% mutate(logpred=logistic.model(COEF=logistic.mod$par,DATA=TEST5),
logres=W-logpred)
```
# Intermission:
The function `save.image()` in R can be used to save all objects in the global
environment. This is very helpful when you want work off your results without
rerunning all previous R code. The name of the exported information should contain
the file extension *.Rdata*. These files can be extremely large depending how much
RAM was utilized in your R session.
The function `load()` can be used to import a
previous workspace.
For more information on *.Rdata* file types, see
https://fileinfo.com/extension/rdata for help.
```{r,eval=F}
save.image("Tutorial.Rdata")
```
# Part 5: Evaluation by Visualization
## Chunk 1: Plotting Models
```{r,echo=F,eval=F}
TEST6 %>%
select(L,A,W,linpred,poly2pred,poly3pred,poly4pred,logpred)%>%
gather(linpred:logpred,key="Model",value="Pred",factor_key=T) %>%
ggplot() +
geom_point(aes(x=A,y=W),color="gray") +
theme_minimal() +
geom_line(aes(x=A,y=Pred,color=Model),size=2)
```
## Chunk 2: Predicted Versus Actual Max Water Temperature
```{r,echo=F,eval=F}
TEST6 %>%
select(L,A,W,linpred,poly2pred,poly3pred,poly4pred,logpred)%>%
gather(linpred:logpred,key="Model",value="Pred",factor_key=T) %>%
ggplot() +
geom_point(aes(x=W,y=Pred,color=Model)) +
geom_abline(a=0,b=1,color="black",size=2) +
xlab("Maximum Water Temperature") +
ylab("Predicted Water Temperature") +
theme_minimal()
```
## Chunk 3: Plotting Residuals Versus Time
```{r,echo=F,eval=F}
TEST6 %>%
select(L,A,W,TIME,linres,poly2res,poly3res,poly4res,logres)%>%
gather(linres:logres,key="Model",value="Res",factor_key=T) %>%
ggplot() +
geom_line(aes(x=TIME,y=Res),color="lightskyblue2") +
geom_hline(yintercept=0,color="black",linetype=2,size=1.5) +
xlab("Time") +
ylab("Residual") +
theme_dark() +
facet_grid(Model~.)
```
## Chunk 4: Evaluating The Location-Specific Error
```{r,echo=F,eval=F}
TEST6 %>%
select(L,A,W,TIME,linpred,logpred) %>%
gather(linpred:logpred,key="Model",value="Pred",factor_key=T) %>%
ggplot() +
geom_point(aes(x=A,y=W),alpha=0.2) +
geom_line(aes(x=A,y=Pred,color=Model),size=2) +
theme_minimal()+facet_grid(L~.)
TEST6 %>%
select(L,A,W,TIME,linres,logres) %>%
gather(linres:logres,key="Model",value="Res",factor_key=T) %>%
ggplot() +
geom_point(aes(x=A,y=Res,color=Model)) +
geom_hline(yintercept=0) +
theme_minimal()+facet_grid(L~.)
TEST6 %>%
select(L,A,W,TIME,linres,logres) %>%
gather(linres:logres,key="Model",value="Res",factor_key=T) %>%
ggplot() +
geom_point(aes(x=TIME,y=Res,color=Model)) +
geom_hline(yintercept=0) +
theme_minimal()+facet_grid(L~.)
```
# Part 6: Evaluation by Numerical Summary
## Chunk 1: Functions Required For Evaluating Prediction
```{r,eval=F}
bias.func=function(res){
bias=mean(res,na.rm=T)
return(bias)
}
mae.func=function(res){
mae=mean(abs(res),na.rm=T)
return(mae)
}
rmse.func=function(res){
mse=mean(res^2,na.rm=T)
rmse=sqrt(mse)
return(rmse)
}
```
## Chunk 2: Checking Functions
```{r,eval=F}
ex.res=TEST6$linres
c(bias.func(ex.res),mae.func(ex.res),rmse.func(ex.res))
ex.res.mat=TEST6 %>% select(linres,poly2res,poly3res,poly4res,logres)
apply(ex.res.mat,2,bias.func)
apply(ex.res.mat,2,mae.func)
apply(ex.res.mat,2,rmse.func)
```
## Chunk 3: Get Table Quickly
```{r,echo=F,eval=F}
SUMM1=TEST6 %>%
select(L,A,W,TIME,linres,poly2res,poly3res,poly4res,logres)%>%
rename(Linear=linres,`Poly(2)`=poly2res,`Poly(3)`=poly3res,`Poly(4)`=poly4res,Logis
tic=logres)%>%
gather(Linear:Logistic,key="Model",value="Residual",factor_key=T)
SUMM2= SUMM1 %>%
group_by(Model) %>%
summarize(MB=bias.func(Residual),
MAE=mae.func(Residual),
RMSE=rmse.func(Residual))
print(SUMM2)
```
## Chunk 4: HTML Formatted Table
```{r,echo=F,eval=F}
SUMM3=xtable(SUMM2,digits=4,align=c("l","l","r","r","r"))
print(SUMM3,include.rownames=F,type="html")
```
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help