worksheet_regression1
pdf
keyboard_arrow_up
School
University of the Fraser Valley *
*We aren’t endorsed by this school
Course
176
Subject
Statistics
Date
Feb 20, 2024
Type
Pages
3
Uploaded by PrivateIron6895
15/06/2023, 11:36
worksheet_regression1
file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
1/11
tidymodels
answer0.0
"F"
In [ ]:
### Run this cell before continuing.
# Attach the packages the worksheet relies on.
library(tidyverse)
library(repr)
library(tidymodels)

# repr option: cap the number of matrix/data-frame rows shown in output.
options(repr.matrix.max.rows = 6)

# Run the worksheet's helper scripts (the test_* checks called below are
# presumably defined by tests.R -- it is not visible here).
source("tests.R")
source('cleanup.R')
In [ ]:
### BEGIN SOLUTION
# Chosen option for question 0.0.
answer0.0 <- "B"
15/06/2023, 11:36
worksheet_regression1
file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
2/11
answer0.1
answer0.2
### END SOLUTION
answer0.0
In [ ]:
# Run the check for question 0.0 (test_0.0 is defined outside this file).
test_0.0()
In [ ]:
### BEGIN SOLUTION
# Chosen option for question 0.1.
answer0.1 <- "C"
### END SOLUTION
answer0.1
In [ ]:
# Run the check for question 0.1 (test_0.1 is defined outside this file).
test_0.1()
In [ ]:
### BEGIN SOLUTION
answer0.2 <-
sqrt
(((
1 -
1.3
)
^
2 +
(
1 -
1.5
)
^
2 +
(
2 -
1.5
)
^
2 +
(
3 -
2
)
^
2 +
(
5 ### END SOLUTION
answer0.2
15/06/2023, 11:36
worksheet_regression1
file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
3/11
answer0.3
In [ ]:
# Run the check for question 0.2 (test_0.2 is defined outside this file).
test_0.2()
In [ ]:
### BEGIN SOLUTION
# Chosen option for question 0.3.
answer0.3 <- "A"
### END SOLUTION
answer0.3
In [ ]:
# Run the check for question 0.3 (test_0.3 is defined outside this file).
test_0.3()
15/06/2023, 11:36
worksheet_regression1
file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
4/11
marathon.csv
data/
marathon
time_hrs
max
marathon_50
time_hrs
max
answer2
sample_n()
In [ ]:
### BEGIN SOLUTION
# Read the marathon data from the data/ directory. The call must stay on the
# same line as the assignment: `marathon <- read_csv` followed by a newline
# would bind the function itself instead of the data.
marathon <- read_csv('data/marathon.csv')
### END SOLUTION
marathon
In [ ]:
# Run the check for question 1.0 (test_1.0 is defined outside this file).
test_1.0()
In [ ]:
# Plot size used by the notebook renderer for this cell.
options(repr.plot.width = 8, repr.plot.height = 7)
set.seed(2000) ### DO NOT CHANGE

#... <- ... |>
#    sample_n(...)
### BEGIN SOLUTION
# Random sample of 50 rows; reproducible because of the seed set above.
marathon_50 <- marathon |>
  sample_n(50)

# Scatter plot of time_hrs against max for the sampled rows.
answer2 <- marathon_50 |>
  ggplot(aes(x = max, y = time_hrs)) +
  geom_point(alpha = 0.5) +
  xlab("Maximum Distance Ran per\nWeek During Training (miles)") +
  ylab("Race Time (hours)") +
  theme(text = element_text(size = 20))
### END SOLUTION
answer2
In [ ]:
# Run the check for question 2.0 (test_2.0 is defined outside this file).
test_2.0()
15/06/2023, 11:36
worksheet_regression1
file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
5/11
answer3
answer4
In [ ]:
# run this cell to see a visualization of the 4 nearest neighbours
# Plot size used by the notebook renderer for the figure below.
options(repr.plot.height = 6, repr.plot.width = 7)
marathon_50 |>
ggplot
(
aes
(
x =
max
, y =
time_hrs
)) +
geom_point
(
color =
'dodgerblue'
, alpha =
0.4
) +
geom_vline
(
xintercept =
100
, linetype =
"dotted"
) +
xlab
(
"Maximum Distance Ran per \n Week During Training (mi)"
) +
ylab
(
"Race Time (hours)"
) +
geom_segment
(
aes
(
x =
100
, y =
2.56
, xend =
107
, yend =
2.56
), col =
geom_segment
(
aes
(
x =
100
, y =
2.65
, xend =
90
, yend =
2.65
), col =
"
geom_segment
(
aes
(
x =
100
, y =
2.99
, xend =
86
, yend =
2.99
), col =
"
geom_segment
(
aes
(
x =
100
, y =
3.05
, xend =
82
, yend =
3.05
), col =
"
theme
(
text =
element_text
(
size =
20
))
In [ ]:
#... <- ... |>
#    mutate(diff = abs(100 - ...)) |>
#    ...(diff) |>
#    slice(...) |>
#    summarise(predicted = ...(...)) |>
#    pull()
### BEGIN SOLUTION
# Prediction at max = 100: take the 4 sampled rows whose max is closest to
# 100 and average their time_hrs; pull() returns that single number.
answer3 <- marathon_50 |>
  mutate(diff = abs(100 - max)) |>
  arrange(diff) |>
  slice(1:4) |>
  summarise(predicted = mean(time_hrs)) |>
  pull()
### END SOLUTION
answer3
In [ ]:
# Run the check for question 3.0 (test_3.0 is defined outside this file).
test_3.0()
In [ ]:
### BEGIN SOLUTION
# Same prediction as answer3 but with the 2 nearest rows: average time_hrs of
# the 2 sampled rows whose max is closest to 100.
answer4 <- marathon_50 |>
  mutate(diff = abs(100 - max)) |>
  arrange(diff) |>
  slice(1:2) |>
  summarise(predicted = mean(time_hrs)) |>
  pull()
### END SOLUTION
answer4
# --- Page-break text that the extraction placed inside the pipeline above;
# --- kept here as comments so that it no longer breaks the statement.
# 15/06/2023, 11:36
# worksheet_regression1
# file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
# 6/11
# answer5
# "F"
# tidymodels
# marathon
# time_hrs
# strata
# marathon_split
# training
# testing
# marathon_training
# marathon_testing
In [ ]:
# Run the check for question 4.0 (test_4.0 is defined outside this file).
test_4.0()
In [ ]:
### BEGIN SOLUTION
# Chosen option for question 5.
answer5 <- "C"
### END SOLUTION
answer5
In [ ]:
# Run the check for question 5.0 (test_5.0 is defined outside this file).
test_5.0()
In [ ]:
# Fix the RNG state so the train/test split below is reproducible.
set.seed(2000) ### DO NOT CHANGE

#... <- initial_split(..., prop = ..., strata = ...)
#... <- training(...)
#... <- testing(...)
15/06/2023, 11:36
worksheet_regression1
file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
7/11
"kknn"
set_mode("regression")
marathon_spec
marathon_recipe
### BEGIN SOLUTION
# Split marathon with prop = 0.75 going to training, stratified on time_hrs.
marathon_split <- initial_split(marathon, prop = 0.75, strata = time_hrs)

# Extract the two partitions of the split.
marathon_training <- training(marathon_split)
marathon_testing <- testing(marathon_split)
### END SOLUTION
In [ ]:
# Run the check for question 6.0 (test_6.0 is defined outside this file).
test_6.0()
In [ ]:
set.seed(1234) #DO NOT REMOVE

#... <- nearest_neighbor(weight_func = ..., neighbors = ...) |>
#       set_engine(...) |>
#       set_mode(...)

#... <- recipe(... ~ ..., data = ...) |>
#       step_scale(...) |>
#       step_center(...)
#
### BEGIN SOLUTION
# K-nearest-neighbours regression specification using the kknn engine.
# NOTE(review): the extracted source was cut off after `neighbors = t`;
# tune() is restored here because this spec is later passed (via the
# workflow) to tune_grid() with a grid of `neighbors` values.
marathon_spec <- nearest_neighbor(
  weight_func = "rectangular",
  neighbors = tune()
) |>
  set_engine("kknn") |>
  set_mode("regression")

# Model time_hrs from max; scale and then center every predictor.
marathon_recipe <- recipe(time_hrs ~ max, data = marathon_training) |>
  step_scale(all_predictors()) |>
  step_center(all_predictors())
### END SOLUTION
marathon_recipe
In [ ]:
# Run the check for question 7.0 (test_7.0 is defined outside this file).
test_7.0()
15/06/2023, 11:36
worksheet_regression1
file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
8/11
vfold_cv
marathon_vfold
strata
workflow
marathon_workflow
set_mode
tidymodels
neighbors
seq
gridvals
gridvals
marathon_results
In [ ]:
set.seed(1234) # DO NOT REMOVE
### BEGIN SOLUTION
# 5-fold cross-validation resamples of the training set, stratified on
# time_hrs.
marathon_vfold <- vfold_cv(marathon_training, v = 5, strata = time_hrs)

# Combine the recipe and the model specification into a single workflow.
marathon_workflow <- workflow() |>
  add_recipe(marathon_recipe) |>
  add_model(marathon_spec)
### END SOLUTION
marathon_workflow
In [ ]:
# Run the check for question 7.1 (test_7.1 is defined outside this file).
test_7.1()
In [ ]:
set.seed(2019) # DO NOT CHANGE
### BEGIN SOLUTION
# Candidate numbers of neighbours: 1, 11, 21, ..., 81.
gridvals <- tibble(neighbors = seq(from = 1, to = 81, by = 10))

# Evaluate the workflow on every fold for each grid value and collect the
# resulting metrics.
# NOTE(review): the extracted source was cut off at `grid = gridval`; the
# argument name and the closing `) |>` are restored from the `gridvals`
# object defined just above.
marathon_results <- marathon_workflow |>
  tune_grid(resamples = marathon_vfold, grid = gridvals) |>
  collect_metrics()
### END SOLUTION
marathon_results
In [ ]:
# Run the check for question 8.0 (test_8.0 is defined outside this file).
test_8.0()
15/06/2023, 11:36
worksheet_regression1
file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
9/11
marathon_min
neighbors
marathon_min
k_min
k_min
marathon_best_spec
marathon_best_fit
marathon_recipe
predict
metrics
marathon_summary
In [ ]:
set.seed(2020) # DO NOT REMOVE

#... <- marathon_results |>
#    filter(.metric == ...) |>
#    arrange(...) |>
#    ...
### BEGIN SOLUTION
# Keep only the "rmse" rows, sort ascending by `mean`, and take the first
# row, i.e. the row with the smallest mean RMSE.
marathon_min <- marathon_results |>
  filter(.metric == "rmse") |>
  arrange(mean) |>
  slice(1)
### END SOLUTION
marathon_min
In [ ]:
# Run the check for question 8.1 (test_8.1 is defined outside this file).
test_8.1()
In [ ]:
# Fix the RNG state before refitting and evaluating the final model.
set.seed(1234) # DO NOT REMOVE

#... <- marathon_min |>
#    pull(...)
#... <- nearest_neighbor(weight_func = ..., neighbors = ...) |>
#    set_engine(...) |>
#    set_mode(...)
#... <- workflow() |>
#    add_recipe(...) |>
#    add_model(...) |>
15/06/2023, 11:36
worksheet_regression1
file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
10/11
answer8.3
"true"
"false"
# fit(data = ...)
#... <- marathon_best_fit |>
# predict(...) |>
# bind_cols(...) |>
# metrics(truth = ..., estimate = ...)
### BEGIN SOLUTION
# Number of neighbours stored in the selected row of the tuning results.
k_min <- marathon_min |>
  pull(neighbors)

# Same K-NN regression specification as before, with K fixed.
# NOTE(review): the extracted source was cut off at `, neighbor`;
# `neighbors = k_min` is restored from the k_min value computed just above
# and the `neighbors = ...` slot in the cell's template comments.
marathon_best_spec <- nearest_neighbor(
  weight_func = "rectangular",
  neighbors = k_min
) |>
  set_engine("kknn") |>
  set_mode("regression")

# Fit the recipe + fixed-K model on the training data.
marathon_best_fit <- workflow() |>
  add_recipe(marathon_recipe) |>
  add_model(marathon_best_spec) |>
  fit(data = marathon_training)

# Predict on the test data, attach the predictions to the test rows, and
# compute metrics comparing time_hrs with .pred.
marathon_summary <- marathon_best_fit |>
  predict(marathon_testing) |>
  bind_cols(marathon_testing) |>
  metrics(truth = time_hrs, estimate = .pred)
### END SOLUTION
marathon_summary
In [ ]:
# Run the check for question 8.2 (test_8.2 is defined outside this file).
test_8.2()
In [ ]:
### BEGIN SOLUTION
# Chosen answer for question 8.3, given as a lowercase string.
answer8.3 <- "false"
### END SOLUTION
15/06/2023, 11:36
worksheet_regression1
file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html
11/11
max
time_hrs
predict
marathon_best_fit
marathon_training
marathon_training
bind_cols
marathon_preds
marathon_preds
marathon_plot
geom_point
geom_line
In [ ]:
# Run the check for question 8.3 (test_8.3 is defined outside this file).
test_8.3()
In [ ]:
set.seed(2019) # DO NOT CHANGE
# Plot size used by the notebook renderer for this cell.
options(repr.plot.width = 7, repr.plot.height = 7)
### BEGIN SOLUTION
# Predict on the training rows and bind the predictions to those same rows.
marathon_preds <- marathon_best_fit |>
  predict(marathon_training) |>
  bind_cols(marathon_training)
marathon_plot <-
ggplot
(
marathon_preds
, aes
(
x =
max
, y =
time_hrs
)) +
geom_point
(
alpha =
0.4
) +
xlab
(
"Maximum Distance Ran per \n Week During Training (mi)"
) +
ylab
(
"Race Time (hours)"
) +
geom_line
(
data =
marathon_preds
, aes
(
x =
max
, y =
.pred
), color ggtitle
(
paste0
(
"K = "
, k_min
)) +
theme
(
text =
element_text
(
size =
20
))
### END SOLUTION
marathon_plot
In [ ]:
# Run the check for question 9.0 (test_9.0 is defined outside this file).
test_9.0()
In [ ]:
# Run the worksheet's cleanup script at the end of the notebook.
source('cleanup.R')
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
Related Documents
Related Questions
Prepare Multiple Regression Model in Matrix Notation?
arrow_forward
Find the equation $y = \beta_0 + \beta_1 x$ of the least-squares line that best fits the given data points.
(2,9), (3,6), (5,3), (6,0)
The line is $y = \_\_\_ + \_\_\_\,x$.
(Type integers or decimals.)
Jou
arrow_forward
Explain regression model When Estimator in Matrix Form?
arrow_forward
Define equation used for least squares regression of a line?
arrow_forward
Use Lagrange multipliers to find the dimensions of a rectangular box with the largest volume if the total surface area is 4cm^2 and write it as a 1x3 matrix, with the measurements in cm.
arrow_forward
For each of the choices of A and b that follow, determine whether the system Ax = b is consistent by examining how b relates to the column vectors of A. Explain your answers in each case.
arrow_forward
Suppose column 1 + column 3 + column 5 = 0 in a 4 by 5 matrix with four pivots. Which column has no pivot? What is the special solution? Describe N(A).
arrow_forward
Find the least squares regression line for the points.
(0, 5), (3, 3), (5, 1), (7, -4), (9, -5)
1.) y =
arrow_forward
Find the equation $y = \beta_0 + \beta_1 x$ of the least-squares line that best fits the given data points.
(3,6), (4,4), (6,2), (7,0)
The line is $y = \_\_\_ + \_\_\_\,x$.
(Type integers or decimals.)
arrow_forward
Find the equation $y = \beta_0 + \beta_1 x$ of the least-squares line that best fits the given data points.
(3,6), (4,4), (6,2), (7,0)
arrow_forward
SEE MORE QUESTIONS
Recommended textbooks for you

Trigonometry (MindTap Course List)
Trigonometry
ISBN:9781305652224
Author:Charles P. McKeague, Mark D. Turner
Publisher:Cengage Learning
Algebra & Trigonometry with Analytic Geometry
Algebra
ISBN:9781133382119
Author:Swokowski
Publisher:Cengage
Related Questions
- Prepare Multiple Regression Model in Matrix Notation?arrow_forwardFind the equation y = po + B₁x of the least-squares line that best fits the given data points. (2,9), (3,6), (5,3), (6,0) The line is y=+ x (Type integers or decimals.) Jouarrow_forwardExplain regression model When Estimator in Matrix Form?arrow_forward
- Define equation used for least squares regression of a line?arrow_forwardUse Lagrange multipliers to find the dimensions of a rectangular box with the largest volume if the total surface area is 4cm^2 and write it as a 1x3 matrix, with the measurements in cm.arrow_forwardFor each of the choices of A and b that follow, determine whether the system Ax = b is consistent by examining how b relates to the column vectors of A. Explain your answers in each case.arrow_forward
- Suppose column 1 + column 3 + column 5 = 0 in a 4 by 5 matrix with four pivots. Which column has no pivot? What is the special solution? Describe N(A).arrow_forwardFind the least squares regression line for the points. (0, 5), (3, 3), (5, 1), (7, -4), (9, -5) 1.) y =arrow_forwardFind the equation y = Bo + B₁x of the least-squares line that best fits the given data points. (3,6), (4,4), (6,2), (7,0) The line is y= O (Type integers or decimals.) + X.arrow_forward
arrow_back_ios
arrow_forward_ios
Recommended textbooks for you
- Trigonometry (MindTap Course List)TrigonometryISBN:9781305652224Author:Charles P. McKeague, Mark D. TurnerPublisher:Cengage LearningAlgebra & Trigonometry with Analytic GeometryAlgebraISBN:9781133382119Author:SwokowskiPublisher:Cengage

Trigonometry (MindTap Course List)
Trigonometry
ISBN:9781305652224
Author:Charles P. McKeague, Mark D. Turner
Publisher:Cengage Learning
Algebra & Trigonometry with Analytic Geometry
Algebra
ISBN:9781133382119
Author:Swokowski
Publisher:Cengage