worksheet_regression1

pdf

School

University of the Fraser Valley *

*We aren’t endorsed by this school

Course

176

Subject

Statistics

Date

Feb 20, 2024

Type

pdf

Pages

3

Uploaded by PrivateIron6895

Report
15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 1/11 tidymodels answer0.0 "F" In [ ]: ### Run this cell before continuing. library ( tidyverse ) library ( repr ) library ( tidymodels ) options ( repr.matrix.max.rows = 6 ) source ( "tests.R" ) source ( 'cleanup.R' ) In [ ]: ### BEGIN SOLUTION answer0.0 <- "B" 15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 2/11 answer0.1 answer0.2 ### END SOLUTION answer0.0 In [ ]: test_0.0 () In [ ]: ### BEGIN SOLUTION answer0.1 <- "C" ### END SOLUTION answer0.1 In [ ]: test_0.1 () In [ ]: ### BEGIN SOLUTION answer0.2 <- sqrt ((( 1 - 1.3 ) ^ 2 + ( 1 - 1.5 ) ^ 2 + ( 2 - 1.5 ) ^ 2 + ( 3 - 2 ) ^ 2 + ( 5 ### END SOLUTION answer0.2 15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 3/11 answer0.3 In [ ]: test_0.2 () In [ ]: ### BEGIN SOLUTION answer0.3 <- "A" ### END SOLUTION answer0.3 In [ ]: test_0.3 () 15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 4/11 marathon.csv data/ marathon time_hrs max marathon_50 time_hrs max answer2 sample_n() In [ ]: ### BEGIN SOLUTION marathon <- read_csv ( 'data/marathon.csv' ) ### END SOLUTION marathon In [ ]: test_1.0 () In [ ]: options ( repr.plot.width = 8 , repr.plot.height = 7 ) set.seed ( 2000 ) ### DO NOT CHANGE #... <- ... |> # sample_n(...) ### BEGIN SOLUTION marathon_50 <- marathon |> sample_n ( 50 ) answer2 <- marathon_50 |> ggplot ( aes ( x = max , y = time_hrs )) + geom_point ( alpha = 0.5 ) + xlab ( "Maximum Distance Ran per\nWeek During Training (miles)" ) + ylab ( "Race Time (hours)" ) + theme ( text = element_text ( size = 20 )) ### END SOLUTION answer2 In [ ]: test_2.0 ()
15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 5/11 answer3 answer4 In [ ]: # run this cell to see a visualization of the 4 nearest neighbours options ( repr.plot.height = 6 , repr.plot.width = 7 ) marathon_50 |> ggplot ( aes ( x = max , y = time_hrs )) + geom_point ( color = 'dodgerblue' , alpha = 0.4 ) + geom_vline ( xintercept = 100 , linetype = "dotted" ) + xlab ( "Maximum Distance Ran per \n Week During Training (mi)" ) + ylab ( "Race Time (hours)" ) + geom_segment ( aes ( x = 100 , y = 2.56 , xend = 107 , yend = 2.56 ), col = geom_segment ( aes ( x = 100 , y = 2.65 , xend = 90 , yend = 2.65 ), col = " geom_segment ( aes ( x = 100 , y = 2.99 , xend = 86 , yend = 2.99 ), col = " geom_segment ( aes ( x = 100 , y = 3.05 , xend = 82 , yend = 3.05 ), col = " theme ( text = element_text ( size = 20 )) In [ ]: #... <- ... |> # mutate(diff = abs(100 - ...)) |> # ...(diff) |> # slice(...) |> # summarise(predicted = ...(...)) |> # pull() ### BEGIN SOLUTION answer3 <- marathon_50 |> mutate ( diff = abs ( 100 - max )) |> arrange ( diff ) |> slice ( 1 : 4 ) |> summarise ( predicted = mean ( time_hrs )) |> pull () ### END SOLUTION answer3 In [ ]: test_3.0 () In [ ]: ### BEGIN SOLUTION answer4 <- marathon_50 |> mutate ( diff = abs ( 100 - max )) |> arrange ( diff ) |> slice ( 1 : 2 ) |> 15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 6/11 answer5 "F" tidymodels marathon time_hrs strata marathon_split training testing marathon_training marathon_testing summarise ( predicted = mean ( time_hrs )) |> pull () ### END SOLUTION answer4 In [ ]: test_4.0 () In [ ]: ### BEGIN SOLUTION answer5 <- "C" ### END SOLUTION answer5 In [ ]: test_5.0 () In [ ]: set.seed ( 2000 ) ### DO NOT CHANGE #... <- initial_split(..., prop = ..., strata = ...) #... <- training(...) #... <- testing(...) 
15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 7/11 "kknn" set_mode("regression") marathon_spec marathon_recipe ### BEGIN SOLUTION marathon_split <- initial_split ( marathon , prop = 0.75 , strata = time_hrs ) marathon_training <- training ( marathon_split ) marathon_testing <- testing ( marathon_split ) ### END SOLUTION In [ ]: test_6.0 () In [ ]: set.seed ( 1234 ) #DO NOT REMOVE #... <- nearest_neighbor(weight_func = ..., neighbors = ...) |> # set_engine(...) |> # set_mode(...) #... <- recipe(... ~ ..., data = ...) |> # step_scale(...) |> # step_center(...) # ### BEGIN SOLUTION marathon_spec <- nearest_neighbor ( weight_func = "rectangular" , neighbors = t set_engine ( "kknn" ) |> set_mode ( "regression" ) marathon_recipe <- recipe ( time_hrs ~ max , data = marathon_training ) |> step_scale ( all_predictors ()) |> step_center ( all_predictors ()) ### END SOLUTION marathon_recipe In [ ]: test_7.0 () 15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 8/11 vfold_cv marathon_vfold strata workflow marathon_workflow set_mode tidymodels neighbors seq gridvals gridvals marathon_results In [ ]: set.seed ( 1234 ) # DO NOT REMOVE ### BEGIN SOLUTION marathon_vfold <- vfold_cv ( marathon_training , v = 5 , strata = time_hrs ) marathon_workflow <- workflow () |> add_recipe ( marathon_recipe ) |> add_model ( marathon_spec ) ### END SOLUTION marathon_workflow In [ ]: test_7.1 () In [ ]: set.seed ( 2019 ) # DO NOT CHANGE ### BEGIN SOLUTION gridvals <- tibble ( neighbors = seq ( from = 1 , to = 81 , by = 10 )) marathon_results <- marathon_workflow |> tune_grid ( resamples = marathon_vfold , grid = gridval collect_metrics () ### END SOLUTION marathon_results In [ ]: test_8.0 ()
15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 9/11 marathon_min neighbors marathon_min k_min k_min marathon_best_spec marathon_best_fit marathon_recipe predict metrics marathon_summary In [ ]: set.seed ( 2020 ) # DO NOT REMOVE #... <- marathon_results |> # filter(.metric == ...) |> # arrange(...) |> # ... ### BEGIN SOLUTION marathon_min <- marathon_results |> filter ( .metric == "rmse" ) |> arrange ( mean ) |> slice ( 1 ) ### END SOLUTION marathon_min In [ ]: test_8.1 () In [ ]: set.seed ( 1234 ) # DO NOT REMOVE #... <- marathon_min |> # pull(...) #... <- nearest_neighbor(weight_func = ..., neighbors = ...) |> # set_engine(...) |> # set_mode(...) #... <- workflow() |> # add_recipe(...) |> # add_model(...) |> 15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 10/11 answer8.3 "true" "false" # fit(data = ...) #... <- marathon_best_fit |> # predict(...) |> # bind_cols(...) |> # metrics(truth = ..., estimate = ...) 
### BEGIN SOLUTION k_min <- marathon_min |> pull ( neighbors ) marathon_best_spec <- nearest_neighbor ( weight_func = "rectangular" , neighbor set_engine ( "kknn" ) |> set_mode ( "regression" ) marathon_best_fit <- workflow () |> add_recipe ( marathon_recipe ) |> add_model ( marathon_best_spec ) |> fit ( data = marathon_training ) marathon_summary <- marathon_best_fit |> predict ( marathon_testing ) |> bind_cols ( marathon_testing ) |> metrics ( truth = time_hrs , estimate = .pred ) ### END SOLUTION marathon_summary In [ ]: test_8.2 () In [ ]: ### BEGIN SOLUTION answer8.3 <- "false" ### END SOLUTION 15/06/2023, 11:36 worksheet_regression1 file:///Users/bhavishkhosla/Downloads/worksheet_regression1_solution.html 11/11 max time_hrs predict marathon_best_fit marathon_training marathon_training bind_cols marathon_preds marathon_preds marathon_plot geom_point geom_line In [ ]: test_8.3 () In [ ]: set.seed ( 2019 ) # DO NOT CHANGE options ( repr.plot.width = 7 , repr.plot.height = 7 ) ### BEGIN SOLUTION marathon_preds <- marathon_best_fit |> predict ( marathon_training ) |> bind_cols ( marathon_training ) marathon_plot <- ggplot ( marathon_preds , aes ( x = max , y = time_hrs )) + geom_point ( alpha = 0.4 ) + xlab ( "Maximum Distance Ran per \n Week During Training (mi)" ) + ylab ( "Race Time (hours)" ) + geom_line ( data = marathon_preds , aes ( x = max , y = .pred ), color ggtitle ( paste0 ( "K = " , k_min )) + theme ( text = element_text ( size = 20 )) ### END SOLUTION marathon_plot In [ ]: test_9.0 () In [ ]: source ( 'cleanup.R' )
Your preview ends here
Eager to read the complete document? Join bartleby learn and gain access to the full version.
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help