4.3

docx

School

Indiana Wesleyan University, Marion *

*We aren’t endorsed by this school

Course

BADM-707-0

Subject

Statistics

Date

Apr 3, 2024

Type

docx

Pages

15

Uploaded by ConstableLapwingPerson2642

Report
15.1 code:

install.packages('tidyverse')
install.packages('gcookbook')
library(ggplot2)
library(dplyr)
library(gcookbook)

# Two starting vectors
g <- c("A", "B", "C")
x <- 1:3
dat <- data.frame(g, x)
dat
#>   g x
#> 1 A 1
#> 2 B 2
#> 3 C 3

lst <- list(group = g, value = x) # A list of vectors
dat <- as.data.frame(lst)

data_frame(g, x)
#> Warning: `data_frame()` was deprecated in tibble 1.1.0.
#> Please use `tibble()` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> # A tibble: 3 × 2
#>   g         x
#>   <chr> <int>
#> 1 A         1
#> 2 B         2
#> 3 C         3
#>

# Convert the list of vectors to a tibble
as_data_frame(lst)
as_tibble(dat)
#> # A tibble: 3 × 2
#>   group value
#>   <chr> <int>
#> 1 A         1
#> 2 B         2
#> 3 C         3

15.1 output:
15.2 code: str(ToothGrowth) #> 'data.frame': 60 obs. of 3 variables: #> $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ... #> $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ... #> $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ... summary(ToothGrowth) #> len supp dose #> Min. : 4.20 OJ:30 Min. :0.500 #> 1st Qu.:13.07 VC:30 1st Qu.:0.500 #> Median :19.25 Median :1.000 #> Mean :18.81 Mean :1.167 #> 3rd Qu.:25.27 3rd Qu.:2.000 #> Max. :33.90 Max. :2.000 tg <- ToothGrowth tg$supp <- as.character(tg$supp) str(tg) #> 'data.frame': 60 obs. of 3 variables: #> $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ... #> $ supp: chr "VC" "VC" "VC" "VC" ... #> $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ... # Print out the columns by themselves # From old data frame (factor) ToothGrowth$supp #> [1] VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC #> [25] VC VC VC VC VC VC OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ #> [49] OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
#> Levels: OJ VC # From new data frame (character) tg$supp #> [1] "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" #> [15] "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" #> [29] "VC" "VC" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" #> [43] "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" #> [57] "OJ" "OJ" "OJ" "OJ" 15.2 output: 15.3 code: library(dplyr) ToothGrowth %>% mutate(newcol = NA) #> len supp dose newcol
#> 1 4.2 VC 0.5 NA #> 2 11.5 VC 0.5 NA #> ...<56 more rows>... #> 59 29.4 OJ 2.0 NA #> 60 23.0 OJ 2.0 NA # Since ToothGrowth has 60 rows, we must create a new vector that has 60 rows vec <- rep(c(1, 2), 30) ToothGrowth %>% mutate(newcol = vec) #> len supp dose newcol #> 1 4.2 VC 0.5 1 #> 2 11.5 VC 0.5 2 #> ...<56 more rows>... #> 59 29.4 OJ 2.0 1 #> 60 23.0 OJ 2.0 2 # Make a copy of ToothGrowth for this example ToothGrowth2 <- ToothGrowth # Assign NA's for the whole column ToothGrowth2$newcol <- NA # Assign 1 and 2, automatically repeating to fill ToothGrowth2$newcol <- c(1, 2)
15.3 output: 15.7 code: library(gcookbook) # Load gcookbook for the climate data set climate #> Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y #> 1 Berkeley 1800 NA NA -0.435 0.505 #> 2 Berkeley 1801 NA NA -0.453 0.493 #> ...<495 more rows>... #> 498 CRUTEM3 2010 0.8023 NA NA NA #> 499 CRUTEM3 2011 0.6193 NA NA NA climate[climate$Source == "Berkeley" & climate$Year >= 1900 & climate$Year <= 2000, c("Year", "Anomaly10y")] #> Year Anomaly10y #> 101 1900 -0.171 #> 102 1901 -0.162 #> ...<97 more rows>...
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
#> 200 1999 0.734 #> 201 2000 0.748 15.7 Output: 15.15 code: library(gcookbook) # Load gcookbook for the heightweight data set heightweight #> sex ageYear ageMonth heightIn weightLb #> 1 f 11.92 143 56.3 85.0 #> 2 f 12.92 155 62.3 105.0 #> ...<232 more rows>... #> 236 m 13.92 167 62.0 107.5
#> 237 m 12.58 151 59.3 87.0 library(dplyr) heightweight %>% mutate(heightCm = heightIn * 2.54) #> sex ageYear ageMonth heightIn weightLb heightCm #> 1 f 11.92 143 56.3 85.0 143.002 #> 2 f 12.92 155 62.3 105.0 158.242 #> ...<232 more rows>... #> 236 m 13.92 167 62.0 107.5 157.480 #> 237 m 12.58 151 59.3 87.0 150.622 heightweight %>% mutate( heightCm = heightIn * 2.54, weightKg = weightLb / 2.204 ) #> sex ageYear ageMonth heightIn weightLb heightCm weightKg #> 1 f 11.92 143 56.3 85.0 143.002 38.56624 #> 2 f 12.92 155 62.3 105.0 158.242 47.64065 #> ...<232 more rows>... #> 236 m 13.92 167 62.0 107.5 157.480 48.77495 #> 237 m 12.58 151 59.3 87.0 150.622 39.47368 heightweight %>% mutate(bmi = weightKg / (heightCm / 100)^2) heightweight %>% mutate( heightCm = heightIn * 2.54,
weightKg = weightLb / 2.204, bmi = weightKg / (heightCm / 100)^2 ) #> sex ageYear ageMonth heightIn weightLb heightCm weightKg bmi #> 1 f 11.92 143 56.3 85.0 143.002 38.56624 18.85919 #> 2 f 12.92 155 62.3 105.0 158.242 47.64065 19.02542 #> ...<232 more rows>... #> 236 m 13.92 167 62.0 107.5 157.480 48.77495 19.66736 #> 237 m 12.58 151 59.3 87.0 150.622 39.47368 17.39926 #> heightweight$heightCm <- heightweight$heightIn * 2.54 15.15 output: 15.17 code: library(MASS) # Load MASS for the cabbages data set library(dplyr) cabbages %>% group_by(Cult, Date) %>% summarise( Weight = mean(HeadWt), VitC = mean(VitC) ) #> `summarise()` has grouped output by 'Cult'. You can override using the #> `.groups` argument. #> # A tibble: 6 × 4
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
#> # Groups: Cult [2] #> Cult Date Weight VitC #> <fct> <fct> <dbl> <dbl> #> 1 c39 d16 3.18 50.3 #> 2 c39 d20 2.8 49.4 #> 3 c39 d21 2.74 54.8 #> 4 c52 d16 2.26 62.5 #> 5 c52 d20 3.11 58.9 #> 6 c52 d21 1.47 71.8 #> cabbages #> Cult Date HeadWt VitC #> 1 c39 d16 2.5 51 #> 2 c39 d16 2.2 55 #> ...<56 more rows>... #> 59 c52 d21 1.5 66 #> 60 c52 d21 1.6 72 library(dplyr) summarise(cabbages, Weight = mean(HeadWt)) #> Weight #> 1 2.593333 #> tmp <- group_by(cabbages, Cult) summarise(tmp, Weight = mean(HeadWt)) #> # A tibble: 2 × 2 #> Cult Weight #> <fct> <dbl> #> 1 c39 2.91 #> 2 c52 2.28
#> group_by(cabbages, Cult) # The pipe operator moves `cabbages` to the first argument position of group_by() cabbages %>% group_by(Cult) summarise(group_by(cabbages, Cult), Weight = mean(HeadWt)) cabbages %>% group_by(Cult) %>% summarise(Weight = mean(HeadWt)) cabbages %>% group_by(Cult, Date) %>% summarise( Weight = mean(HeadWt), Vitc = mean(VitC) ) #> `summarise()` has grouped output by 'Cult'. You can override using the #> `.groups` argument. #> # A tibble: 6 × 4 #> # Groups: Cult [2] #> Cult Date Weight Vitc #> <fct> <fct> <dbl> <dbl> #> 1 c39 d16 3.18 50.3 #> 2 c39 d20 2.8 49.4 #> 3 c39 d21 2.74 54.8 #> 4 c52 d16 2.26 62.5 #> 5 c52 d20 3.11 58.9 #> 6 c52 d21 1.47 71.8 #>
#> cabbages %>% group_by(Cult, Date) %>% summarise( Weight = mean(HeadWt), sd = sd(HeadWt), n = n() ) #> `summarise()` has grouped output by 'Cult'. You can override using the #> `.groups` argument. #> # A tibble: 6 × 5 #> # Groups: Cult [2] #> Cult Date Weight sd n #> <fct> <fct> <dbl> <dbl> <int> #> 1 c39 d16 3.18 0.957 10 #> 2 c39 d20 2.8 0.279 10 #> 3 c39 d21 2.74 0.983 10 #> 4 c52 d16 2.26 0.445 10 #> 5 c52 d20 3.11 0.791 10 #> 6 c52 d21 1.47 0.211 10 c1 <- cabbages # Make a copy c1$HeadWt[c(1, 20, 45)] <- NA # Set some values to NA c1 %>% group_by(Cult) %>% summarise( Weight = mean(HeadWt), sd = sd(HeadWt), n = n()
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
) #> # A tibble: 2 × 4 #> Cult Weight sd n #> <fct> <dbl> <dbl> <int> #> 1 c39 NA NA 30 #> 2 c52 NA NA 30 c1 %>% group_by(Cult) %>% summarise( Weight = mean(HeadWt, na.rm = TRUE), sd = sd(HeadWt, na.rm = TRUE), n = n() ) #> # A tibble: 2 × 4 #> Cult Weight sd n #> <fct> <dbl> <dbl> <int> #> 1 c39 2.9 0.822 30 #> 2 c52 2.23 0.828 30 # Copy cabbages and remove all rows with both c52 and d21 c2 <- filter(cabbages, !( Cult == "c52" & Date == "d21" )) c2a <- c2 %>% group_by(Cult, Date) %>% summarise(Weight = mean(HeadWt)) ggplot(c2a, aes(x = Date, fill = Cult, y = Weight)) + geom_col(position = "dodge") library(tidyr)
c2b <- c2a %>% ungroup() %>% complete(Cult, Date) ggplot(c2b, aes(x = Date, fill = Cult, y = Weight)) + geom_col(position = "dodge") # Copy cabbages and remove all rows with both c52 and d21 c2 <- filter(cabbages, !( Cult == "c52" & Date == "d21" )) c2a <- c2 %>% group_by(Cult, Date) %>% summarise(Weight = mean(HeadWt)) #> `summarise()` has grouped output by 'Cult'. You can override using the #> `.groups` argument. ggplot(c2a, aes(x = Date, fill = Cult, y = Weight)) + geom_col(position = "dodge") library(tidyr) c2b <- c2a %>% ungroup() %>% complete(Cult, Date) ggplot(c2b, aes(x = Date, fill = Cult, y = Weight)) + geom_col(position = "dodge") 15.17 output:
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help