HW3: Working with Tables and Tidyverse

pdf

School

University of California, Berkeley *

*We aren’t endorsed by this school

Course

33B

Subject

Statistics

Date

Apr 3, 2024

Type

pdf

Pages

13

Uploaded by ProfProton22246

Report
qmd hw3- template.qmd hw3-first-last.qmd first last hw3- gaston-sanchez.qmd qmd embed-resources: true "dplyr" "ggplot2" diamonds "ggplot2" diamonds help(diamonds) tibble [53,940 × 10] (S3: tbl_df/tbl/data.frame) $ carat : num [1:53940] 0.23 0.21 ... $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 ... $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 ... $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 ... diamonds # inspect structure str (diamonds, vec.len = 1 )
$ depth : num [1:53940] 61.5 59.8 ... $ table : num [1:53940] 55 61 ... $ price : int [1:53940] 326 326 ... $ x : num [1:53940] 3.95 3.89 ... $ y : num [1:53940] 3.98 3.84 ... $ z : num [1:53940] 2.43 2.31 ... dplyr # A tibble: 5 × 2 cut count <ord> <int> 1 Ideal 21551 2 Premium 13791 3 Very Good 12082 4 Good 4906 5 Fair 1610 dplyr premiumVS1 "premium" "VS1" carat color price # A tibble: 1,989 × 3 carat color price <dbl> <ord> <int> 1 0.24 I 355 "dplyr" # your code diamonds %>% group_by (cut) %>% summarise ( count = n ()) %>% arrange ( desc (count)) # your code premiumVS1 <- diamonds %>% filter (cut == "Premium" & clarity == "VS1" ) %>% select (carat, color, price) premiumVS1
2 0.71 F 2765 3 0.73 G 2770 4 0.7 E 2782 5 0.77 H 2789 6 0.71 F 2790 7 0.71 F 2790 8 0.7 F 2792 9 0.7 F 2792 10 0.7 E 2800 # 1,979 more rows dplyr color avg_carat avg_price # A tibble: 7 × 3 color avg_carat avg_price <ord> <dbl> <dbl> 1 D 0.658 3170. 2 E 0.658 3077. 3 F 0.737 3725. 4 G 0.771 3999. 5 H 0.912 4487. 6 I 1.03 5092. 7 J 1.16 5324. dplyr fairE "fair" "E" carat depth table price price # A tibble: 5 × 4 carat depth table price <dbl> <dbl> <dbl> <int> # your code diamonds %>% group_by (color) %>% summarise ( avg_carat = mean (carat), avg_price = mean (price)) # your code fairE <- diamonds %>% filter (cut == "Fair" & color == "E" ) %>% select (carat, depth, table, price) %>% arrange (price) head (fairE, 5 )
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
1 0.22 65.1 61 337 2 0.25 55.2 64 361 3 0.27 66.4 58 371 4 0.31 56.9 66 579 5 0.31 55.9 62 581 fairE fairE price_per_carat price carat price_per_carat price_per_carat # A tibble: 205 × 5 carat depth table price price_per_carat <dbl> <dbl> <dbl> <int> <dbl> 1 1.01 64.5 54 2036 2016. 2 0.4 64.7 58 813 2032. 3 0.3 64.5 49 630 2100 4 0.3 66.7 59 630 2100 5 0.35 66.2 61 738 2109. 6 0.35 66.2 58 738 2109. 7 0.45 65.8 58 951 2113. 8 0.3 56.3 62 640 2133. 9 0.45 66.9 60 980 2178. 10 0.5 66.5 58 1098 2196 # 195 more rows case_when() mutate() # your code fairE <- fairE %>% mutate ( price_per_carat = price / carat) %>% filter (price_per_carat >= 2000 ) %>% arrange (price_per_carat) fairE # your code diamonds_segmented <- diamonds %>% mutate ( price_category = case_when ( price <= quantile (price, 0.25 ) ~ 'Low' , price <= quantile (price, 0.75 ) ~ 'Medium' , TRUE ~ 'High'
# A tibble: 3 × 2 price_category avg_carat <chr> <dbl> 1 High 1.43 2 Low 0.331 3 Medium 0.718 diamonds price )) %>% group_by (price_category) %>% summarise ( avg_carat = mean (carat)) diamonds_segmented "ggplot2" # your code fair_diamonds <- diamonds %>% filter (cut == "Fair" ) ggplot (fair_diamonds, aes ( x = price)) + geom_histogram ( binwidth = 500 , fill = "skyblue" , color = "black" ) + labs ( title = "Meaningful! Price vs Count" , x = "Price" , y = "Count" )
diamonds carat cut factor(cut) color factor(cut) fill alpha = 0.5
# your code
ggplot(diamonds, aes(x = carat, color = factor(cut), fill = factor(cut))) +
  geom_density(alpha = 0.5) +
  labs(
    title = "Carat by Cut, Density!",
    x = "Carat",
    y = "Density",
    fill = "Cut",
    color = "Cut"
  )
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
diamonds clarity reorder()
# your code
clarity_cnts <- diamonds %>%
  group_by(clarity) %>%
  summarise(count = n()) %>%
  mutate(clarity = reorder(clarity, -count))
ggplot(clarity_cnts, aes(x = clarity, y = count, fill = clarity)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Clarity Frequencies of Diamonds",
    x = "Clarity",
    y = "Frequency"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
diamonds carat price color alpha alpha = 0.5 "ggplot2"
# your code
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~ color, scales = "free") +
  labs(title = "Carat vs Price, by Color", x = "Carat", y = "Price")
diamonds depth table cut color theme_bw() theme_minimal() theme_classic()
# your code
ggplot(diamonds, aes(x = depth, y = table, color = color)) +
  geom_point() +
  facet_wrap(~ cut, scales = "free") +
  labs(
    title = "Scatterplot of Depth vs Table Faceted by Cut",
    x = "Depth",
    y = "Table"
  ) +
  theme_minimal()
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
df product quantity price filter(select(arrange(df, price), product, price, quantity), price > 100) arrange(select(filter(df, price > 100), product, price, quantity), desc(price)) arrange(df, desc(price)) |> filter(select(product, price, quantity), price > 100) arrange(df, desc(price)) |> select(product, price, quantity) |> filter(price > 100) df |> filter (price > 100 ) |> select (product, price, quantity) |> arrange ( desc (price))
select(df, product, price, quantity) |> filter(arrange(desc(price)), price > 100) df price quantity mutate(df, sales = price * quantity) |> filter(price > 100) |> slice(1:100) |> select(product, price, quantity, sales) |> arrange(desc(sales)) arrange( mutate( filter( slice(df, 1:100), price > 100), sales = price * quantity), desc(sales)) |> select(product, price, quantity, sales) slice( mutate( df |> filter (price > 100 ) |> mutate ( sales = price * quantity) |> slice ( 1 : 100 ) |> select (product, price, quantity, sales) |> arrange ( desc (sales))
filter( select(df, product, price, quantity), price > 100), sales = price * quantity), 1:100) |> arrange(desc(sales)) filter(price > 100) |> mutate(sales = price * quantity) |> arrange(desc(sales)) |> select(product, price, quantity, sales) |> slice(1:100) slice( select( mutate(df, sales = price * quantity), product, price, quantity, sales), 1:100) |> filter(price > 100) |> arrange(desc(sales))
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help