Assignment_1_Rishabh_arora

docx

School

Conestoga College *

*We aren’t endorsed by this school

Course

8010

Subject

Anthropology

Date

Feb 20, 2024

Type

docx

Pages

8

Uploaded by UltraWater15179

Report
# Assignment_1_Rishabh_arora.R
# Rishabh Arora
# 2024-01-30

library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union

#' Average annual percentage change between two counts (geometric mean).
#'
#' Computes ((end / start)^(1 / years) - 1) * 100, element-wise.
#'
#' @param start Numeric vector of counts at the beginning of the period.
#' @param end Numeric vector of counts at the end of the period.
#' @param years Length of the period in years (single positive number).
#' @return Numeric vector of annual percentage changes.
annual_pct_change <- function(start, end, years) {
  stopifnot(
    is.numeric(start),
    is.numeric(end),
    is.numeric(years),
    length(years) == 1L,
    years > 0
  )
  ((end / start)^(1 / years) - 1) * 100
}

# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
file_path <- "C:/Users/Rishabh Arora/Downloads/africanelephants.xlsx"
africanelephants <- readxl::read_excel(file_path)

# Column names contain spaces, so they are accessed with [[ ]] and the exact
# names shown in the printed tibble (no leading/trailing blanks).

# Q1.1 geometric mean for annual change from 1979 to 1989 (10 years)
africanelephants$change_79to89 <- annual_pct_change(
  start = africanelephants[["1979 Elephant Population"]],
  end = africanelephants[["1989 Elephant Population"]],
  years = 10
)
print(africanelephants)
## # A tibble: 14 × 6
## Country 1979 Elephant Popula…¹ 1989 Elephant Popula…² 2007 Elephant Popula…³
## <chr> <dbl> <dbl> <dbl>
## 1 Angola 12400 12400 2530
## 2 Botswana 20000 51000 175487
## 3 Cameroon 16200 21200 15387
## 4 Cen Afr… 63000 19000 3334
## 5 Chad 15000 3100 6435
## 6 Congo 10800 70000 22102
## 7 Dem Rep… 377700 85000 23714
## 8 Gabon 13400 76000 70637
## 9 Kenya 65000 19000 31636
## 10 Mozambi… 54800 18600 26088
## 11 Somalia 24300 6000 70
## 12 Tanzania 316300 80000 167003
## 13 Zambia 150000 41000 29231
## 14 Zimbabwe 30000 43000 99107
## # abbreviated names: ¹ `1979 Elephant Population`,
## # ²`1989 Elephant Population`, ³`2007 Elephant Population`
## # 2 more variables: `2012 Elephant Population` <dbl>, change_79to89 <dbl>

# Based on the calculated geometric mean, Congo and Gabon saw significant
# increases in elephant population from 1979 to 1989.

# Q1.2 geometric mean for annual change from 1989 to 2007 (18 years)
africanelephants$change_89to07 <- annual_pct_change(
  start = africanelephants[["1989 Elephant Population"]],
  end = africanelephants[["2007 Elephant Population"]],
  years = 18
)
print(africanelephants)
## # A tibble: 14 × 7
## Country 1979 Elephant Popula…¹ 1989 Elephant Popula…² 2007 Elephant Popula…³
## <chr> <dbl> <dbl> <dbl>
## 1 Angola 12400 12400 2530
## 2 Botswana 20000 51000 175487
## 3 Cameroon 16200 21200 15387
## 4 Cen Afr… 63000 19000 3334
## 5 Chad 15000 3100 6435
## 6 Congo 10800 70000 22102
## 7 Dem Rep… 377700 85000 23714
## 8 Gabon 13400 76000 70637
## 9 Kenya 65000 19000 31636
## 10 Mozambi… 54800 18600 26088
## 11 Somalia 24300 6000 70
## 12 Tanzania 316300 80000 167003
## 13 Zambia 150000 41000 29231
## 14 Zimbabwe 30000 43000 99107
## # abbreviated names: ¹ `1979 Elephant Population`,
## # ²`1989 Elephant Population`, ³`2007 Elephant Population`
## # 3 more variables: `2012 Elephant Population` <dbl>, change_79to89 <dbl>,
## # change_89to07 <dbl>

# From 1989 to 2007, Somalia saw the biggest decrease in elephant population.

# Q1.3 geometric mean for annual change from 2007 to 2012 (5 years)
africanelephants$change_07to12 <- annual_pct_change(
  start = africanelephants[["2007 Elephant Population"]],
  end = africanelephants[["2012 Elephant Population"]],
  years = 5
)
print(africanelephants)
## # A tibble: 14 × 8
## Country 1979 Elephant Popula…¹ 1989 Elephant Popula…² 2007 Elephant Popula…³
## <chr> <dbl> <dbl> <dbl>
## 1 Angola 12400 12400 2530
## 2 Botswana 20000 51000 175487
## 3 Cameroon 16200 21200 15387
## 4 Cen Afr… 63000 19000 3334
## 5 Chad 15000 3100 6435
## 6 Congo 10800 70000 22102
## 7 Dem Rep… 377700 85000 23714
## 8 Gabon 13400 76000 70637
## 9 Kenya 65000 19000 31636
## 10 Mozambi… 54800 18600
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
## 26088   (continues row 10, "Mozambi…", of the Q1.3 printout above)
## 11 Somalia 24300 6000 70
## 12 Tanzania 316300 80000 167003
## 13 Zambia 150000 41000 29231
## 14 Zimbabwe 30000 43000 99107
## # abbreviated names: ¹ `1979 Elephant Population`,
## # ²`1989 Elephant Population`, ³`2007 Elephant Population`
## # 4 more variables: `2012 Elephant Population` <dbl>, change_79to89 <dbl>,
## # change_89to07 <dbl>, change_07to12 <dbl>

# From 2007 to 2012, Chad and Dem Rep of Congo saw the biggest decrease in
# elephant population.

# Q1.4 multiple boxplot graph that includes boxplots of the elephant
# population observations in each year 1979, 1989, 2007, 2012

# Only the four population columns are reshaped. Reshaping every non-Country
# column would also pull the change_* percentage columns into `Population`
# and produce an extra NA "year" on the plot.
population_cols <- c(
  "1979 Elephant Population",
  "1989 Elephant Population",
  "2007 Elephant Population",
  "2012 Elephant Population"
)

elephant_data <- tidyr::pivot_longer(
  africanelephants[c("Country", population_cols)],
  cols = tidyr::all_of(population_cols),
  names_to = "Year",
  values_to = "Population"
)

# Keep the year as an ordered-by-time factor inside the data frame so the
# plot mapping refers to a column, not to a free-standing vector.
elephant_data$Year <- factor(elephant_data$Year, levels = population_cols)

# One box per year across all countries. Filling by Country would split each
# year into 14 single-observation groups and no box could be drawn.
ggplot(data = elephant_data, aes(x = Year, y = Population)) +
  geom_boxplot() +
  labs(
    title = "Elephant Population in Each Year",
    x = "Year",
    y = "Elephant Population"
  ) +
  theme_minimal()
# Reading the Q1.4 boxplots:
# - The 1989 box heights are lower than in 1979, which shows that the
#   elephant population decreased in many countries.
# - From 1989 to 2007 the elephant population decreased further in many
#   countries; the population in Somalia decreased the most.
# - The population in Congo increased from 2007 to 2012, while Chad and
#   Dem Rep of Congo saw a decrease; other countries showed little or no
#   change.

# Q2: daily visitors are modelled as normally distributed with the mean and
# standard deviation below. Named *_visitors so that base::mean() is not
# shadowed by a number.
mean_visitors <- 4500000
sd_visitors <- 820000

# Q2.1 probability that the web site has fewer than 5 million visitors in a
# single day
target_visitors <- 5000000
# pnorm() standardises internally when given mean/sd, so no manual z-score.
probab <- pnorm(target_visitors, mean = mean_visitors, sd = sd_visitors)
cat("Probability of fewer than 5 million visitors:", probab, "\n")
## Probability of fewer than 5 million visitors: 0.7289883

# Q2.2 probability that the web site has 3 million or more visitors in a
# single day
target2_visitors <- 3000000
# Upper tail: P(X >= 3 million).
probab2 <- pnorm(
  target2_visitors,
  mean = mean_visitors,
  sd = sd_visitors,
  lower.tail = FALSE
)
cat("Probability of 3 million or more visitors:", probab2, "\n")
## Probability of 3 million or more visitors: 0.9663203

# Q2.3 probability that the web site has between 3 million and 4 million
# visitors in a single day
lb <- 3000000
ub <- 4000000
probab_lb <- pnorm(lb, mean = mean_visitors, sd = sd_visitors)
probab_ub <- pnorm(ub, mean = mean_visitors, sd = sd_visitors)
# P(lb < X < ub) = F(ub) - F(lb)
probab3 <- probab_ub - probab_lb
cat("Probability of between 3 million and 4 million visitors:", probab3, "\n")
## Probability of between 3 million and 4 million visitors: 0.237332

# Q2.4 Amount of web traffic requiring additional server capacity
# `threshold` is the upper-tail probability: the result is the visitor count
# exceeded with probability 0.15, i.e. the 85th percentile.
threshold <- 0.15
traffic_threshold <- qnorm(
  1 - threshold,
  mean = mean_visitors,
  sd = sd_visitors
)
# The value is a raw visitor count (about 5.35 million), so the label is
# "visitors"; the earlier "million visitors" label overstated it by 10^6.
cat(
  "Amount of web traffic requiring additional server capacity:",
  traffic_threshold,
  "visitors\n"
)
## Amount of web traffic requiring additional server capacity: 5349875 visitors

# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
file_path <- "C:/Users/Rishabh Arora/Downloads/BigBlue.xlsx"
bigblue_raw <- readxl::read_excel(file_path)

# Q3.1 Selecting columns to normalize
clm_norm <- c("UsageRate", "Recognition", "Leader")

# Normalizing the selected columns (z-scores: centred and divided by the
# column standard deviation)
normalize_columns <- as.data.frame(scale(bigblue_raw[clm_norm]))

# Combining the normalized columns with the remaining columns of the dataset.
# A logical mask is used instead of -which(...): a negative which() on zero
# matches would silently drop every column.
keep_cols <- !names(bigblue_raw) %in% clm_norm
bigblue <- cbind(bigblue_raw[, keep_cols, drop = FALSE], normalize_columns)
head(bigblue)
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
## EmployeeID UsageRate Recognition Leader
## 1 1 -0.9831708 -0.4328996 -0.1857172
## 2 2 1.0156009 -0.4328996 -0.1857172
## 3 3 -0.9721172 -0.4328996 -0.1857172
## 4 4 -0.9824326 -0.4328996 -0.1857172
## 5 5 -0.9817885 -0.4328996 -0.1857172
## 6 6 -0.8906684 -0.4328996 -0.1857172

# Q3.2 How many clusters do you recommend to categorize the employees? Why?

# Cluster on the normalized measures only. EmployeeID is an unscaled
# identifier; leaving it in lets it dominate the Euclidean distance, and the
# summary recorded below (from the original run) shows exactly that: the
# three clusters are the ID ranges 1-36, 37-72 and 73-107.
cluster_features <- bigblue[, setdiff(names(bigblue), "EmployeeID"),
  drop = FALSE
]

# kmeans() uses random starting centres; fix the seed so the elbow plot, the
# cluster numbering and the interpretation stay in sync between runs.
set.seed(123)

# Total within-cluster sum of squares for k = 1..10
max_k <- 10
css <- vapply(
  seq_len(max_k),
  function(k) {
    kmeans(cluster_features, centers = k, nstart = 10)$tot.withinss
  },
  numeric(1)
)

# elbow plot
plot(
  seq_len(max_k), css,
  type = "b",
  main = "Elbow Method",
  xlab = "Number of Clusters (k)",
  ylab = "CSS"
)

# Based on the elbow plot, the recommended number of clusters is 3.
# NOTE(review): that reading was made on the original plot, which included
# EmployeeID; re-check the elbow after re-running.
rec_cluster <- 3

# Q3.3 performing k-means clustering with 3 clusters
kmeans_model <- kmeans(cluster_features, centers = rec_cluster, nstart = 10)

# Assign cluster labels to the original data
bigblue_clusters <- cbind(bigblue, Cluster = kmeans_model$cluster)

# Print cluster summaries (mean / min / max of each clustering feature)
cluster_summaries <- aggregate(
  cluster_features,
  by = list(Cluster = kmeans_model$cluster),
  FUN = function(x) c(Mean = mean(x), Min = min(x), Max = max(x))
)
print(cluster_summaries)
## Output recorded from the original run (clustered with EmployeeID included,
## no seed); regenerate after re-running:
## Cluster EmployeeID.Mean EmployeeID.Min EmployeeID.Max UsageRate.Mean
## 1 1 90.0 73.0 107.0 0.4617810
## 2 2 54.5 37.0 72.0 -0.2100311
## 3 3 18.5 1.0 36.0 -0.2389226
## UsageRate.Min UsageRate.Max Recognition.Mean Recognition.Min Recognition.Max
## 1 -0.9642919 2.1550943 0.3942479 -0.4328996 4.1991262
## 2 -0.9822525 1.9362465 -0.1755648 -0.4328996 3.0411198
## 3 -0.9831708 1.9846656 -0.2077317 -0.4328996 1.8831133
## Leader.Mean Leader.Min Leader.Max
## 1 0.3820467 -0.1857172 6.4381947
## 2 -0.1857172 -0.1857172 -0.1857172
## 3 -0.1857172 -0.1857172 -0.1857172

# Q3.4 Interpretation
# NOTE(review): the cluster numbers below do not match the summary recorded
# above, where cluster 1 is the only cluster with positive means. k-means
# labels are arbitrary, so re-derive this text from the regenerated summary.
#
# Cluster 1: Mean usage rate and recognition are negative, which means these
# employees were not involved in high-priority projects and have not served
# as leaders on projects, compared with other employees.
# Cluster 2: Mean usage rate and recognition are positive, which indicates
# that employees in this cluster were involved in high-priority projects the
# most and also served as leaders, as the mean of Leader is also positive.
# Cluster 3: Mean usage rate, recognition and leader are negative, but
# slightly better than for cluster 1 employees.
# We can say that employees in Cluster 2 performed the best: they were
# involved in high-priority projects, served as leaders and also received
# recognition for their performance.