Assignment_1_Rishabh_arora
docx
keyboard_arrow_up
School
Conestoga College *
*We aren’t endorsed by this school
Course
8010
Subject
Anthropology
Date
Feb 20, 2024
Type
docx
Pages
8
Uploaded by UltraWater15179
Assignment_1_Rishabh_arora.R
Rishabh Arora
2024-01-30
library(ggplot2)
library(dplyr)
## ## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## ## filter, lag
## The following objects are masked from 'package:base':
## ## intersect, setdiff, setequal, union
# Location of the African elephant workbook on the author's machine.
file_path <- "C:/Users/Rishabh Arora/Downloads/africanelephants.xlsx"

# Load the workbook into a tibble via readxl.
africanelephants <- readxl::read_excel(path = file_path)
#Q1.1 geometric mean for annual change from 1979 to 1989
# Average annual percentage change per country between 1979 and 1989:
# the 10th root of the 1989/1979 population ratio, minus 1, times 100.
africanelephants[["change_79to89"]] <- with(
  africanelephants,
  ((`1989 Elephant Population` / `1979 Elephant Population`)^(1 / 10) - 1) * 100
)
print(africanelephants)
## # A tibble: 14 × 6
## Country 1979 Elephant Popula…¹ 1989 Elephant Popula…² 2007 Elephant Popula…³
## <chr> <dbl> <dbl> <dbl>
## 1 Angola 12400 12400 2530
## 2 Botswana 20000 51000 175487
## 3 Cameroon 16200 21200 15387
## 4 Cen Afr… 63000 19000 3334
## 5 Chad 15000 3100 6435
## 6 Congo 10800 70000 22102
## 7 Dem Rep… 377700 85000 23714
## 8 Gabon 13400 76000 70637
## 9 Kenya 65000 19000 31636
## 10 Mozambi… 54800 18600 26088
## 11 Somalia 24300 6000 70
## 12 Tanzania 316300 80000 167003
## 13 Zambia 150000 41000 29231
## 14 Zimbabwe 30000 43000 99107
## # abbreviated names: ¹
ℹ
`1979 Elephant Population`,
## # ²`1989 Elephant Population`, ³`2007 Elephant Population`
## # 2 more variables: `2012 Elephant Population` <dbl>, ℹ
change_79to89 <dbl>
## Based on the calculated geometric mean, Congo and Gabon saw the most significant increase in elephant population from 1979 to 1989.
#Q1.2 geometric mean for annual change from 1989 to 2007 over 18 years
# Average annual percentage change per country between 1989 and 2007:
# the 18th root of the 2007/1989 population ratio, minus 1, times 100.
africanelephants[["change_89to07"]] <- with(
  africanelephants,
  ((`2007 Elephant Population` / `1989 Elephant Population`)^(1 / 18) - 1) * 100
)
print(africanelephants)
## # A tibble: 14 × 7
## Country 1979 Elephant Popula…¹ 1989 Elephant Popula…² 2007 Elephant Popula…³
## <chr> <dbl> <dbl> <dbl>
## 1 Angola 12400 12400 2530
## 2 Botswana 20000 51000 175487
## 3 Cameroon 16200 21200 15387
## 4 Cen Afr… 63000 19000 3334
## 5 Chad 15000 3100 6435
## 6 Congo 10800 70000 22102
## 7 Dem Rep… 377700 85000 23714
## 8 Gabon 13400 76000 70637
## 9 Kenya 65000 19000 31636
## 10 Mozambi… 54800 18600 26088
## 11 Somalia 24300 6000 70
## 12 Tanzania 316300 80000 167003
## 13 Zambia 150000 41000 29231
## 14 Zimbabwe 30000 43000 99107
## # abbreviated names: ¹
ℹ
`1979 Elephant Population`,
## # ²`1989 Elephant Population`, ³`2007 Elephant Population`
## # 3 more variables: `2012 Elephant Population` <dbl>, ℹ
change_79to89 <dbl>,
## # change_89to07 <dbl>
##from the year 1989 to 2007 Somalia saw the biggest decrease in elephant population
#Q1.3 geometric mean for annual change from 2007 to 2012 over 5 years
# Average annual percentage change per country between 2007 and 2012:
# the 5th root of the 2012/2007 population ratio, minus 1, times 100.
africanelephants[["change_07to12"]] <- with(
  africanelephants,
  ((`2012 Elephant Population` / `2007 Elephant Population`)^(1 / 5) - 1) * 100
)
print(africanelephants)
## # A tibble: 14 × 8
## Country 1979 Elephant Popula…¹ 1989 Elephant Popula…² 2007 Elephant Popula…³
## <chr> <dbl> <dbl> <dbl>
## 1 Angola 12400 12400 2530
## 2 Botswana 20000 51000 175487
## 3 Cameroon 16200 21200 15387
## 4 Cen Afr… 63000 19000 3334
## 5 Chad 15000 3100 6435
## 6 Congo 10800 70000 22102
## 7 Dem Rep… 377700 85000 23714
## 8 Gabon 13400 76000 70637
## 9 Kenya 65000 19000 31636
## 10 Mozambi… 54800 18600 26088
## 11 Somalia 24300 6000 70
## 12 Tanzania 316300 80000 167003
## 13 Zambia 150000 41000 29231
## 14 Zimbabwe 30000 43000 99107
## # abbreviated names: ¹
ℹ
`1979 Elephant Population`,
## # ²`1989 Elephant Population`, ³`2007 Elephant Population`
## # 4 more variables: `2012 Elephant Population` <dbl>, ℹ
change_79to89 <dbl>,
## # change_89to07 <dbl>, change_07to12 <dbl>
## From 2007 to 2012, Chad and Dem Rep of Congo saw the biggest decrease in elephant population.
#Q1.4 multiple boxplot graph that includes boxplots of the elephant population observations in each year 1979, 1989, 2007, 2012
# Columns holding the raw population counts, in chronological order.
population_cols <- c(
  "1979 Elephant Population",
  "1989 Elephant Population",
  "2007 Elephant Population",
  "2012 Elephant Population"
)

# Reshape to long format using ONLY the population columns. By this point
# `africanelephants` also carries the derived change_* percentage columns;
# stacking those with the counts would add rows whose Year is not one of the
# four survey years (they become NA once Year is turned into a factor).
elephant_data <- tidyr::pivot_longer(
  africanelephants[c("Country", population_cols)],
  cols = -Country,
  names_to = "Year",
  values_to = "Population"
)

# Store the ordered factor inside the data frame so the plot maps a column
# rather than a detached global vector.
elephant_data$Year <- factor(elephant_data$Year, levels = population_cols)

# Retained so any code that still reads the standalone vector keeps working.
elephant_data_Year <- elephant_data$Year

# One box per survey year summarising the distribution across countries.
# No `fill = Country`: splitting by country leaves a single observation per
# box, so no quartiles or whiskers can be drawn.
ggplot(data = elephant_data, aes(x = Year, y = Population)) +
  geom_boxplot() +
  labs(
    title = "Elephant Population in Each Year",
    x = "Year",
    y = "Elephant Population"
  ) +
  theme_minimal()
## 1989 box heights are lower than 1979, which shows that the elephant population decreased in many countries.
## From 1989 to 2007 the elephant population decreased further in many countries; the population in Somalia decreased the most.
## The population in Congo increased from 2007 to 2012, while Chad and Dem Rep of Congo saw a decrease and the other countries showed little or no change.
#Q2.1 probability that the web site has fewer than 5 million visitors in a single day
# Parameters of the normal model for visitors in a single day.
# NOTE: the name `mean` is the same as base::mean(); function calls such as
# mean(x) still resolve to the function, and the name is kept because the
# Q2.2-Q2.4 sections further down read `mean` and `std_dev`.
mean <- 4500000
std_dev <- 820000
target_visitors <- 5000000

# Calculating the z-score
z_score <- (target_visitors - mean) / std_dev

# Calculating the probability P(X < 5,000,000) from the standard normal CDF
probab <- pnorm(z_score)
cat("Probability of fewer than 5 million visitors:", probab, "\n")
## Probability of fewer than 5 million visitors: 0.7289883
#Q2.2 probability that the web site has 3 million or more visitors in a single day
# Upper-tail probability P(X >= 3,000,000): the complement of the standard
# normal CDF evaluated at the z-score of the 3-million mark.
target2_visitors <- 3000000
z_score <- (target2_visitors - mean) / std_dev
probab2 <- 1 - pnorm(z_score)
cat("Probability of 3 million or more visitors:", probab2, "\n")
## Probability of 3 million or more visitors: 0.9663203
#Q2.3 probability that the web site has between 3 million and 4 million visitors in a single day
# Interval bounds for P(3,000,000 <= X <= 4,000,000).
lb <- 3000000
ub <- 4000000

# z-scores of both bounds under the model defined by `mean` and `std_dev`.
z_lb <- (lb - mean) / std_dev
z_ub <- (ub - mean) / std_dev

# Cumulative probabilities up to each bound.
probab_lb <- pnorm(z_lb)
probab_ub <- pnorm(z_ub)

# Calculating the probability between 3 million and 4 million
probab3 <- probab_ub - probab_lb
cat("Probability of between 3 million and 4 million visitors:", probab3, "\n")
## Probability of between 3 million and 4 million visitors: 0.237332
#Q2.4 Amount of web traffic requiring additional server capacity
# Share of the upper tail that is treated as needing additional capacity.
threshold <- 0.15

# Calculating the z-score that leaves `threshold` probability above it
z_threshold <- qnorm(1 - threshold)

# Convert the z-score back to a visitor count under the normal model.
traffic_threshold <- mean + z_threshold * std_dev

# The value is a raw visitor count, so the label is "visitors"; the earlier
# "million visitors" label overstated the figure by a factor of one million.
cat(
  "Amount of web traffic requiring additional server capacity:",
  traffic_threshold, "visitors\n"
)
## Amount of web traffic requiring additional server capacity: 5349875 visitors
# Location of the BigBlue workbook on the author's machine.
file_path <- "C:/Users/Rishabh Arora/Downloads/BigBlue.xlsx"

# Load the workbook into a tibble via readxl.
data <- readxl::read_excel(path = file_path)
#Q3.1 Selecting columns to normalize
# Names of the columns to standardize.
clm_norm <- c("UsageRate", "Recognition", "Leader")

# Normalizing the selected columns: scale() centers each column and divides
# it by its standard deviation.
normalize_columns <- as.data.frame(scale(data[clm_norm]))

# Combining the normalized columns with the dataset: keep every column that
# was not normalized, then append the normalized versions.
untouched_cols <- setdiff(names(data), clm_norm)
bigblue <- cbind(data[untouched_cols], normalize_columns)
head(bigblue)
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
## EmployeeID UsageRate Recognition Leader
## 1 1 -0.9831708 -0.4328996 -0.1857172
## 2 2 1.0156009 -0.4328996 -0.1857172
## 3 3 -0.9721172 -0.4328996 -0.1857172
## 4 4 -0.9824326 -0.4328996 -0.1857172
## 5 5 -0.9817885 -0.4328996 -0.1857172
## 6 6 -0.8906684 -0.4328996 -0.1857172
#Q3.2 How many clusters do you recommend to categorize the employees? Why?
# Total within-cluster sum of squares for k = 1..10, used for the elbow plot.
# Cluster only on the standardized feature columns: EmployeeID is an arbitrary
# identifier on a far larger scale than the z-scored features, so including it
# makes k-means split employees by ID range instead of by behaviour.
cluster_features <- bigblue[clm_norm]

# Fix the seed so the random starts (nstart) give a reproducible curve.
set.seed(8010)

css <- numeric(length = 10)
for (i in seq_along(css)) {
  kmeans_model <- kmeans(cluster_features, centers = i, nstart = 10)
  css[i] <- kmeans_model$tot.withinss
}

#elbow plot
plot(
  seq_along(css), css,
  type = "b",
  main = "Elbow Method",
  xlab = "Number of Clusters (k)",
  ylab = "CSS"
)
##based on elbow plot recommended number of clusters are 3
# Number of clusters chosen from the elbow plot.
# NOTE(review): re-check this choice after re-running the elbow plot on the
# feature columns only.
rec_cluster <- 3

#Q3.3 performing k-means clustering with 3 clusters
# Cluster on the standardized features only. With EmployeeID included, the
# unscaled ID dominates the distances and the clusters come out as contiguous
# ID ranges (1-36, 37-72, 73-107) rather than behavioural groups.
# A fixed seed makes the cluster assignment and numbering reproducible.
set.seed(8010)
kmeans_model <- kmeans(bigblue[clm_norm], centers = rec_cluster, nstart = 10)

# Assign cluster labels to the original data
bigblue_clusters <- cbind(bigblue, Cluster = kmeans_model$cluster)

# Print cluster summaries (mean, min and max of every column per cluster).
# Any previously rendered summary table must be regenerated after this change.
cluster_summaries <- aggregate(
  bigblue,
  by = list(Cluster = kmeans_model$cluster),
  FUN = function(x) c(Mean = mean(x), Min = min(x), Max = max(x))
)
print(cluster_summaries)
## Cluster EmployeeID.Mean EmployeeID.Min EmployeeID.Max UsageRate.Mean
## 1 1 90.0 73.0 107.0 0.4617810
## 2 2 54.5 37.0 72.0 -0.2100311
## 3 3 18.5 1.0 36.0 -0.2389226
## UsageRate.Min UsageRate.Max Recognition.Mean Recognition.Min Recognition.Max
## 1 -0.9642919 2.1550943 0.3942479 -0.4328996 4.1991262
## 2 -0.9822525 1.9362465 -0.1755648 -0.4328996 3.0411198
## 3 -0.9831708 1.9846656 -0.2077317 -0.4328996 1.8831133
## Leader.Mean Leader.Min Leader.Max
## 1 0.3820467 -0.1857172 6.4381947
## 2 -0.1857172 -0.1857172 -0.1857172
## 3 -0.1857172 -0.1857172 -0.1857172
#Q3.4 Interpretation
##Cluster 1: Mean usage rate and recognition are negative, which means these employees were not involved in high-priority projects and have not served as leaders on projects compared to other employees.
##Cluster 2: Mean usage rate and recognition are positive, which indicates that employees in this cluster were involved in high-priority projects the most and also served as leaders, as the mean of Leader is also positive.
##Cluster 3: Mean usage rate, recognition and leader are negative, but slightly better than for Cluster 1 employees.
##We can say that employees in Cluster 2 performed the best: they were involved in high-priority projects, served as leaders and also received recognition for their performance.
##Note: k-means cluster numbers are arbitrary and can change between runs; in the summary printed above, the group with positive means is labelled Cluster 1.
Related Documents
Recommended textbooks for you
Essentials of Physical Anthropology (Third Editio...
Anthropology
ISBN:9780393938661
Author:Clark Spencer Larsen
Publisher:W. W. Norton & Company
Recommended textbooks for you
- Essentials of Physical Anthropology (Third Editio...AnthropologyISBN:9780393938661Author:Clark Spencer LarsenPublisher:W. W. Norton & Company
Essentials of Physical Anthropology (Third Editio...
Anthropology
ISBN:9780393938661
Author:Clark Spencer Larsen
Publisher:W. W. Norton & Company