Stats Unit3 assignment
docx
keyboard_arrow_up
School
West Texas A&M University *
*We aren’t endorsed by this school
Course
6388
Subject
Statistics
Date
Feb 20, 2024
Type
docx
Pages
12
Uploaded by georgiaestrada
Dr. J. Rausch
Stat/Soc/Crim Rerch (POSC-6388)
February 5, 2024
Georgia Estrada
Unit 3 and Problem Sets
> getwd()
[1] "C:/Users/georgia/OneDrive/Documents"
> BES <- read.csv("C:/Users/georgia/Downloads/DSS/DSS/BES.csv")
> head(BES)
vote leave education age
1 leave 1 3 60
2 leave 1 NA 56
3 stay 0 5 73
4 leave 1 4 64
5 don't know NA 2 68
6 stay 0 4 85
> dim(BES)
[1] 30895 4
> table(BES$vote)
don't know leave stay won't vote
2314 13692 14352 537
> freq_table <-table(BES$vote)
> prop.table(freq_table)
don't know leave stay won't vote
0.07489885 0.44317851 0.46454119 0.01738145
> prop.table (table(BES$vote))
don't know leave stay won't vote
0.07489885 0.44317851 0.46454119 0.01738145
> table(BES$education, exclude=NULL)
1 2 3 4 5 <NA>
2045 5781 6272 10676 2696 3425
> mean(BES$leave)
[1] NA
> mean(BES$leave, na.rm=TRUE)
[1] 0.4882328
> BES1<-na.omit(BES)
> head(BES)
vote leave education age
1 leave 1 3 60
2 leave 1 NA 56
3 stay 0 5 73
4 leave 1 4 64
5 don't know NA 2 68
6 stay 0 4 85
> head(BES1)
vote leave education age
1 leave 1 3 60
3 stay 0 5 73
4 leave 1 4 64
6 stay 0 4 85
7 leave 1 3 78
8 leave 1 2 51
> dim(BES)
[1] 30895 4
> dim(BES1)
[1] 25097 4
> table(BES1$leave, BES1$education)
1 2 3 4 5
0 498 1763 3014 6081 1898
1 1356 3388 2685 3783 631
> prop.table(table(BES$leave, BES1$education))
Error in table(BES$leave, BES1$education) : all arguments must have the same length
> prop.table(table(BES1$leave, BES1$education))
1 2 3 4 5
0 0.01984301 0.07024744 0.12009404 0.24229988 0.07562657
1 0.05403036 0.13499621 0.10698490 0.15073515 0.02514245
> prop.table(table(BES1$leave, BES1$education), margin=1)
1 2 3 4 5
0 0.03757356 0.13301645 0.22740305 0.45880489 0.14320205
1 0.11449802 0.28607616 0.22671620 0.31942920 0.05328042
> prop.table(table(BES1$leave, BES1$education), margin=2)
1 2 3 4 5
0 0.2686084 0.3422636 0.5288647 0.6164842 0.7504943
1 0.7313916 0.6577364 0.4711353 0.3835158 0.2495057
> hist(BES1$age)
> hist(BES1$age[BES1leave==0])
Error: object 'BES1leave' not found
> hist(BES1$age[BES1$leave==0])
> hist(BES1$age[BES1$leave==0])#For non-supporters
> hist(BES1$age[BES1$leave==1]) #for supporters
> hist(BES1$age[BES1$education==1]) #W/o qualifications
> hist(BES1$age[BES1$education==4]) #w/ undergraduate degree
> hist(BES1$age[BES1$education==1], freq=FALSE) #w/o qualifications
> hist(BES1$age[BES1$education==4], freq=FALSE) # w/ undergraduate degree
> hist(BES1$age[BES1$leave==0]), freq=FALSE) #For non-supporters
Error: unexpected ',' in "hist(BES1$age[BES1$leave==0]),"
> hist(BES1$age[BES1$leave==0], freq=FALSE) #For non-supporters
> hist(BES1$age[BES1$leave==1], freq=FALSE)#for supporters
> mean(BES1$age[BES1leave==0])#For non-supporters
Error: object 'BES1leave' not found
> mean(BES1$age[BES1$leave==0])#For non-supporters
[1] 46.89
> mean(BES1$age[BES1$leave==1])#for supporters
[1] 55.06823
> median(BES1$age[BES1$leave==0])#For non-supporters
[1] 48
> median(BES1$age[BES1$leave==1])#for supporters
[1] 58
> sd(BES1$age[BES1$leave==0])#For non-supporters
[1] 17.3464
> sd(BES1$age[BES1$leave==1])#for supporters
[1] 14.96106
> var(BES1$age[BES1$leave==1])
[1] 223.8334
> sd(BES1$age[BES1$leave==1])^2
[1] 223.8334
> sqrt(var(BeS1$age[BES1$leave==1]))
Error: object 'BeS1' not found
> sqrt(var(BES1$age[BES1$leave==1]))
[1] 14.96106
> dis<-read.csv("UK_districts.csv")
Error in file(file, "rt") : cannot open the connection
In addition: Warning message:
In file(file, "rt") :
cannot open file 'UK_districts.csv': No such file or directory
> UK_districts <- read_csv("C:/Users/georgia/Downloads/DSS/DSS/UK_districts.csv")
Error in read_csv("C:/Users/georgia/Downloads/DSS/DSS/UK_districts.csv") : could not find function "read_csv"
> UK_districts <- read.csv("C:/Users/georgia/Downloads/DSS/DSS/UK_districts.csv")
> head(dis)
Error: object 'dis' not found
> head(UK_districts)
name leave high_education
1 Birmingham 50.42 22.98
2 Cardiff 39.98 32.33
3 Edinburgh City 25.56 21.92
4 Glasgow City 33.41 25.91
5 Liverpool 41.81 22.44
6 Swansea 51.51 25.85
> dim(UK_districts)
[1] 382 3
> UK_districts1<-na.omit(UK_districts)
> dim(UK_districts1)
[1] 380 3
> plot(UK_districts1$high_education, UK_districts1$leave)
> plot(x=UK_districts1$high_education, y=UK_districts1$leave)
> plot(y=UK_districts1$leave, x=UK_districts1$high_education)
> abline(v=mean(UK_districts1$high_education), i=lty="dashed")
Error: unexpected '=' in "abline(v=mean(UK_districts1$high_education), i=lty="
> abline(v=mean(UK_districts1$high_education), lty="dashed")
> abline(h=mean(UK_districts1$leave), lty="dashed")
> cor(UK_districts1$high_education, UK_districts1$leave)
[1] -0.7633185
> cor(UK_districts1$leave, UK_districts1$high_education)
[1] -0.7633185
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
Problem Set 1
1. Use the function read.csv() to read the CSV file “india.csv” and use the assignment operator <- to store the data in an object called india. (Do not forget to set the working directory first.) Provide
the R code you used (without the output). (10 points).
> getwd()
[1] "C:/Users/georg/OneDrive/Documents"
> india <- read.csv("C:/Users/georg/Downloads/india.csv")
2. Use the function head() to view the first few observations of the dataset. Provide the R code you used (without the output). (10 points).
> head(india)
village female water irrigation
1 GP1_village2 1 10 0
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
2 GP1_village1 1 0 5
3 GP2_village2 1 2 2
4 GP2_village1 1 31 4
5 GP3_village2 0 0 0
6 GP3_village1 0 0 0
3. What does each observation in this dataset represent? (5 points).
Each observation in the dataset represents one village; the output above shows the first 6 of the 322 villages.
4. Please substantively interpret the first observation in the dataset. (5 points).
The first observation in the dataset represents village 2 in Gram Panchayat 1, which was randomly assigned a female politician. This village had 10 new or repaired drinking water facilities and 0 new or repaired irrigation facilities.
5. For each variable in the dataset, please identify the type of variable (character vs. numeric binary vs. numeric non-binary) (10 points).
The variable “village” is a character variable, “female” is a numeric binary variable, and “water” and “irrigation” are numeric non-binary variables.
6. How many observations are in the dataset? In other words, how many villages were part of this experiment? (Hint: the function dim() might be helpful here.) Provide the R code you used (without the output) and provide the substantive answer. (10 points).
> dim(india)
[1] 322 4
There are 322 villages in India in the dataset.
Problem Set 2
1.Use the function mean() to calculate the average of the variable female. Please provide a full substantive interpretation of what this average means. Make sure to provide the unit of measurement. (10 points).
> mean(india$female)
[1] 0.3354037
Approximately 34% of the villages were randomly assigned a female politician.
2. Use the function mean() to calculate the average of the variable water. Please provide a full substantive interpretation of what this average means. Make sure to provide the unit of measurement. (10 points).
> mean(india$water)
[1] 17.84161
The average number of new or repaired drinking water facilities per village is approximately 18 facilities.
3. If we wanted to estimate the average causal effect of having a female politician on the number of new (and repaired) drinking water facilities: (10 points).
mean(india$water[india$female==1])-mean(india$water[india$female==0])
[1] 9.252423
a. What would be the treatment variable? Please just provide the name of the variable.
Female
b. What would be the outcome variable? Please just provide the name of the variable.
Water
4. If we wanted to estimate the average causal effect of having a female politician on the number of new (and repaired) irrigation facilities: (10 points).
> mean(india$irrigation[india$female==1])-mean(india$irrigation[india$female==0])
[1] -0.3693319
a. What would be the treatment variable? Please just provide the name of the variable.
Female
b. What would be the outcome variable? Please just provide the name of the variable.
Irrigation 5. In both analyses above: (10 points) a. What would be the treatment group?
b. What would be the control group?
a.) The treatment group consists of the villages that were randomly assigned a female politician.
b.) The control group consists of the villages that were not randomly assigned a female politician.
Problem Set 3
1.Considering that the dataset we are analyzing comes from a randomized experiment, what can we compute to estimate the average causal effect of having a female politician on the number of new (or repaired) drinking water facilities? Please provide the name of the estimator. (5 points).
We can compute the average number of new or repaired drinking water facilities in villages that have a female politician and subtract from it the average number of new or repaired drinking water facilities in villages that do not have a female politician (that is, villages that have a male politician). The name of the estimator is the difference-in-means estimator.
2. In this dataset, what is the average number of new (or repaired) drinking water facilities in villages with a female politician? Please answer with a full sentence. (10 points). > mean(india$water[india$female==1])
[1] 23.99074
The average number of new or repaired drinking water facilities in a village with a female politician is 24.0.
3. What is the average number of new (or repaired) drinking water facilities in villages with a male politician? Please answer with a full sentence. (10 points).
> mean(india$water[india$female==0])
[1] 14.73832
The average number of new or repaired drinking water facilities in a village with a male politician is 14.7.
4. What is the estimated average causal effect of having a female politician on the number of new (or repaired) drinking water facilities? Please provide a full substantive answer (make sure to include the assumption, why the assumption is reasonable, the treatment, the outcome, as well as the direction, size, and unit of measurement of the average treatment effect) (25 points).
> mean(india$water[india$female==1])-mean(india$water[india$female==0])
[1] 9.252423
It is assumed that the villages assigned a female politician are comparable, on average, to the villages that were not assigned a female politician. The assumption is reasonable because the female politicians were assigned at random. The treatment is having a female politician (versus having a male politician), and the outcome is the number of new or repaired drinking water facilities. The estimated average treatment effect is positive: having a female politician increases the number of new or repaired drinking water facilities by about 9 facilities per village, on average.
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
Related Documents
Related Questions
Problem 4-09
Epsilon Airlines services predominately the eastern and southeastern United States. The vast majority of Epsilon's customers make reservations through Epsilon's website, but a small percentage
of customers make reservations via phone. Epsilon employs call-center personnel to handle these reservations along with any problems with the website reservation system and for the
rebooking of flights for customers if their plans change or their travel is disrupted. Staffing the call center appropriately is a challenge for Epsilon's management team. Having too many
employees on hand is a waste of money, but having too few results in very poor customer service and the potential loss of customers.
Epsilon analysts have estimated the minimum number of call-center employees needed by day of week for the upcoming vacation season (June, July, and the first two weeks of August). These
estimates are as follows:
Minimum Number of
Employees Needed
Day
Monday
75
Tuesday
50
Wednesday
45…
arrow_forward
please send the answer for the wrong part
arrow_forward
Suppose a company needs temporary passwords for the trial of a new time management software. Each password will have two letters followed by two digits. The digits 7 8 and 9 will not be used. So there are 26 letters and seven digits that will be used. Assume the letters and digits can be repeated. How many passwords can be created using this format?
Passwords =
arrow_forward
What type of variable is exchange on which a stock is traded
arrow_forward
Suppose that license plates in a certain municipality come in two forms: two letters (A …Z) followed by three digits (0 … 9) or three letters followed by two digits. How manydifferent license plates are possible
arrow_forward
. How many 10 digit phone numbers, Area code + number (***)***-****, are possible if the second digit in the area code has to be a 0 or 1 and the first digit of the number can't be 0?
arrow_forward
Let A = {small, medium, large}, B = {blue, green}, and C = {triangle, square}. H
Represent A x C as cells in a spreadsheet.
triangle
---Select---
---Select--- V
small
medium
large
---Select--- V
---Select--- V
---Select--- V
---Select--- ✓
square
medium
---Select--- V
---Select--- V
---Select--- V
---Select--- V
---Select--- ✓
arrow_forward
Suppose a company needs temporary passwords for the trial of a new time management software. Each password will have two letters followed by two digits. The digits 7, 8,9
will not be used. So, there are 26 letters and 7 digits that will be used. Assume that the letters and digits can be repeated. How many passwords can be created using this format?
arrow_forward
7x 1+2x 2+1 x_3+8x_4+9 x 5 = 69
9x 2+3 x 3 + 4x_4+4 x 5 = 54
4x 1+6 x 2+5 x_4+4 x 5 = 40
2x 1 + 10 x 2 +9x_3+9x_4+3 x 5 = 76
10 x 1+4x2+7x_3+7x_4 + 4 x 5 = 70
arrow_forward
Solve with RTM formula
arrow_forward
Sorry about that. Here is question 2 of the same worksheet.
arrow_forward
-5 -4 -3 -2 -1
2
1
-1
-2
-3-
-4-
-5 #
-5 -4 -3 -2 -1
4
3
5-
4
3
2
1
-1
-2
-3-
-4-
-5-
T
-5 -4 -3 -2 -1
5
4
3-
27
-5 -4 -3 -2 -1
1
-1
-2
-3
-4
-5+
150
4
3
2-
1
-1
-2
-3
1 2
3 4
2 3
3
4
4
5
5
a
5
a. f(x) = -log(x)
b. f(x) = log² (x)
c. f(x) =
d. f(x) =
log₂ (x)
-
- log2 (x)
arrow_forward
cez - 7.2 The X
ww-awn.aleks.com/alekscgi/x/Isl.exe/1o_u-lgNslkr7j8P3jH-IBS1dp57AtUddR-PrG9yyMRedZQZTxsfCOhwVPjmJnZ9X-APBaxAHwujlsUY-iGulpK4Uwt0STWP4BlgiuKzgg8hleBQVJq?1oBw7QYjlbavb:
7.2 The Law of Sines
Cla
Question 9 of 11 (1 point) | Question Attempt: 1 of Unlimited
v1
v 4
v 10
v 11
v 3
The connector rod from the piston to the crankshaft in a certain 2.0 -L engine is 5.5 in. The radius of the crank cirde is 2.8 in. If the angle made by the
connector rod with the horizontal at the wrist pin P is 23°, how far is the wrist pin from the center C of the crankshaft? Round to the nearest tenth of an inch.
5.5 in
Example
2.8 in
The distance from the wrist pin to the center of the crankshaft is approximately
in or
in
Check
Save For Later
Submit Assignr
O 2021 McGraw-Hil Education, All Rights Reserved. Terms of Use Privacy I Acce
70
Home
F11
F12
%23
2
3
4
8.
W
R
T
Y U
P
arrow_forward
Question 8part 3,4,5
arrow_forward
A producer of educational TV shows for gifted children wants to promote the claim that gifted children's
scores on an analytical test increase the more the children watch their shows.
In RStudio, run the following code to install and/or library the package "openintro".
1. install.packages("openintro") # don't do this again if you already did this!
2. library(openintro)
3. gifted
Delete the install line of code if you are in an RMD file so that it doesn't install every time you knit. The
last line of code will access the dataset of that name.
The dataset named 'gifted' gives information about test scores of gifted children on a standard analytical
test and the number of hours of educational TV these children watch. use the data set to make a model to
predict score on analytical skills test ("score") from hours of educational TV watched per week ("edutv").
a. Make a scatterplot of "score" (y-axis) vs "edutv" (x-axis). Which plot is the scatterplot?
Graph A Graph B Graph C Graph D
C
A…
arrow_forward
A producer of educational TV shows for gifted children wants to promote the claim that gifted children's
scores on an analytical test increase the more the children watch their shows.
In RStudio, run the following code to install and/or library the package "openintro".
1. Install.packages("openintro") # don't do this again if you already did this!
2. library(openintro)
3. gifted
Delete the install line of code if you are in an RMD file so that it doesn't install every time you knit. The
last line of code will access the dataset of that name.
dataset named gifted gives information about test scores of gifted children on a standard analytical
The
test and the number of hours of educational TV these children watch. use the data set to make a model to
predict score on analytical skills test (score") from hours of educational TV watched per week (edutv").
a. Make a scatterplot of "score" (y-axis) vs "edutv" (x-axis). Which plot is the scatterplot?
Graph A Graph B Graph C Graph Do
ỏ
A
1954…
arrow_forward
A producer of educational TV shows for gifted children wants to promote the claim that gifted children's
scores on an analytical test increase the more the children watch their shows.
In RStudio, run the following code to install and/or library the package "openintro".
1. install.packages ("openintro") # don't do this again if you already did this!
2. library(openintro)
3. gifted
Delete the install line of code if you are in an RMD file so that it doesn't install every time you knit. The
last line of code will access the dataset of that name.
The dataset named 'gifted' gives information about test scores of gifted children on a standard analytical
test and the number of hours of educational TV these children watch. use the data set to make a model to
predict score on analytical skills test ("score") from hours of educational TV watched per week ("edutv").
a. Make a scatterplot of "score" (y-axis) vs "edutv" (x-axis). Which plot is the scatterplot?
Graph A Graph B Graph C Graph D
A
165-…
arrow_forward
SEE MORE QUESTIONS
Recommended textbooks for you
Algebra & Trigonometry with Analytic Geometry
Algebra
ISBN:9781133382119
Author:Swokowski
Publisher:Cengage
Related Questions
- Problem 4-09 Epsilon Airlines services predominately the eastern and southeastern United States. The vast majority of Epsilon's customers make reservations through Epsilon's website, but a small percentage of customers make reservations via phone. Epsilon employs call-center personnel to handle these reservations along with any problems with the website reservation system and for the rebooking of flights for customers if their plans change or their travel is disrupted. Staffing the call center appropriately is a challenge for Epsilon's management team. Having too many employees on hand is a waste of money, but having too few results in very poor customer service and the potential loss of customers. Epsilon analysts have estimated the minimum number of call-center employees needed by day of week for the upcoming vacation season (June, July, and the first two weeks of August). These estimates are as follows: Minimum Number of Employees Needed Day Monday 75 Tuesday 50 Wednesday 45…arrow_forwardplease send the answer for the wrong partarrow_forwardSuppose a company needs temporary passwords for the trial of a new time management software. Each password will have two letters followed by two digits. The digits 7 8 and 9 will not be used. So there are 26 letters and seven digits that will be used. Assume the letters and digits can be repeated. How many passwords can be created using this format? Passwords =arrow_forward
- What type of variable is exchange on which a stock is tradedarrow_forwardSuppose that license plates in a certain municipality come in two forms: two letters (A …Z) followed by three digits (0 … 9) or three letters followed by two digits. How manydifferent license plates are possiblearrow_forward. How many 10 digit phone numbers, Area code + number (***)***-****, are possible if the second digit in the area code has to be a 0 or 1 and the first digit of the number can't be 0?arrow_forward
- Let A = {small, medium, large}, B = {blue, green}, and C = {triangle, square}. H Represent A x C as cells in a spreadsheet. triangle ---Select--- ---Select--- V small medium large ---Select--- V ---Select--- V ---Select--- V ---Select--- ✓ square medium ---Select--- V ---Select--- V ---Select--- V ---Select--- V ---Select--- ✓arrow_forwardSuppose a company needs temporary passwords for the trial of a new time management software. Each password will have two letters followed by two digits. The digits 7, 8,9 will not be used. So, there are 26 letters and 7 digits that will be used. Assume that the letters and digits can be repeated. How many passwords can be created using this format?arrow_forward7x 1+2x 2+1 x_3+8x_4+9 x 5 = 69 9x 2+3 x 3 + 4x_4+4 x 5 = 54 4x 1+6 x 2+5 x_4+4 x 5 = 40 2x 1 + 10 x 2 +9x_3+9x_4+3 x 5 = 76 10 x 1+4x2+7x_3+7x_4 + 4 x 5 = 70arrow_forward
- Solve with RTM formulaarrow_forwardSorry about that. Here is question 2 of the same worksheet.arrow_forward-5 -4 -3 -2 -1 2 1 -1 -2 -3- -4- -5 # -5 -4 -3 -2 -1 4 3 5- 4 3 2 1 -1 -2 -3- -4- -5- T -5 -4 -3 -2 -1 5 4 3- 27 -5 -4 -3 -2 -1 1 -1 -2 -3 -4 -5+ 150 4 3 2- 1 -1 -2 -3 1 2 3 4 2 3 3 4 4 5 5 a 5 a. f(x) = -log(x) b. f(x) = log² (x) c. f(x) = d. f(x) = log₂ (x) - - log2 (x)arrow_forward
arrow_back_ios
SEE MORE QUESTIONS
arrow_forward_ios
Recommended textbooks for you
- Algebra & Trigonometry with Analytic GeometryAlgebraISBN:9781133382119Author:SwokowskiPublisher:Cengage
Algebra & Trigonometry with Analytic Geometry
Algebra
ISBN:9781133382119
Author:Swokowski
Publisher:Cengage