task-3-R111111

docx

School

Centro Escolar University *

*We aren’t endorsed by this school

Course

AUDITING

Subject

Statistics

Date

Nov 24, 2024

Type

docx

Pages

12

Uploaded by aly_george

Report
# R studio questions from 16 to 23
# 2023-11-27
#
# R Markdown
#
# This is an R Markdown document. Markdown is a simple formatting syntax for
# authoring HTML, PDF, and MS Word documents. For more details on using
# R Markdown see http://rmarkdown.rstudio.com .
#
# When you click the Knit button a document will be generated that includes
# both content as well as the output of any embedded R code chunks within the
# document. You can embed an R code chunk like this:

# NOTE(review): machine-specific absolute path; the script only runs where
# this file exists.
hw4_data <- read.csv(
  "C:/Users/user/Desktop/attique/asad work/1547941853 需要用 R/hw4_data.csv"
)
data <- hw4_data

# Count the number of people in each treatment group
n_total <- nrow(data)
n_employment <- sum(data$employment_group == 1)
n_cash <- sum(data$cash_group == 1)
n_control <- sum(data$control_group == 1)

# Calculate the share in each treatment group
share_employment <- n_employment / n_total
share_cash <- n_cash / n_total
share_control <- n_control / n_total

# Print the results
cat("Total number of people:", n_total, " \n ")
## Total number of people: 726
cat("Share in the employment group:", share_employment, " \n ")
## Share in the employment group: 0.5550964
cat("Share in the cash group:", share_cash, " \n ")
## Share in the cash group: 0.2217631
cat("Share in the control group:", share_control, " \n ")
## Share in the control group: 0.2231405

# question16
# Mean and standard deviation of age and of the married dummy for each of the
# three arms. Every arm is selected through its own 0/1 indicator column.
# Grouping by employment_group alone only yields two levels (0 and 1), which
# cannot be indexed as employment / cash / control.
arm_members <- list(
  employment = data$employment_group == 1,
  cash = data$cash_group == 1,
  control = data$control_group == 1
)

# Apply `stat` to `values` within each arm; returns one number per arm in the
# order employment, cash, control. Extra arguments are forwarded to `stat`.
arm_stat <- function(values, stat, ...) {
  vapply(
    arm_members,
    function(members) stat(values[members], ...),
    numeric(1)
  )
}

# Calculate mean and standard deviation for age
mean_age <- arm_stat(data$age, mean)
sd_age <- arm_stat(data$age, sd)

# Calculate mean and standard deviation for being married. Missing values are
# dropped, matching the default na.action of the aggregate() formula interface
# that the earlier version of this chunk used.
mean_married <- arm_stat(data$marry_dum, mean, na.rm = TRUE)
sd_married <- arm_stat(data$marry_dum, sd, na.rm = TRUE)

# Print the results
# The "(stale)" lines below are the output recorded from the earlier version,
# which indexed a two-level result by position: its "Employment" line was
# employment_group == 0, its "Cash" line was employment_group == 1, and its
# "Control" line was NA. Re-knit to refresh them.
cat("Age: \n ")
## Age:
cat(" Employment group - Mean:", mean_age[1], "SD:", sd_age[1], " \n ")
## (stale) Employment group - Mean: 28.66873 SD: 6.975902
cat(" Cash group - Mean:", mean_age[2], "SD:", sd_age[2], " \n ")
## (stale) Cash group - Mean: 28.01985 SD: 6.895424
cat(" Control group - Mean:", mean_age[3], "SD:", sd_age[3], " \n ")
## (stale) Control group - Mean: NA SD: NA
cat(" \n Married: \n ")
##
## Married:
cat(" Employment group - Mean:", mean_married[1], "SD:", sd_married[1], " \n ")
## (stale) Employment group - Mean: 0.8142415 SD: 0.3895151
cat(" Cash group - Mean:", mean_married[2], "SD:", sd_married[2], " \n ")
## (stale) Cash group - Mean: 0.7617866 SD: 0.4265199
cat(" Control group - Mean:", mean_married[3], "SD:", sd_married[3], " \n ")
## (stale) Control group - Mean: NA SD: NA

# question 17
# Age comparison between employment and control groups
t_age <- t.test(
  data$age[data$employment_group == 1],
  data$age[data$control_group == 1]
)
# Print the t-test result for age
print("Age Comparison:")
## [1] "Age Comparison:"
print(t_age)
##
## Welch Two Sample t-test
##
## data: data$age[data$employment_group == 1] and data$age[data$control_group == 1]
## t = -0.70767, df = 295.98, p-value = 0.4797
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.722064 0.811149
## sample estimates:
## mean of x mean of y
## 28.01985 28.47531

# Married comparison between employment and control groups
# Compares the marry_dum column for employment_group == 1 against
# control_group == 1.
t_marry <- t.test(
  data$marry_dum[data$employment_group == 1],
  data$marry_dum[data$control_group == 1]
)

# Print the t-test result for marital status
# cat() is used so the newline escapes produce line breaks; print() would show
# the escape literally, as in [1] "\nMarital Status Comparison:".
cat(" \n Marital Status Comparison: \n ")
##
## Marital Status Comparison:
print(t_marry)
##
## Welch Two Sample t-test
##
## data: data$marry_dum[data$employment_group == 1] and data$marry_dum[data$control_group == 1]
## t = -1.7863, df = 331.94, p-value = 0.07497
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.137366784 0.006618997
## sample estimates:
## mean of x mean of y
## 0.7617866 0.8271605

# question 18
# Baseline mental health index comparisons (one Welch t-test per pair of arms)
t_baseline_emp_ctrl <- t.test(
  data$b_mental_health_index[data$employment_group == 1],
  data$b_mental_health_index[data$control_group == 1]
)
t_baseline_cash_ctrl <- t.test(
  data$b_mental_health_index[data$cash_group == 1],
  data$b_mental_health_index[data$control_group == 1]
)
t_baseline_emp_cash <- t.test(
  data$b_mental_health_index[data$employment_group == 1],
  data$b_mental_health_index[data$cash_group == 1]
)

# Print the results
cat("Baseline Mental Health Index T-Test (Employment vs. Control): \n ")
## Baseline Mental Health Index T-Test (Employment vs. Control):
print(t_baseline_emp_ctrl)
##
## Welch Two Sample t-test
##
## data: data$b_mental_health_index[data$employment_group == 1] and data$b_mental_health_index[data$control_group == 1]
## t = 1.4146, df = 308.03, p-value = 0.1582
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02516152 0.15386896
## sample estimates:
## mean of x mean of y
## 0.059110301 -0.005243423
cat(" \n Baseline Mental Health Index T-Test (Cash vs. Control): \n ")
##
## Baseline Mental Health Index T-Test (Cash vs. Control):
print(t_baseline_cash_ctrl)
##
## Welch Two Sample t-test
##
## data: data$b_mental_health_index[data$cash_group == 1] and data$b_mental_health_index[data$control_group == 1]
## t = 0.27537, df = 319.98, p-value = 0.7832
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.09340705 0.12381071
## sample estimates:
## mean of x mean of y
## 0.009958406 -0.005243423
cat(" \n Baseline Mental Health Index T-Test (Employment vs. Cash): \n ")
##
## Baseline Mental Health Index T-Test (Employment vs. Cash):
print(t_baseline_emp_cash)
##
## Welch Two Sample t-test
##
## data: data$b_mental_health_index[data$employment_group == 1] and data$b_mental_health_index[data$cash_group == 1]
## t = 1.0405, df = 291.7, p-value = 0.299
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04382451 0.14212829
## sample estimates:
## mean of x mean of y
## 0.059110301 0.009958406

# question 19
# 19. Multiple Regression: endline mental health index on the two treatment
# indicators, controlling for the baseline index.
model <- lm(
  e_mental_health_index ~ employment_group + cash_group +
    b_mental_health_index,
  data = hw4_data
)

# Print coefficients
summary(model)
##
## Call:
## lm(formula = e_mental_health_index ~ employment_group + cash_group +
## b_mental_health_index, data = hw4_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6602 -0.3103 0.0121 0.3027 1.8688
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.002387 0.036165 0.066 0.947
## employment_group 0.188364 0.042878 4.393 1.29e-05 ***
## cash_group 0.031125 0.051227 0.608 0.544
## b_mental_health_index 0.455204 0.034259 13.287 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4603 on 722 degrees of freedom
## Multiple R-squared: 0.2257, Adjusted R-squared: 0.2225
## F-statistic: 70.17 on 3 and 722 DF, p-value: < 2.2e-16
# 21. Simulation Exercise
set.seed(123) # Setting a seed for reproducibility

# Baseline values
true_effect <- 0.5
sample_size <- 50

#' Simulate one randomized experiment and return its t-test p-value
#'
#' Builds a standard-normal population of 1000 units, draws `sample_size` of
#' them without replacement, treats the first half of the (already randomly
#' ordered) draw, adds `true_effect` to the treated outcomes and compares the
#' two halves with a Welch two-sample t-test.
#'
#' @param true_effect Numeric shift added to every treated unit's outcome.
#' @param sample_size Number of units drawn from the population; needs at
#'   least 4 so each arm has two observations. `sample()` itself errors when
#'   it exceeds the population size of 1000.
#' @return The two-sided p-value of the t-test (a single number).
simulate_experiment <- function(true_effect, sample_size) {
  stopifnot(
    is.numeric(sample_size),
    length(sample_size) == 1L,
    sample_size >= 4
  )

  # Construct a population with some baseline outcome
  population <- rnorm(1000, mean = 0, sd = 1)

  # Draw a random sample from the population; sample() returns the units in
  # random order, so a positional split below is a random assignment
  sample_data <- sample(population, size = sample_size)

  # Split on whole-number positions so that an odd sample size keeps every
  # unit: the extra one goes to control instead of being dropped by
  # fractional indices such as 1:(5 / 2)
  treated_idx <- seq_len(length(sample_data) %/% 2)

  # Add the real treatment effect to the baseline outcome
  treatment_group <- sample_data[treated_idx] + true_effect
  control_group <- sample_data[-treated_idx]

  # Run a t-test for the post-treatment outcome between treatment and control
  t.test(treatment_group, control_group)$p.value
}

# Run simulation
p_values <- replicate(1000, simulate_experiment(true_effect, sample_size))

# 21. Analyze the distribution of p-values
hist(
  p_values,
  main = "Distribution of P-values",
  xlab = "P-value",
  col = "lightblue"
)
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
# 22. Same experiment with the sample size raised to 500
sample_size_500 <- 500

# One p-value per simulated experiment, 1000 experiments in total
p_values_500 <- vapply(
  seq_len(1000),
  function(run) simulate_experiment(true_effect, sample_size_500),
  numeric(1)
)

# Histogram of the p-values obtained with 500 sampled units
hist(
  x = p_values_500,
  col = "lightblue",
  xlab = "P-value",
  main = "Distribution of P-values (Sample Size 500)"
)
# 23. Back to 50 sampled units, now with larger true effects (1 and 5)
sample_size <- 50
true_effect_1 <- 1
true_effect_5 <- 5

# 1000 simulated experiments per effect size; effect 1 is simulated first,
# then effect 5
p_values_1 <- vapply(
  seq_len(1000),
  function(run) simulate_experiment(true_effect_1, sample_size),
  numeric(1)
)
p_values_5 <- vapply(
  seq_len(1000),
  function(run) simulate_experiment(true_effect_5, sample_size),
  numeric(1)
)

# Histogram of the p-values for a true effect of 1
hist(
  x = p_values_1,
  col = "lightblue",
  xlab = "P-value",
  main = "Distribution of P-values (True Effect Size 1)"
)

# Histogram of the p-values for a true effect of 5
hist(
  x = p_values_5,
  col = "lightblue",
  xlab = "P-value",
  main = "Distribution of P-values (True Effect Size 5)"
)
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
# 23. Sample size of 50 with true effects of 1 and 5.
# NOTE(review): this chunk repeats the question 23 code that appears just
# before it; the seed is not reset in between, so it draws fresh p-values.
sample_size <- 50
true_effect_1 <- 1
true_effect_5 <- 5

# Simulate 1000 experiments for each effect size (effect 1 first, then 5)
p_values_1 <- vapply(
  seq_len(1000),
  function(run) simulate_experiment(true_effect_1, sample_size),
  numeric(1)
)
p_values_5 <- vapply(
  seq_len(1000),
  function(run) simulate_experiment(true_effect_5, sample_size),
  numeric(1)
)

# Plot the p-value distribution for a true effect of 1
hist(
  x = p_values_1,
  col = "lightblue",
  xlab = "P-value",
  main = "Distribution of P-values (True Effect Size 1)"
)

# Plot the p-value distribution for a true effect of 5
hist(
  x = p_values_5,
  col = "lightblue",
  xlab = "P-value",
  main = "Distribution of P-values (True Effect Size 5)"
)
# Fix the seed so every run reproduces the same draws
set.seed(123)

# Population of 100000 outcomes, normally distributed around 70 (sd 10)
pop <- rnorm(n = 100000, mean = 70, sd = 10)

# Key parameters of the experiment
true.effect <- 0.5
sample.size <- 50

# Pre-allocated holder for one p-value per simulated experiment
p.values <- rep(NA, 1000)

for (i in seq_along(p.values)) {
  # Draw this experiment's sample and hold it in a data frame
  our.samp <- sample(pop, size = sample.size)
  df <- data.frame(our.samp)

  # Give each sampled person a uniform draw on [0, 1]; draws above 0.5 go to
  # treatment, which splits the sample roughly in half
  df$random_0_1 <- runif(sample.size)
  df$treatment.group <- as.numeric(df$random_0_1 > 0.5)

  # Post-treatment outcome: the treatment effect is added for treated people
  # only (the indicator is 0 for everyone else)
  df$outcome.post <- df$our.samp + true.effect * df$treatment.group

  # Two-sided t-test of treated versus untreated outcomes (a regression
  # would be an alternative)
  evaluating <- t.test(
    df$outcome.post[df$treatment.group == 1],
    df$outcome.post[df$treatment.group == 0],
    alternative = "two.sided"
  )
  p.values[i] <- evaluating$p.value
}

hist(p.values, breaks = 20)
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help