assignment_3

py

School

University Of Arizona *

*We aren’t endorsed by this school

Course

97

Subject

Mathematics

Date

Feb 20, 2024

Type

py

Pages

5

Uploaded by PrivateIbis3915

Report
import pandas as pd import numpy as np import matplotlib.pyplot as plt import math # df = pd.read_csv('~/Desktop/DM/gaTech/Summer 2022/AI Ethics/toxity_per_attribute_actual.csv') # reduced_df = df.copy() # print('Actual len') # print(len(reduced_df)) # column_names = reduced_df.columns # # print(reduced_df.columns) # reduced_df_indexes = df[(df['lesbian'] == False) & # (df['gay'] == False) & # (df['bisexual'] == False) & # (df['transgender'] == False) & # (df['trans'] == False) & # (df['queer'] == False) & # (df['lgbt'] == False) & # (df['lgbtq'] == False) & # (df['homosexual'] == False) & # (df['straight'] == False) & # (df['heterosexual'] == False) & # (df['male'] == False) & # (df['female'] == False) & # (df['nonbinary'] == False) & # (df['african'] == False) & # (df['african american'] == False) & # (df['black'] == False) & # (df['white'] == False) & # (df['european'] == False) & # (df['hispanic'] == False) & # (df['latino'] == False) & # (df['latina'] == False) & # (df['latinx'] == False) & # (df['mexican'] == False) & # (df['canadian'] == False) & # (df['american'] == False) & # (df['asian'] == False) & # (df['indian'] == False) & # (df['middle eastern'] == False) & # (df['chinese'] == False) & # (df['japanese'] == False) & # (df['christian'] == False) & # (df['muslim'] == False) & # (df['jewish'] == False) & # (df['buddhist'] == False) & # (df['catholic'] == False) & # (df['protestant'] == False) & # (df['sikh'] == False) & # (df['taoist'] == False) & # (df['old'] == False) & # (df['older'] == False) & # (df['young'] == False) & # (df['younger'] == False) & # (df['teenage'] == False) & # (df['millenial'] == False) & # (df['middle aged'] == False) & # (df['elderly'] == False) &
# (df['blind'] == False) & # (df['deaf'] == False) & # (df['paralyzed'] == False)].index # reduced_df.drop(reduced_df_indexes, inplace=True) # print('Reduced len') # print(len(reduced_df)) # x = reduced_df['TOXICITY'].mean() # y = reduced_df['TOXICITY'].std() # # print(reduced_df['TOXICITY'].mean()) # # print(reduced_df['TOXICITY'].std()) # z = x + (2 * y) # w = x - (2 * y) # moe_pos = (1.96 * y) / math.sqrt(len(reduced_df)) # moe_neg = x - (1.96 * y) / math.sqrt(len(reduced_df)) # print('95% CI range') # print(w, z) # print('Reduced dataset stats') # print(x, y, moe_pos, moe_neg) # n_10 = reduced_df.sample(frac=0.1) # print('10% sample len') # print(len(n_10)) # # Mean +/- 1.96*SD/SQRT(n) # mean_10 = n_10['TOXICITY'].mean() # std_10 = n_10['TOXICITY'].std() # moe_10_pos = (1.96 * std_10) / math.sqrt(len(n_10)) # moe_10_neg = mean_10 - (1.96 * std_10) / math.sqrt(len(n_10)) # print('10% sample dataset stats') # print(mean_10, std_10, moe_10_pos, moe_10_neg) # n_60 = reduced_df.sample(frac=0.6) # print('60% sample len') # print(len(n_60)) # # Mean +/- 1.96*SD/SQRT(n) # mean_60 = n_60['TOXICITY'].mean() # std_60 = n_60['TOXICITY'].std() # moe_60_pos = (1.96 * std_60) / math.sqrt(len(n_60)) # moe_60_neg = mean_60 - (1.96 * std_60) / math.sqrt(len(n_60)) # print('60% sample dataset stats') # print(mean_60, std_60, moe_60_pos, moe_60_neg) # step_5_df = pd.read_csv('~/Desktop/DM/gaTech/Summer 2022/AI Ethics/step5.csv') # print(len(step_5_df)) # x = step_5_df['TOXICITY'].mean() # y = step_5_df['TOXICITY'].std() # # print(reduced_df['TOXICITY'].mean()) # # print(reduced_df['TOXICITY'].std()) # z = x + (2 * y) # w = x - (2 * y) # moe = (1.96 * y) / math.sqrt(len(step_5_df)) # # moe_neg = x - (1.96 * y) / math.sqrt(len(step_5_df)) # print('95% CI range') # print(w, z) # print('Reduced dataset stats') # print(x, y, moe) # # n_10 = step_5_df.sample(frac=0.1) # print('10% sample len') # print(len(n_10)) # # Mean +/- 1.96*SD/SQRT(n)
# mean_10 = n_10['TOXICITY'].mean() # std_10 = n_10['TOXICITY'].std() # moe_10 = (1.96 * std_10) / math.sqrt(len(n_10)) # # moe_10_neg = mean_10 - (1.96 * std_10) / math.sqrt(len(n_10)) # print('10% sample dataset stats') # print(mean_10, std_10, moe_10) # n_60 = step_5_df.sample(frac=0.6) # print('60% sample len') # print(len(n_60)) # # Mean +/- 1.96*SD/SQRT(n) # mean_60 = n_60['TOXICITY'].mean() # std_60 = n_60['TOXICITY'].std() # moe_60 = (1.96 * std_60) / math.sqrt(len(n_60)) # # moe_60_neg = mean_60 - (1.96 * std_60) / math.sqrt(len(n_60)) # print('60% sample dataset stats') # print(mean_60, std_60, moe_60) # step_5_df = pd.read_csv('~/Desktop/DM/gaTech/Summer 2022/AI Ethics/male.csv') # print(len(step_5_df)) # x = step_5_df['TOXICITY'].mean() # y = step_5_df['TOXICITY'].std() # # print(reduced_df['TOXICITY'].mean()) # # print(reduced_df['TOXICITY'].std()) # z = x + (2 * y) # w = x - (2 * y) # moe = (1.96 * y) / math.sqrt(len(step_5_df)) # # moe_neg = x - (1.96 * y) / math.sqrt(len(step_5_df)) # print('95% CI range') # print(w, z) # print('Reduced dataset stats') # print(x, y, moe) # # n_10 = step_5_df.sample(frac=0.1) # print('10% sample len') # print(len(n_10)) # # Mean +/- 1.96*SD/SQRT(n) # mean_10 = n_10['TOXICITY'].mean() # std_10 = n_10['TOXICITY'].std() # moe_10 = (1.96 * std_10) / math.sqrt(len(n_10)) # # moe_10_neg = mean_10 - (1.96 * std_10) / math.sqrt(len(n_10)) # print('10% sample dataset stats') # print(mean_10, std_10, moe_10) # n_60 = step_5_df.sample(frac=0.6) # print('60% sample len') # print(len(n_60)) # # Mean +/- 1.96*SD/SQRT(n) # mean_60 = n_60['TOXICITY'].mean() # std_60 = n_60['TOXICITY'].std() # moe_60 = (1.96 * std_60) / math.sqrt(len(n_60)) # # moe_60_neg = mean_60 - (1.96 * std_60) / math.sqrt(len(n_60)) # print('60% sample dataset stats') # print(mean_60, std_60, moe_60) # # step_5_df = pd.read_csv('~/Desktop/DM/gaTech/Summer 2022/AI Ethics/female.csv') # print(len(step_5_df)) # x = step_5_df['TOXICITY'].mean() # y = 
# step_5_df['TOXICITY'].std() # # print(reduced_df['TOXICITY'].mean()) # # print(reduced_df['TOXICITY'].std())
Your preview ends here
Eager to read the complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
# --- Commented-out statements from an earlier run, kept for reference ---
# z = x + (2 * y)
# w = x - (2 * y)
# moe = (1.96 * y) / math.sqrt(len(step_5_df))
# # moe_neg = x - (1.96 * y) / math.sqrt(len(step_5_df))
# print('95% CI range')
# print(w, z)
# print('Reduced dataset stats')
# print(x, y, moe)
#
# n_10 = step_5_df.sample(frac=0.1)
# print('10% sample len')
# print(len(n_10))
# # Mean +/- 1.96*SD/SQRT(n)
# mean_10 = n_10['TOXICITY'].mean()
# std_10 = n_10['TOXICITY'].std()
# moe_10 = (1.96 * std_10) / math.sqrt(len(n_10))
# # moe_10_neg = mean_10 - (1.96 * std_10) / math.sqrt(len(n_10))
# print('10% sample dataset stats')
# print(mean_10, std_10, moe_10)
# n_60 = step_5_df.sample(frac=0.6)
# print('60% sample len')
# print(len(n_60))
# # Mean +/- 1.96*SD/SQRT(n)
# mean_60 = n_60['TOXICITY'].mean()
# std_60 = n_60['TOXICITY'].std()
# moe_60 = (1.96 * std_60) / math.sqrt(len(n_60))
# # moe_60_neg = mean_60 - (1.96 * std_60) / math.sqrt(len(n_60))
# print('60% sample dataset stats')
# print(mean_60, std_60, moe_60)
#

# CSV read by the active run; it must contain a 'TOXICITY' column.
# NOTE(review): in the flattened source it is ambiguous whether the read_csv
# line was commented out; it has to be live for the statements after it to run.
OTHER_CSV_PATH = '~/Desktop/DM/gaTech/Summer 2022/AI Ethics/other.csv'

# Multiplier used in the margin-of-error formula: Mean +/- 1.96*SD/SQRT(n).
Z_95 = 1.96


def toxicity_stats(frame):
    """Return ``(mean, std, moe)`` for the ``TOXICITY`` column of *frame*.

    ``moe`` is the margin of error ``1.96 * std / sqrt(n)``, where ``n`` is
    the number of rows in *frame*.

    Args:
        frame: DataFrame with a ``TOXICITY`` column.

    Returns:
        Tuple ``(mean, std, moe)``. ``moe`` is NaN when *frame* has no rows.

    Raises:
        KeyError: if *frame* has no ``TOXICITY`` column.
    """
    scores = frame['TOXICITY']
    mean = scores.mean()
    std = scores.std()
    n_rows = len(frame)
    if n_rows == 0:
        # sqrt(0) would be used as a divisor; report "undefined" instead of
        # depending on how the float type at hand handles division by zero.
        return mean, std, float('nan')
    moe = (Z_95 * std) / math.sqrt(n_rows)
    return mean, std, moe


def report_sample(frame, frac, label, random_state=None):
    """Draw a random sample of *frame* and print its size and statistics.

    Args:
        frame: DataFrame with a ``TOXICITY`` column.
        frac: Fraction of rows to draw, passed to ``DataFrame.sample``.
        label: Text prefix for the printed headers, e.g. ``'10%'``.
        random_state: Optional seed forwarded to ``DataFrame.sample``; the
            default ``None`` draws a different sample on every run.

    Returns:
        The sampled DataFrame.
    """
    sample = frame.sample(frac=frac, random_state=random_state)
    print(label + ' sample len')
    print(len(sample))
    mean, std, moe = toxicity_stats(sample)
    print(label + ' sample dataset stats')
    print(mean, std, moe)
    return sample


def main(csv_path=OTHER_CSV_PATH, random_state=None):
    """Print toxicity statistics for the full CSV and for 10% / 60% samples.

    Args:
        csv_path: Path of the CSV file to analyse.
        random_state: Optional seed that makes both samples reproducible.
    """
    step_5_df = pd.read_csv(csv_path)
    print(len(step_5_df))

    mean, std, moe = toxicity_stats(step_5_df)
    # NOTE(review): the range printed under '95% CI range' is mean +/- 2*SD of
    # the scores, not mean +/- margin of error -- confirm the label is intended.
    lower = mean - (2 * std)
    upper = mean + (2 * std)
    print('95% CI range')
    print(lower, upper)
    print('Reduced dataset stats')
    print(mean, std, moe)

    report_sample(step_5_df, 0.1, '10%', random_state=random_state)
    report_sample(step_5_df, 0.6, '60%', random_state=random_state)


if __name__ == '__main__':
    main()