assignment_3
py
keyboard_arrow_up
School
University Of Arizona *
*We aren’t endorsed by this school
Course
97
Subject
Mathematics
Date
Feb 20, 2024
Type
py
Pages
5
Uploaded by PrivateIbis3915
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
# df = pd.read_csv('~/Desktop/DM/gaTech/Summer 2022/AI Ethics/toxity_per_attribute_actual.csv')
# reduced_df = df.copy()
# print('Actual len')
# print(len(reduced_df))
# column_names = reduced_df.columns
# # print(reduced_df.columns)
# reduced_df_indexes = df[(df['lesbian'] == False) &
# (df['gay'] == False) &
# (df['bisexual'] == False) &
# (df['transgender'] == False) &
# (df['trans'] == False) &
# (df['queer'] == False) &
# (df['lgbt'] == False) &
# (df['lgbtq'] == False) &
# (df['homosexual'] == False) &
# (df['straight'] == False) &
# (df['heterosexual'] == False) &
# (df['male'] == False) &
# (df['female'] == False) &
# (df['nonbinary'] == False) &
# (df['african'] == False) &
# (df['african american'] == False) &
# (df['black'] == False) &
# (df['white'] == False) &
# (df['european'] == False) &
# (df['hispanic'] == False) &
# (df['latino'] == False) &
# (df['latina'] == False) &
# (df['latinx'] == False) &
# (df['mexican'] == False) &
# (df['canadian'] == False) &
# (df['american'] == False) &
# (df['asian'] == False) &
# (df['indian'] == False) &
# (df['middle eastern'] == False) &
# (df['chinese'] == False) &
# (df['japanese'] == False) &
# (df['christian'] == False) &
# (df['muslim'] == False) &
# (df['jewish'] == False) &
# (df['buddhist'] == False) &
# (df['catholic'] == False) &
# (df['protestant'] == False) &
# (df['sikh'] == False) &
# (df['taoist'] == False) &
# (df['old'] == False) &
# (df['older'] == False) &
# (df['young'] == False) &
# (df['younger'] == False) &
# (df['teenage'] == False) &
# (df['millenial'] == False) &
# (df['middle aged'] == False) &
# (df['elderly'] == False) &
# (df['blind'] == False) &
# (df['deaf'] == False) &
# (df['paralyzed'] == False)].index
# reduced_df.drop(reduced_df_indexes, inplace=True)
# print('Reduced len')
# print(len(reduced_df))
# x = reduced_df['TOXICITY'].mean()
# y = reduced_df['TOXICITY'].std()
# # print(reduced_df['TOXICITY'].mean())
# # print(reduced_df['TOXICITY'].std())
# z = x + (2 * y)
# w = x - (2 * y)
# moe_pos = (1.96 * y) / math.sqrt(len(reduced_df))
# moe_neg = x - (1.96 * y) / math.sqrt(len(reduced_df))
# print('95% CI range')
# print(w, z)
# print('Reduced dataset stats')
# print(x, y, moe_pos, moe_neg)
# n_10 = reduced_df.sample(frac=0.1)
# print('10% sample len')
# print(len(n_10))
# # Mean +/- 1.96*SD/SQRT(n)
# mean_10 = n_10['TOXICITY'].mean()
# std_10 = n_10['TOXICITY'].std()
# moe_10_pos = (1.96 * std_10) / math.sqrt(len(n_10))
# moe_10_neg = mean_10 - (1.96 * std_10) / math.sqrt(len(n_10))
# print('10% sample dataset stats')
# print(mean_10, std_10, moe_10_pos, moe_10_neg)
# n_60 = reduced_df.sample(frac=0.6)
# print('60% sample len')
# print(len(n_60))
# # Mean +/- 1.96*SD/SQRT(n)
# mean_60 = n_60['TOXICITY'].mean()
# std_60 = n_60['TOXICITY'].std()
# moe_60_pos = (1.96 * std_60) / math.sqrt(len(n_60))
# moe_60_neg = mean_60 - (1.96 * std_60) / math.sqrt(len(n_60))
# print('60% sample dataset stats')
# print(mean_60, std_60, moe_60_pos, moe_60_neg)
# step_5_df = pd.read_csv('~/Desktop/DM/gaTech/Summer 2022/AI Ethics/step5.csv')
# print(len(step_5_df))
# x = step_5_df['TOXICITY'].mean()
# y = step_5_df['TOXICITY'].std()
# # print(reduced_df['TOXICITY'].mean())
# # print(reduced_df['TOXICITY'].std())
# z = x + (2 * y)
# w = x - (2 * y)
# moe = (1.96 * y) / math.sqrt(len(step_5_df))
# # moe_neg = x - (1.96 * y) / math.sqrt(len(step_5_df))
# print('95% CI range')
# print(w, z)
# print('Reduced dataset stats')
# print(x, y, moe)
#
# n_10 = step_5_df.sample(frac=0.1)
# print('10% sample len')
# print(len(n_10))
# # Mean +/- 1.96*SD/SQRT(n)
# mean_10 = n_10['TOXICITY'].mean()
# std_10 = n_10['TOXICITY'].std()
# moe_10 = (1.96 * std_10) / math.sqrt(len(n_10))
# # moe_10_neg = mean_10 - (1.96 * std_10) / math.sqrt(len(n_10))
# print('10% sample dataset stats')
# print(mean_10, std_10, moe_10)
# n_60 = step_5_df.sample(frac=0.6)
# print('60% sample len')
# print(len(n_60))
# # Mean +/- 1.96*SD/SQRT(n)
# mean_60 = n_60['TOXICITY'].mean()
# std_60 = n_60['TOXICITY'].std()
# moe_60 = (1.96 * std_60) / math.sqrt(len(n_60))
# # moe_60_neg = mean_60 - (1.96 * std_60) / math.sqrt(len(n_60))
# print('60% sample dataset stats')
# print(mean_60, std_60, moe_60)
# step_5_df = pd.read_csv('~/Desktop/DM/gaTech/Summer 2022/AI Ethics/male.csv')
# print(len(step_5_df))
# x = step_5_df['TOXICITY'].mean()
# y = step_5_df['TOXICITY'].std()
# # print(reduced_df['TOXICITY'].mean())
# # print(reduced_df['TOXICITY'].std())
# z = x + (2 * y)
# w = x - (2 * y)
# moe = (1.96 * y) / math.sqrt(len(step_5_df))
# # moe_neg = x - (1.96 * y) / math.sqrt(len(step_5_df))
# print('95% CI range')
# print(w, z)
# print('Reduced dataset stats')
# print(x, y, moe)
#
# n_10 = step_5_df.sample(frac=0.1)
# print('10% sample len')
# print(len(n_10))
# # Mean +/- 1.96*SD/SQRT(n)
# mean_10 = n_10['TOXICITY'].mean()
# std_10 = n_10['TOXICITY'].std()
# moe_10 = (1.96 * std_10) / math.sqrt(len(n_10))
# # moe_10_neg = mean_10 - (1.96 * std_10) / math.sqrt(len(n_10))
# print('10% sample dataset stats')
# print(mean_10, std_10, moe_10)
# n_60 = step_5_df.sample(frac=0.6)
# print('60% sample len')
# print(len(n_60))
# # Mean +/- 1.96*SD/SQRT(n)
# mean_60 = n_60['TOXICITY'].mean()
# std_60 = n_60['TOXICITY'].std()
# moe_60 = (1.96 * std_60) / math.sqrt(len(n_60))
# # moe_60_neg = mean_60 - (1.96 * std_60) / math.sqrt(len(n_60))
# print('60% sample dataset stats')
# print(mean_60, std_60, moe_60)
#
# step_5_df = pd.read_csv('~/Desktop/DM/gaTech/Summer 2022/AI Ethics/female.csv')
# print(len(step_5_df))
# x = step_5_df['TOXICITY'].mean()
# y = step_5_df['TOXICITY'].std()
# # print(reduced_df['TOXICITY'].mean())
# # print(reduced_df['TOXICITY'].std())
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
# z = x + (2 * y)
# w = x - (2 * y)
# moe = (1.96 * y) / math.sqrt(len(step_5_df))
# # moe_neg = x - (1.96 * y) / math.sqrt(len(step_5_df))
# print('95% CI range')
# print(w, z)
# print('Reduced dataset stats')
# print(x, y, moe)
#
# n_10 = step_5_df.sample(frac=0.1)
# print('10% sample len')
# print(len(n_10))
# # Mean +/- 1.96*SD/SQRT(n)
# mean_10 = n_10['TOXICITY'].mean()
# std_10 = n_10['TOXICITY'].std()
# moe_10 = (1.96 * std_10) / math.sqrt(len(n_10))
# # moe_10_neg = mean_10 - (1.96 * std_10) / math.sqrt(len(n_10))
# print('10% sample dataset stats')
# print(mean_10, std_10, moe_10)
# n_60 = step_5_df.sample(frac=0.6)
# print('60% sample len')
# print(len(n_60))
# # Mean +/- 1.96*SD/SQRT(n)
# mean_60 = n_60['TOXICITY'].mean()
# std_60 = n_60['TOXICITY'].std()
# moe_60 = (1.96 * std_60) / math.sqrt(len(n_60))
# # moe_60_neg = mean_60 - (1.96 * std_60) / math.sqrt(len(n_60))
# print('60% sample dataset stats')
# print(mean_60, std_60, moe_60)
#
def _toxicity_summary(frame):
    """Return ``(mean, std, margin_of_error)`` for ``frame['TOXICITY']``.

    The margin of error is ``1.96 * std / sqrt(n)``, i.e. the half-width of
    a normal-approximation 95% confidence interval for the mean, where ``n``
    is the number of rows in *frame* and ``std`` is whatever
    ``Series.std()`` returns with its default arguments.

    A frame with zero rows yields ``nan`` for the margin instead of
    dividing by ``sqrt(0)``.
    """
    scores = frame['TOXICITY']
    mean = scores.mean()
    std = scores.std()
    n_rows = len(frame)
    if n_rows == 0:
        # A small ``frac`` sample of a tiny file can come back empty.
        return mean, std, float('nan')
    return mean, std, (1.96 * std) / math.sqrt(n_rows)


# NOTE(review): hard-coded, user-specific location; the script only runs on
# a machine that has this file under the home directory.
step_5_df = pd.read_csv('~/Desktop/DM/gaTech/Summer 2022/AI Ethics/other.csv')
print(len(step_5_df))

# Statistics over every row of the file.
x, y, moe = _toxicity_summary(step_5_df)
# Mean +/- 2 standard deviations.
# NOTE(review): this band describes the spread of individual scores; the
# 95% confidence interval of the mean would be ``x +/- moe``. The label
# printed below is kept as-is so the script's output does not change.
z = x + (2 * y)
w = x - (2 * y)
print('95% CI range')
print(w, z)
print('Reduced dataset stats')
print(x, y, moe)

# Random 10% sample (no seed is passed, so the rows differ on every run).
# Margin of error: 1.96*SD/SQRT(n)
n_10 = step_5_df.sample(frac=0.1)
print('10% sample len')
print(len(n_10))
mean_10, std_10, moe_10 = _toxicity_summary(n_10)
print('10% sample dataset stats')
print(mean_10, std_10, moe_10)

# Random 60% sample, drawn independently of the 10% sample above.
n_60 = step_5_df.sample(frac=0.6)
print('60% sample len')
print(len(n_60))
mean_60, std_60, moe_60 = _toxicity_summary(n_60)
print('60% sample dataset stats')
print(mean_60, std_60, moe_60)