My Python program analyzes data from a CSV file that contains only numbers. The program is designed to find the median and standard deviation. I'm trying to figure out one part: wherever the last column of the file contains a zero, the program should find the numbers at the same row index in the other columns and then calculate the average of those numbers. However, my program processes the data column by column, not row by row. How could this be fixed? For example, if the last column had a zero in some row, and one other column had a 3 at that same row index and another had a 4, then the 3 and the 4 would be included in the calculation.
My Python
# Lucas Conklin
# 5772707
import csv
import statistics
def readCSVIntoDictionary(f_name):
    """Read a numeric CSV file and return its contents column by column.

    Despite the name, the result is a list of lists, not a dictionary:
    ``data[c][r]`` is the value in column ``c`` of row ``r``, converted to
    ``float``.  The number of columns is taken from the first non-empty row.

    Args:
        f_name: Path of the CSV file to read.

    Returns:
        A list holding one inner list of floats per column, or an empty
        list when the file contains no data rows.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a row has more fields than the first non-empty row.
    """
    data = []
    # The csv module asks for files to be opened with newline="" so that it
    # can do its own line-ending handling.  The with-block closes the file,
    # so no explicit close() is needed.
    with open(f_name, newline="") as f:
        for row in csv.reader(f):
            if not row:
                # Blank lines are parsed as empty rows and carry no values.
                continue
            if not data:
                # First data row: create one empty column per field.
                data = [[] for _ in row]
            for index in range(len(row)):
                data[index].append(float(row[index]))
    return data
# Load the data set once.  `features` is a list of columns, so
# features[c][r] is the value in column c of row r.
features = readCSVIntoDictionary("C:\\Users\\lucas\\Downloads\\pima.csv")
# `row` holds the same column-by-column data.  It is copied from `features`
# rather than produced by reading and parsing the same file a second time;
# the inner lists are copied so the two structures stay independent.
row = [list(column) for column in features]
def find_median_and_SD(data, feature):
    """Return the rounded median and sample standard deviation of one column.

    Args:
        data: Sequence of columns, each a sequence of numbers.
        feature: Index into ``data`` selecting the column to summarise.

    Returns:
        A ``(median, standard_deviation)`` tuple, with the median rounded to
        4 decimal places and the standard deviation rounded to 5.

    Raises:
        statistics.StatisticsError: If the column is empty (median) or has
            fewer than two values (standard deviation).
    """
    column = data[feature]
    return (
        round(statistics.median(column), 4),
        round(statistics.stdev(column), 5),
    )
# Report the median and standard deviation of every column, the last one
# included, in column order.
for i in range(len(features)):
    median, standard_deviation = find_median_and_SD(features, i)
    print(f'Feature {i} Median: {median} Standard Deviation: {standard_deviation}')
# Value of the last column that selects the rows of interest.
target_feature = 0
# For every column except the last, keep only the values from rows whose
# last-column entry equals target_feature, then print the median and the
# sample standard deviation of those values.
for feature in range(len(features) - 1):
    valid_data = [
        features[feature][i]
        for i in range(len(features[-1]))
        if target_feature == features[-1][i]
    ]
    print(statistics.median(valid_data))
    print(statistics.stdev(valid_data))
Trending now
This is a popular solution!
Step by step
Solved in 3 steps
How could this code work if zero_indices were built with a for loop instead of the list comprehension zero_indices = [i for i in range(len(last_column)) if last_column[i] == 0]?
def find_nonzero_indices(col):
    """Return the positions in ``col`` whose value is not equal to zero.

    Args:
        col: Indexable sequence of numbers (it must support ``len()`` and
            integer indexing).

    Returns:
        A list of indexes, in ascending order, at which ``col`` holds a
        non-zero value; an empty list when there are none.
    """
    return [position for position in range(len(col)) if col[position] != 0]
# Load the data column by column: features[c][r] is column c, row r.
features = readCSVIntoDictionary("C:\\Users\\lucas\\Downloads\\pima.csv")
# An empty file yields no columns at all; treat that as an empty last
# column instead of failing on features[-1].
last_column = features[-1] if features else []
# Row indexes at which the last column holds a zero.
zero_indices = [i for i in range(len(last_column)) if last_column[i] == 0]

# Per-feature statistics.  The loop runs over column indexes because
# find_median_and_SD expects a column index; row indexes would run past the
# end of `features` as soon as the file has more rows than columns.
for i in range(len(features)):
    median, stdev = find_median_and_SD(features, i)
    print(f"Feature {i} Median: {median} Standard Deviation: {stdev}")

# Row-wise averages for the rows flagged above.
for zero_index in zero_indices:
    valid_data = []
    # Walk across the columns of this one row.  The last column is skipped:
    # it is zero for every row in zero_indices, so it can never contribute.
    for j in range(len(features) - 1):
        if features[j][zero_index] != 0:
            valid_data.append(features[j][zero_index])
    if valid_data:
        avg = sum(valid_data) / len(valid_data)
        print(f"Average of non-zero values in row {zero_index}: {avg}")
    else:
        print(f"There are no non-zero values in row {zero_index}")
How could this code work without the following line?
zero_indices = [i for i in range(len(last_column)) if last_column[i] == 0]
def find_nonzero_indices(col):
    """Return the positions in ``col`` whose value is not equal to zero.

    Args:
        col: Indexable sequence of numbers (it must support ``len()`` and
            integer indexing).

    Returns:
        A list of indexes, in ascending order, at which ``col`` holds a
        non-zero value; an empty list when there are none.
    """
    return [position for position in range(len(col)) if col[position] != 0]
# Load the data column by column: features[c][r] is column c, row r.
features = readCSVIntoDictionary("C:\\Users\\lucas\\Downloads\\pima.csv")
# An empty file yields no columns at all; treat that as an empty last
# column instead of failing on features[-1].
last_column = features[-1] if features else []
# Row indexes at which the last column holds a zero.
zero_indices = [i for i in range(len(last_column)) if last_column[i] == 0]

# Per-feature statistics.  The loop runs over column indexes because
# find_median_and_SD expects a column index; row indexes would run past the
# end of `features` as soon as the file has more rows than columns.
for i in range(len(features)):
    median, stdev = find_median_and_SD(features, i)
    print(f"Feature {i} Median: {median} Standard Deviation: {stdev}")

# Row-wise averages for the rows flagged above.
for zero_index in zero_indices:
    valid_data = []
    # Walk across the columns of this one row.  The last column is skipped:
    # it is zero for every row in zero_indices, so it can never contribute.
    for j in range(len(features) - 1):
        if features[j][zero_index] != 0:
            valid_data.append(features[j][zero_index])
    if valid_data:
        avg = sum(valid_data) / len(valid_data)
        print(f"Average of non-zero values in row {zero_index}: {avg}")
    else:
        print(f"There are no non-zero values in row {zero_index}")
How could this be done without the zip() function or enumerate?