hw7_correct

School

Arizona State University *

*We aren’t endorsed by this school

Course

591PYTHON

Subject

Electrical Engineering

Date

Jan 9, 2024

Type

Pages

Uploaded by UltraBoulderHare16

################################################################################### # # Additional project for EEE591 # # # # The code for the first part has not been changed # # from the previous submission. However, in my previous submission, I used combined# # accuracies, here I am using Test accuracies as mentioned in the problem statement# ################################################################################### # import matplotlib.pyplot as plt from sklearn import datasets import numpy as np # needed for arrays from sklearn.model_selection import train_test_split # splits database from sklearn.preprocessing import StandardScaler # standardize data # the algorithm from sklearn.metrics import accuracy_score # grade the results import pandas as pd from pandas import DataFrame,read_csv import seaborn as sns from sklearn.linear_model import Perceptron # Perceptron Algorithm from sklearn.svm import SVC # SVM Algorithm from sklearn.linear_model import LogisticRegression # Logistic Regression Algorithm from sklearn.ensemble import RandomForestClassifier # Random Forest Algorithm from sklearn.neighbors import KNeighborsClassifier # KNN Algorithm from sklearn.tree import DecisionTreeClassifier # Decision Tree Algorithm from sklearn.tree import export_graphviz from sklearn.model_selection import train_test_split # split the data ################################################################################### ####### # The first part (below) of the code reads the data and # # converts it into aalatable form # # Note : throughout the code, ignore comments which begin with the header Debug Comments # ################################################################################### ####### heartdata = pd.read_csv ("heart1.csv") # read the data into a dataframe X = heartdata [['age','sex','cpt', 'rbp','sc','fbs','rer','mhr','eia','opst','dests','nmvcf','thal' ]] #extracting only the features from the dataframe Y = heartdata [['a1p2' ]] #extracting the classifcation X_array = X.to_numpy() #converting the dataframes to arrays for easy manipulation

Y_array = Y.to_numpy() X_train, X_test, Y_train, Y_test= \ train_test_split(X_array,Y_array,test_size=0.3,random_state=0) #splitting data in train and test # converting Y_train and Y_test from an array of arrays to a singular array Y_test_normal = [] for i in Y_test : Y_test_normal.append(i[0]) Y_test = Y_test_normal Y_train_normal = [] for i in Y_train : Y_train_normal.append(i[0]) Y_train = Y_train_normal ############################################### # Converting above data into standard scalars # ############################################### sc = StandardScaler() # create the standard scalar sc.fit(X_train) # compute the required transformation X_train_std = sc.transform(X_train) # apply to the training data X_test_std = sc.transform(X_test) # and SAME transformation of test data ################################################################################### ########## # The next part of the code runs the extracted data through various machine learning models # # and prints out the "test accuracy", previously I used combined, but now test # ################################################################################### ########## ######################################################### # # # Running the above extracted data through a Perceptron # # # ######################################################### ppn = Perceptron(max_iter=100, tol=1e-3, eta0=0.001, # Increasing max_iter to 100 increased accuracy for me fit_intercept=True, random_state=0, verbose=True) ppn.fit(X_train_std, Y_train) # do the training print ("\n") print ("Results for perceptron are : \n") y_pred_ppn = ppn.predict(X_test_std) # applying on test data X_combined_std = np.vstack((X_train_std, X_test_std)) # vstack puts first array above the second in a vertical stack

y_combined = np.hstack((Y_train, Y_test)) # hstack puts first array to left of the second in a horizontal stack # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project # but calculating just for reference y_combined_pred = ppn.predict(X_combined_std) ################################################### # printing test accuracy of perceptron # ################################################### print('Misclassified test samples of perceptron : %d' % \ (Y_test != y_pred_ppn).sum()) print ("Test accuracy of perceptron is : %.2f" % accuracy_score(Y_test, y_pred_ppn),"\n") #################################################### # # # Running data through SVM # # # #################################################### for c_val in [0.9]: # I found c_val = 0.9 yields highest value svm = SVC(kernel='linear', C=c_val, random_state=0) svm.fit(X_train_std, Y_train) # do the training y_pred_svm = svm.predict(X_test_std) # work on the test data # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project # but calculating just for reference X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((Y_train, Y_test)) ################################################### # printing test accuracy of SVM # ################################################### print('\nMisclassified test samples of SVM : %d' % \ (Y_test != y_pred_svm).sum()) print ("Test accuracy of SVM is : %.2f" % accuracy_score(Y_test, y_pred_svm),"\n") #################################################################### # # # Running the data through Logistic Regression # # # #################################################################### for c_val in [1]: # c_val = 1 yielded highes combined accuracy for me lr = LogisticRegression(C=c_val, solver='liblinear', \ multi_class='ovr', random_state=0) lr.fit(X_train_std, Y_train) # apply the algorithm to training data # combine the train and test data X_combined_std = np.vstack((X_train_std, X_test_std)) # vstack puts first array

Your preview ends here

Eager to read complete document? Join bartleby learn and gain access to the full version

Access to all documents
Unlimited textbook solutions
24/7 expert homework help

above the second in a vertical stack y_combined = np.hstack((Y_train, Y_test)) # hstack puts first array to left of the second in a horizontal stack print("\n") print("Results for Logistic Regression are : \n") # print('Number in test ', len(Y_test)) y_pred_lr = lr.predict(X_test_std) # apply to test data X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((Y_train, Y_test)) # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project # but calculating just for reference y_combined_pred = lr.predict(X_combined_std) ################################################### # printing test accuracy of Logistic Regression # ################################################### print('\nMisclassified test samples of Logistic Regression : %d' % \ (Y_test != y_pred_lr).sum()) print ("Test accuracy of Logistic Regression is : %.2f" % accuracy_score(Y_test, y_pred_lr),"\n") ####################################################### # # # Running the data through Decision Tree # # # ####################################################### tree = DecisionTreeClassifier(criterion='entropy',max_depth=7 ,random_state=0) #changing max depth to 7 yielded better results for me tree.fit(X_train,Y_train) X_combined = np.vstack((X_train, X_test)) y_combined = np.hstack((Y_train, Y_test)) print ("\n") print ("Results for Decision Tree Training are : \n") y_pred_decisiontree = tree.predict (X_test) # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project # but calculating just for reference y_combined_pred = tree.predict(X_combined) ################################################### # printing test accuracy of Decision Tree # ################################################### print('\nMisclassified test samples of Decision Tree : %d' % \ (Y_test != y_pred_decisiontree).sum())

print ("Test accuracy of Decision Tree is : %.2f" % accuracy_score(Y_test, y_pred_decisiontree),"\n") ####################################################### # Running the data through Random Forest # ####################################################### for trees in [100]: # Highest accuracy I found was 100 trees forest = RandomForestClassifier(criterion='entropy', n_estimators=trees, \ random_state=1, n_jobs=4) forest.fit(X_train, Y_train) y_pred_randomforest = forest.predict(X_test) # see how we do on the test data # combine the train and test data X_combined = np.vstack((X_train, X_test)) y_combined = np.hstack((Y_train, Y_test)) # print('Number in combined ', len(y_combined)) # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project # but calculating just for reference y_combined_pred = forest.predict(X_combined) print ("Results for Random Forest are : \n") ################################################### # printing test accuracy of Random Forest # ################################################### print('\nMisclassified test samples of Random Forest : %d' % \ (Y_test != y_pred_randomforest).sum()) print ("Test accuracy of Random Forest is : %.2f" % accuracy_score(Y_test, y_pred_randomforest),"\n") ####################################### # Running the data through KNN # ####################################### for neighs in [1]: # 1 yielded highest accuracy # Debug comment : print(neighs,'neighbors') knn = KNeighborsClassifier(n_neighbors=neighs,p=2,metric='minkowski') knn.fit(X_train_std,Y_train) # run on the test data and print results and check accuracy y_pred_knn = knn.predict(X_test_std) # combine the train and test data X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((Y_train, Y_test)) # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project

# but calculating just for reference y_combined_pred = knn.predict(X_combined_std) print ("Results for KNN are : \n") ################################################### # printing test accuracy of KNN # ################################################### print('\nMisclassified test samples of KNN : %d' % \ (Y_test != y_pred_knn).sum()) print ("Test accuracy of KNN is : %.2f" % accuracy_score(Y_test, y_pred_knn),"\n") ################################################################################### #### # The Final part of the code uses ensemble learning with 3, 4,5,6 models respectively # ################################################################################### #### print ("\nResults of Ensemble Learning are : \n") ####################################################################### # Ensemble Learning using 3 Models Perceptron,SVM,Logistic Regression # ####################################################################### x = (y_pred_ppn + y_pred_svm + y_pred_lr)/3 #combining the predictions of 3 models and divide by 3 y_threemodels = [] #empty list to store final predictions # round away the predictions towards nearest integer # 1.66 gets rounded to 2. Because more models have chosen 2 # Similarly 1.33 gets rounded to 1, because more models have chosen 1 for i in x : z = np.int64( round(i)) y_threemodels.append(z) print ("Test accuracy of ensemble learning using 3 models is : %.2f" % accuracy_score(y_threemodels, Y_test),"\n") ################################################################################### #### # Ensemble Learning using 4 models Perceptron,SVM,Logistic Regression,Decision Tree # ################################################################################### #### x = (y_pred_ppn + y_pred_svm + y_pred_lr + y_pred_decisiontree)/4 y_fourmodels = [] # round away the predictions towards nearest integer ties = 0 # to count no. of ties netween models for i in range(len(x)) : if x[i] == 1.5 : # However if 2 models have chosen 1, two models have chosen 2. Go with Logistic regression model # because individually Logistic Regression has the highest output z = y_pred_lr [i] y_fourmodels.append(z)

Your preview ends here

Eager to read complete document? Join bartleby learn and gain access to the full version

Access to all documents
Unlimited textbook solutions
24/7 expert homework help

ties = ties + 1 else : z = np.int64( round(x[i])) y_fourmodels.append(z) print ("Test accuracy of ensemble learning using 4 models is : %.2f" % accuracy_score(y_fourmodels, Y_test)) print ("no. of ties in ensemble learning using 4 models : ", ties,"\n") ################################################################################### ################## # Ensemble Learning using 5 models Perceptron,SVM,Logistic Regression,Decision Tree,Random Forest # ################################################################################### ################## x = (y_pred_ppn + y_pred_svm + y_pred_lr + y_pred_decisiontree + y_pred_randomforest)/5 y_fivemodels = [] # round away the predictions towards nearest integer for i in range(len(x)) : if x[i] == 1.5 : z = y_pred_lr [i] y_fivemodels.append(z) else : z = np.int64( round(x[i])) y_fivemodels.append(z) print ("Test accuracy of ensemble learning using 5 models is : %.2f" % accuracy_score(y_fivemodels, Y_test),"\n") ################################################################################### ####################### # Ensemble Learning using 6 models Perceptron,SVM,Logistic Regression,Decision Tree,Random Forest, KNN # ################################################################################### ####################### x = (y_pred_ppn + y_pred_svm + y_pred_lr + y_pred_decisiontree + y_pred_randomforest + y_pred_knn)/6 y_sixmodels = [] ties = 0 # to count no. of ties netween models # round away the predictions towards nearest integer # However if 2 models have chosen 1, two models have chosen 2. Go with Logistic regression model # because individually Logistic Regression has the highest output for i in range(len(x)) : if x[i] == 1.5 : z = y_pred_lr [i] y_sixmodels.append(z) ties = ties + 1 else : z = np.int64( round(x[i]))

y_sixmodels.append(z) print ("Test accuracy of ensemble learning using 6 models is : %.2f" % accuracy_score(y_sixmodels, Y_test)) print ("no. of ties in ensemble learning using 6 models : ", ties,"\n")