hw7_correct

py

School

Arizona State University *

*We aren’t endorsed by this school

Course

591PYTHON

Subject

Electrical Engineering

Date

Jan 9, 2024

Type

py

Pages

8

Uploaded by UltraBoulderHare16

Report
################################################################################### # # Additional project for EEE591 # # # # The code for the first part has not been changed # # from the previous submission. However, in my previous submission, I used combined# # accuracies, here I am using Test accuracies as mentioned in the problem statement# ################################################################################### # import matplotlib.pyplot as plt from sklearn import datasets import numpy as np # needed for arrays from sklearn.model_selection import train_test_split # splits database from sklearn.preprocessing import StandardScaler # standardize data # the algorithm from sklearn.metrics import accuracy_score # grade the results import pandas as pd from pandas import DataFrame,read_csv import seaborn as sns from sklearn.linear_model import Perceptron # Perceptron Algorithm from sklearn.svm import SVC # SVM Algorithm from sklearn.linear_model import LogisticRegression # Logistic Regression Algorithm from sklearn.ensemble import RandomForestClassifier # Random Forest Algorithm from sklearn.neighbors import KNeighborsClassifier # KNN Algorithm from sklearn.tree import DecisionTreeClassifier # Decision Tree Algorithm from sklearn.tree import export_graphviz from sklearn.model_selection import train_test_split # split the data ################################################################################### ####### # The first part (below) of the code reads the data and # # converts it into aalatable form # # Note : throughout the code, ignore comments which begin with the header Debug Comments # ################################################################################### ####### heartdata = pd.read_csv ("heart1.csv") # read the data into a dataframe X = heartdata [['age','sex','cpt', 'rbp','sc','fbs','rer','mhr','eia','opst','dests','nmvcf','thal' ]] #extracting only the features from the dataframe Y = heartdata [['a1p2' ]] #extracting the classifcation X_array = X.to_numpy() #converting the dataframes to arrays for easy manipulation
Y_array = Y.to_numpy() X_train, X_test, Y_train, Y_test= \ train_test_split(X_array,Y_array,test_size=0.3,random_state=0) #splitting data in train and test # converting Y_train and Y_test from an array of arrays to a singular array Y_test_normal = [] for i in Y_test : Y_test_normal.append(i[0]) Y_test = Y_test_normal Y_train_normal = [] for i in Y_train : Y_train_normal.append(i[0]) Y_train = Y_train_normal ############################################### # Converting above data into standard scalars # ############################################### sc = StandardScaler() # create the standard scalar sc.fit(X_train) # compute the required transformation X_train_std = sc.transform(X_train) # apply to the training data X_test_std = sc.transform(X_test) # and SAME transformation of test data ################################################################################### ########## # The next part of the code runs the extracted data through various machine learning models # # and prints out the "test accuracy", previously I used combined, but now test # ################################################################################### ########## ######################################################### # # # Running the above extracted data through a Perceptron # # # ######################################################### ppn = Perceptron(max_iter=100, tol=1e-3, eta0=0.001, # Increasing max_iter to 100 increased accuracy for me fit_intercept=True, random_state=0, verbose=True) ppn.fit(X_train_std, Y_train) # do the training print ("\n") print ("Results for perceptron are : \n") y_pred_ppn = ppn.predict(X_test_std) # applying on test data X_combined_std = np.vstack((X_train_std, X_test_std)) # vstack puts first array above the second in a vertical stack
y_combined = np.hstack((Y_train, Y_test)) # hstack puts first array to left of the second in a horizontal stack # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project # but calculating just for reference y_combined_pred = ppn.predict(X_combined_std) ################################################### # printing test accuracy of perceptron # ################################################### print('Misclassified test samples of perceptron : %d' % \ (Y_test != y_pred_ppn).sum()) print ("Test accuracy of perceptron is : %.2f" % accuracy_score(Y_test, y_pred_ppn),"\n") #################################################### # # # Running data through SVM # # # #################################################### for c_val in [0.9]: # I found c_val = 0.9 yields highest value svm = SVC(kernel='linear', C=c_val, random_state=0) svm.fit(X_train_std, Y_train) # do the training y_pred_svm = svm.predict(X_test_std) # work on the test data # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project # but calculating just for reference X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((Y_train, Y_test)) ################################################### # printing test accuracy of SVM # ################################################### print('\nMisclassified test samples of SVM : %d' % \ (Y_test != y_pred_svm).sum()) print ("Test accuracy of SVM is : %.2f" % accuracy_score(Y_test, y_pred_svm),"\n") #################################################################### # # # Running the data through Logistic Regression # # # #################################################################### for c_val in [1]: # c_val = 1 yielded highes combined accuracy for me lr = LogisticRegression(C=c_val, solver='liblinear', \ multi_class='ovr', random_state=0) lr.fit(X_train_std, Y_train) # apply the algorithm to training data # combine the train and test data X_combined_std = np.vstack((X_train_std, X_test_std)) # vstack puts first array
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
above the second in a vertical stack y_combined = np.hstack((Y_train, Y_test)) # hstack puts first array to left of the second in a horizontal stack print("\n") print("Results for Logistic Regression are : \n") # print('Number in test ', len(Y_test)) y_pred_lr = lr.predict(X_test_std) # apply to test data X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((Y_train, Y_test)) # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project # but calculating just for reference y_combined_pred = lr.predict(X_combined_std) ################################################### # printing test accuracy of Logistic Regression # ################################################### print('\nMisclassified test samples of Logistic Regression : %d' % \ (Y_test != y_pred_lr).sum()) print ("Test accuracy of Logistic Regression is : %.2f" % accuracy_score(Y_test, y_pred_lr),"\n") ####################################################### # # # Running the data through Decision Tree # # # ####################################################### tree = DecisionTreeClassifier(criterion='entropy',max_depth=7 ,random_state=0) #changing max depth to 7 yielded better results for me tree.fit(X_train,Y_train) X_combined = np.vstack((X_train, X_test)) y_combined = np.hstack((Y_train, Y_test)) print ("\n") print ("Results for Decision Tree Training are : \n") y_pred_decisiontree = tree.predict (X_test) # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project # but calculating just for reference y_combined_pred = tree.predict(X_combined) ################################################### # printing test accuracy of Decision Tree # ################################################### print('\nMisclassified test samples of Decision Tree : %d' % \ (Y_test != y_pred_decisiontree).sum())
print ("Test accuracy of Decision Tree is : %.2f" % accuracy_score(Y_test, y_pred_decisiontree),"\n") ####################################################### # Running the data through Random Forest # ####################################################### for trees in [100]: # Highest accuracy I found was 100 trees forest = RandomForestClassifier(criterion='entropy', n_estimators=trees, \ random_state=1, n_jobs=4) forest.fit(X_train, Y_train) y_pred_randomforest = forest.predict(X_test) # see how we do on the test data # combine the train and test data X_combined = np.vstack((X_train, X_test)) y_combined = np.hstack((Y_train, Y_test)) # print('Number in combined ', len(y_combined)) # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project # but calculating just for reference y_combined_pred = forest.predict(X_combined) print ("Results for Random Forest are : \n") ################################################### # printing test accuracy of Random Forest # ################################################### print('\nMisclassified test samples of Random Forest : %d' % \ (Y_test != y_pred_randomforest).sum()) print ("Test accuracy of Random Forest is : %.2f" % accuracy_score(Y_test, y_pred_randomforest),"\n") ####################################### # Running the data through KNN # ####################################### for neighs in [1]: # 1 yielded highest accuracy # Debug comment : print(neighs,'neighbors') knn = KNeighborsClassifier(n_neighbors=neighs,p=2,metric='minkowski') knn.fit(X_train_std,Y_train) # run on the test data and print results and check accuracy y_pred_knn = knn.predict(X_test_std) # combine the train and test data X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((Y_train, Y_test)) # The below part shows how the combination of test and train data did, but we are not going to use it here for eee591 project
# but calculating just for reference y_combined_pred = knn.predict(X_combined_std) print ("Results for KNN are : \n") ################################################### # printing test accuracy of KNN # ################################################### print('\nMisclassified test samples of KNN : %d' % \ (Y_test != y_pred_knn).sum()) print ("Test accuracy of KNN is : %.2f" % accuracy_score(Y_test, y_pred_knn),"\n") ################################################################################### #### # The Final part of the code uses ensemble learning with 3, 4,5,6 models respectively # ################################################################################### #### print ("\nResults of Ensemble Learning are : \n") ####################################################################### # Ensemble Learning using 3 Models Perceptron,SVM,Logistic Regression # ####################################################################### x = (y_pred_ppn + y_pred_svm + y_pred_lr)/3 #combining the predictions of 3 models and divide by 3 y_threemodels = [] #empty list to store final predictions # round away the predictions towards nearest integer # 1.66 gets rounded to 2. Because more models have chosen 2 # Similarly 1.33 gets rounded to 1, because more models have chosen 1 for i in x : z = np.int64( round(i)) y_threemodels.append(z) print ("Test accuracy of ensemble learning using 3 models is : %.2f" % accuracy_score(y_threemodels, Y_test),"\n") ################################################################################### #### # Ensemble Learning using 4 models Perceptron,SVM,Logistic Regression,Decision Tree # ################################################################################### #### x = (y_pred_ppn + y_pred_svm + y_pred_lr + y_pred_decisiontree)/4 y_fourmodels = [] # round away the predictions towards nearest integer ties = 0 # to count no. of ties netween models for i in range(len(x)) : if x[i] == 1.5 : # However if 2 models have chosen 1, two models have chosen 2. Go with Logistic regression model # because individually Logistic Regression has the highest output z = y_pred_lr [i] y_fourmodels.append(z)
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
ties = ties + 1 else : z = np.int64( round(x[i])) y_fourmodels.append(z) print ("Test accuracy of ensemble learning using 4 models is : %.2f" % accuracy_score(y_fourmodels, Y_test)) print ("no. of ties in ensemble learning using 4 models : ", ties,"\n") ################################################################################### ################## # Ensemble Learning using 5 models Perceptron,SVM,Logistic Regression,Decision Tree,Random Forest # ################################################################################### ################## x = (y_pred_ppn + y_pred_svm + y_pred_lr + y_pred_decisiontree + y_pred_randomforest)/5 y_fivemodels = [] # round away the predictions towards nearest integer for i in range(len(x)) : if x[i] == 1.5 : z = y_pred_lr [i] y_fivemodels.append(z) else : z = np.int64( round(x[i])) y_fivemodels.append(z) print ("Test accuracy of ensemble learning using 5 models is : %.2f" % accuracy_score(y_fivemodels, Y_test),"\n") ################################################################################### ####################### # Ensemble Learning using 6 models Perceptron,SVM,Logistic Regression,Decision Tree,Random Forest, KNN # ################################################################################### ####################### x = (y_pred_ppn + y_pred_svm + y_pred_lr + y_pred_decisiontree + y_pred_randomforest + y_pred_knn)/6 y_sixmodels = [] ties = 0 # to count no. of ties netween models # round away the predictions towards nearest integer # However if 2 models have chosen 1, two models have chosen 2. Go with Logistic regression model # because individually Logistic Regression has the highest output for i in range(len(x)) : if x[i] == 1.5 : z = y_pred_lr [i] y_sixmodels.append(z) ties = ties + 1 else : z = np.int64( round(x[i]))
y_sixmodels.append(z) print ("Test accuracy of ensemble learning using 6 models is : %.2f" % accuracy_score(y_sixmodels, Y_test)) print ("no. of ties in ensemble learning using 6 models : ", ties,"\n")