Digit_recog_Q3

py

School

Pennsylvania State University *

*We aren’t endorsed by this school

Course

556

Subject

Economics

Date

Jan 9, 2024

Type

py

Pages

2

Uploaded by MateNightingalePerson923

Report
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

# Fixed seed so the shuffled split and the random forests -- and therefore
# the printed accuracies -- are reproducible from run to run.
RANDOM_STATE = 0


# Function to load data from file
def load_data(file_path):
    """Load a whitespace-separated numeric table into a float array.

    Each non-blank line of the file becomes one row; blank lines (such as a
    trailing empty line at the end of the file) are skipped instead of
    producing an empty row.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A NumPy array with one row per non-blank line.

    Raises:
        ValueError: If a token cannot be parsed as a float, or if the rows
            do not all contain the same number of values.
    """
    with open(file_path, 'r') as file:
        rows = [[float(x) for x in line.split()] for line in file if line.strip()]
    row_widths = {len(row) for row in rows}
    if len(row_widths) > 1:
        raise ValueError(
            f"{file_path}: rows have inconsistent lengths {sorted(row_widths)}"
        )
    return np.array(rows)


# Load data from each feature file
feature_files = [
    'mfeat-fou',
    'mfeat-fac',
    'mfeat-kar',
    'mfeat-pix',
    'mfeat-zer',
    'mfeat-mor',
]
feature_data = [load_data(file) for file in feature_files]

# Column index that separates the first three feature sets from the last
# three. Derived from the loaded data rather than hard-coded (it is 356 when
# the first three files have 76, 216 and 64 columns).
first_group_width = sum(block.shape[1] for block in feature_data[:3])

# Concatenating features from all 6 types, column-wise
all_features = np.concatenate(feature_data, axis=1)

# Labels are built as 200 consecutive rows per digit 0-9.
# NOTE(review): this assumes the files list samples grouped by class in that
# order -- confirm against the dataset description.
all_labels = np.repeat(np.arange(10), 200)

# Splitting the data into 1000 training examples and 1000 test examples
train_features, test_features, train_labels, test_labels = train_test_split(
    all_features,
    all_labels,
    test_size=1000,
    train_size=1000,
    random_state=RANDOM_STATE,
)

# The first 500 training rows keep only the first three feature sets and the
# last 500 keep only the last three. Row order is preserved so that the two
# halves line up with train_labels[:500] and train_labels[500:1000] below.
train_features_known1 = train_features[:500, :first_group_width]
train_features_known2 = train_features[500:, first_group_width:]

# Mark the withheld columns of each half as missing (NaN) and rebuild
# full-width rows so both halves share the same column layout.
train_missing_known1 = np.full(
    (train_features_known1.shape[0], train_features.shape[1] - first_group_width),
    np.nan,
)
train_combined_known1 = np.concatenate(
    (train_features_known1, train_missing_known1), axis=1
)
train_missing_known2 = np.full(
    (train_features_known2.shape[0], first_group_width), np.nan
)
train_combined_known2 = np.concatenate(
    (train_missing_known2, train_features_known2), axis=1
)
train_combined = np.vstack((train_combined_known1, train_combined_known2))

# Fit a mean imputer on the stacked training data: every column is observed
# in exactly one of the two halves, so each column mean comes from that half.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(train_combined)

# Fill the missing columns of each training half with the column means
imputed_train_features1 = imputer.transform(train_combined_known1)
imputed_train_features2 = imputer.transform(train_combined_known2)

# Train the first model on the half that knows the first 3 feature sets and
# the second model on the half that knows the last 3 feature sets
model1 = RandomForestClassifier(n_estimators=5, random_state=RANDOM_STATE)
model1.fit(imputed_train_features1, train_labels[:500])
model2 = RandomForestClassifier(n_estimators=5, random_state=RANDOM_STATE)
model2.fit(imputed_train_features2, train_labels[500:1000])

# Evaluate accuracy on the (fully observed) test set
predictions1 = model1.predict(test_features)
predictions2 = model2.predict(test_features)
accuracy1 = accuracy_score(test_labels, predictions1)
accuracy2 = accuracy_score(test_labels, predictions2)
print(f"The accuracy with model 1: {accuracy1:.4f}")
print(f"The accuracy with model 2: {accuracy2:.4f}")
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help