Digit_recog

py

School

Pennsylvania State University *

*We aren’t endorsed by this school

Course

556

Subject

Economics

Date

Jan 9, 2024

Type

py

Pages

2

Uploaded by MateNightingalePerson923

Report
"""Handwritten-digit recognition on the UCI Multiple Features (mfeat) dataset.

Trains a RandomForest on each of the six feature sets individually, on the
column-wise concatenation of all features, and as a majority-vote ensemble of
the six per-feature classifiers, then prints the accuracy of each approach.
"""
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Dataset layout: 2000 samples = 10 digit classes x 200 consecutive rows each.
N_CLASSES = 10
SAMPLES_PER_CLASS = 200
# Fixed seed so every train_test_split produces the SAME sample partition --
# this alignment is what makes the majority vote and the final accuracy
# comparisons against `test_labels` meaningful.
RANDOM_STATE = 10
N_ESTIMATORS = 10

# One file per feature representation of the same 2000 digit images.
FEATURE_FILES = [
    'mfeat-fou', 'mfeat-fac', 'mfeat-kar',
    'mfeat-pix', 'mfeat-zer', 'mfeat-mor',
]


def load_data(file_path):
    """Load a whitespace-delimited numeric feature file as a 2-D float array.

    Replaces the hand-rolled line/token parsing loop with np.loadtxt,
    which produces the identical float ndarray for these files.
    """
    return np.loadtxt(file_path)


def main():
    """Run the individual, combined, and majority-vote experiments and print results."""
    feature_data = [load_data(name) for name in FEATURE_FILES]
    # Rows are grouped by digit: 200 rows of class 0, then 200 of class 1, ...
    # Built once instead of per split.
    labels = np.repeat(np.arange(N_CLASSES), SAMPLES_PER_CLASS)

    ensemble_predictions = []   # per-feature-set predictions on the test half
    individual_accuracies = []  # per-feature-set test accuracies

    for feature_set in feature_data:
        # 50/50 split; identical random_state keeps the split aligned across
        # feature sets (and with the combined split below).
        train_data, test_data, train_labels, test_labels = train_test_split(
            feature_set, labels,
            test_size=0.5, train_size=0.5, random_state=RANDOM_STATE,
        )
        clf = RandomForestClassifier(n_estimators=N_ESTIMATORS)
        clf.fit(train_data, train_labels)
        predictions = clf.predict(test_data)
        individual_accuracies.append(accuracy_score(test_labels, predictions))
        ensemble_predictions.append(predictions)

    # Combined classifier: all six feature sets concatenated column-wise.
    combined_data = np.concatenate(feature_data, axis=1)
    # Same 50/50 split, expressed as fractions for consistency with the loop
    # above (the original's test_size=1000/train_size=1000 is equivalent for
    # the 2000-row dataset).
    train_data, test_data, train_labels, test_labels = train_test_split(
        combined_data, labels,
        test_size=0.5, train_size=0.5, random_state=RANDOM_STATE,
    )
    clf_combined = RandomForestClassifier(n_estimators=N_ESTIMATORS)
    clf_combined.fit(train_data, train_labels)
    accuracy_combined = accuracy_score(
        test_labels, clf_combined.predict(test_data)
    )

    # Majority vote across the six per-feature classifiers: one row per test
    # sample, one column per classifier. Ties resolve to the smallest label
    # (np.bincount(...).argmax() behavior).
    votes = np.array(ensemble_predictions).T
    majority_predictions = np.apply_along_axis(
        lambda row: np.bincount(row).argmax(), axis=1, arr=votes,
    )
    accuracy_majority_voting = accuracy_score(test_labels, majority_predictions)

    print("Accuracy of individual classifiers for each feature set:")
    for i, acc in enumerate(individual_accuracies):
        print(f"Feature set {i+1}: {acc:.4f}")
    print(f"\nAccuracy of the combined classifier using all features: {accuracy_combined:.4f}")
    print(f"\nAccuracy of the ensemble classifier using majority voting: {accuracy_majority_voting:.4f}")


if __name__ == "__main__":
    main()
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help