message (2)

txt

School

University Of Arizona *

*We aren’t endorsed by this school

Course

BIOCHEMIST

Subject

Computer Science

Date

Jan 9, 2024

Type

txt

Pages

3

Uploaded by MasterEnergyDragonfly27

Report
"""Train and compare four text classifiers on the EDOS labelled dataset.

Models: Logistic Regression, Multinomial Naive Bayes, Ridge (L2-penalized
logistic regression) and Lasso (L1-penalized logistic regression), each
evaluated on TF-IDF and Bag-of-Words features using the dataset's own
train/test 'split' column.
"""

import re

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import (
    ENGLISH_STOP_WORDS,
    CountVectorizer,
    TfidfVectorizer,
)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.naive_bayes import MultinomialNB

# Common Unicode emoji / pictograph ranges, compiled once.
_EMOJI_RE = re.compile(
    "["
    "\U0001F300-\U0001FAFF"  # symbols, pictographs, emoticons, transport
    "\U00002600-\U000027BF"  # misc symbols and dingbats
    "\U0001F1E6-\U0001F1FF"  # regional indicator (flag) pairs
    "\u2B00-\u2BFF"          # misc symbols and arrows
    "]+"
)

# Longest suffixes first so e.g. "ies" wins over "es"/"s".
_SUFFIXES = ("ies", "ing", "ed", "es", "ly", "s")


def remove_emojis(text):
    """Return *text* with emoji characters stripped.

    NOTE(review): the original script called this helper but never defined
    it; this implementation covers the common emoji code-point ranges.
    """
    return _EMOJI_RE.sub("", text)


def remove_stopwords(text):
    """Drop common English stopwords (sklearn's built-in list) from *text*."""
    return " ".join(w for w in text.split() if w not in ENGLISH_STOP_WORDS)


def lemmatize_and_stem(text):
    """Crude suffix-stripping normalizer for each whitespace token.

    NOTE(review): stand-in for the undefined helper the original called.
    It avoids adding an NLP dependency; swap in NLTK's WordNetLemmatizer +
    PorterStemmer if nltk is available to the project.
    """
    def stem(word):
        for suffix in _SUFFIXES:
            # Only strip when a meaningful stem (>2 chars) remains.
            if word.endswith(suffix) and len(word) - len(suffix) > 2:
                return word[: -len(suffix)]
        return word

    return " ".join(stem(w) for w in text.split())


def train_evaluate_model(model, X_train, Y_train, X_test, Y_test):
    """Fit *model* on the training data and score it on the test data.

    Returns a 4-tuple (accuracy, precision, recall, f1); precision,
    recall and F1 are weighted averages across classes.
    """
    model.fit(X_train, Y_train)
    y_pred = model.predict(X_test)
    return (
        accuracy_score(Y_test, y_pred),
        precision_score(Y_test, y_pred, average="weighted"),
        recall_score(Y_test, y_pred, average="weighted"),
        f1_score(Y_test, y_pred, average="weighted"),
    )


def main():
    # Step 1: load the data.
    data = pd.read_csv("edos_labelled_data.csv")

    # Step 2: clean the text (lowercase -> de-emoji -> de-stopword -> stem).
    processed = data["text"].str.lower()
    processed = processed.apply(remove_emojis)
    processed = processed.apply(remove_stopwords)
    data["processed_text"] = processed.apply(lemmatize_and_stem)

    # Step 3: split FIRST, then fit the vectorizers on the training split
    # only.  The original fit on the full corpus, leaking test-set
    # vocabulary and IDF statistics into the features; it also built the
    # Bag-of-Words matrix twice and materialized dense DataFrames it
    # never used.
    train_data = data[data["split"] == "train"]
    test_data = data[data["split"] == "test"]

    tfidf = TfidfVectorizer()
    X_train_tf_idf = tfidf.fit_transform(train_data["processed_text"])
    X_test_tf_idf = tfidf.transform(test_data["processed_text"])

    bow = CountVectorizer()
    X_train_bow = bow.fit_transform(train_data["processed_text"])
    X_test_bow = bow.transform(test_data["processed_text"])

    y_train = train_data["label"]
    y_test = test_data["label"]

    # One fresh estimator per (model, feature-set) run so fits are independent.
    models = (
        ("Logistic Regression", lambda: LogisticRegression()),
        ("Naive Bayes", lambda: MultinomialNB()),
        ("Ridge", lambda: LogisticRegression(penalty="l2")),
        ("Lasso", lambda: LogisticRegression(penalty="l1", solver="liblinear")),
    )
    feature_sets = (
        ("TF-IDF", X_train_tf_idf, X_test_tf_idf),
        ("BoW", X_train_bow, X_test_bow),
    )
    for model_name, make_model in models:
        print("")
        for feat_name, X_tr, X_te in feature_sets:
            acc, prec, rec, f1 = train_evaluate_model(
                make_model(), X_tr, y_train, X_te, y_test
            )
            print(
                f"{model_name} with {feat_name} - Accuracy:", acc,
                "Precision:", prec, "Recall:", rec, "F1 Score:", f1,
            )


if __name__ == "__main__":
    main()
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help