# In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Load the dataset
data = pd.read_csv('/content/data_1.csv')
text_column_index = 0  # Specify the index of the column containing loan scenario descriptions
label_column_index = 1  # Specify the index of the column containing the sentiment labels

# Split the dataset into features and labels.
# Rows with a missing text or label are filtered out here: TfidfVectorizer
# raises on NaN documents, and a row without a label cannot be used for
# supervised training. `data` itself is left unmodified; only X and y are
# filtered. Columns are selected by position, so duplicate or unnamed headers
# are not a problem.
text_series = data.iloc[:, text_column_index]
label_series = data.iloc[:, label_column_index]
usable_rows = text_series.notna() & label_series.notna()
# astype(str) guards against a text column that pandas parsed as numeric.
X = text_series[usable_rows].astype(str)
y = label_series[usable_rows]

# Encode the target labels as consecutive integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)

# Split the data into train and test sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Vectorize the text data.
# The vectorizer is fitted on the training texts only and then merely applied
# to the test texts, so no test-set vocabulary or IDF statistics leak into
# training.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train and evaluate Naive Bayes classifier
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train_vec, y_train)

# Mean 5-fold cross-validated accuracy, measured on the TRAINING split only.
# cross_val_score works on clones of the estimator, so the model fitted above
# is not altered by this call.
# NOTE(review): X_train_vec was built with TF-IDF statistics from the whole
# training split, so the CV folds are not fully independent of each other;
# treat this number as a possibly optimistic estimate.
nb_scores = cross_val_score(nb_classifier, X_train_vec, y_train, cv=5)
nb_accuracy = nb_scores.mean()

# Held-out test metrics. Accuracy is computed on the same predictions as
# precision and recall so the three numbers are directly comparable.
# Precision/recall are macro-averaged: every class counts equally.
nb_predictions = nb_classifier.predict(X_test_vec)
nb_test_accuracy = accuracy_score(y_test, nb_predictions)
nb_precision = precision_score(y_test, nb_predictions, average='macro')
nb_recall = recall_score(y_test, nb_predictions, average='macro')
print("Naive Bayes Accuracy:", nb_accuracy)  # cross-validated, training split
print("Naive Bayes Test Accuracy:", nb_test_accuracy)
print("Naive Bayes Precision:", nb_precision)
print("Naive Bayes Recall:", nb_recall)

# Train and evaluate Logistic Regression classifier
# max_iter=1000 gives the solver a higher iteration cap than scikit-learn's
# default of 100.
lr_classifier = LogisticRegression(max_iter=1000)
lr_classifier.fit(X_train_vec, y_train)

# Mean 5-fold cross-validated accuracy, measured on the TRAINING split only.
# cross_val_score works on clones of the estimator, so the model fitted above
# is not altered by this call.
# NOTE(review): X_train_vec was built with TF-IDF statistics from the whole
# training split, so the CV folds are not fully independent of each other;
# treat this number as a possibly optimistic estimate.
lr_scores = cross_val_score(lr_classifier, X_train_vec, y_train, cv=5)
lr_accuracy = lr_scores.mean()

# Held-out test metrics. Accuracy is computed on the same predictions as
# precision and recall so the three numbers are directly comparable.
# Precision/recall are macro-averaged: every class counts equally.
lr_predictions = lr_classifier.predict(X_test_vec)
lr_test_accuracy = accuracy_score(y_test, lr_predictions)
lr_precision = precision_score(y_test, lr_predictions, average='macro')
lr_recall = recall_score(y_test, lr_predictions, average='macro')
print("Logistic Regression Accuracy:", lr_accuracy)  # cross-validated, training split
print("Logistic Regression Test Accuracy:", lr_test_accuracy)
print("Logistic Regression Precision:", lr_precision)
print("Logistic Regression Recall:", lr_recall)

# Train and evaluate Random Forest classifier
# The forest is seeded so repeated runs produce the same model and scores;
# 42 matches the seed already used for the train/test split in this script.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train_vec, y_train)

# Mean 5-fold cross-validated accuracy, measured on the TRAINING split only.
# cross_val_score works on clones of the estimator, so the model fitted above
# is not altered by this call.
# NOTE(review): X_train_vec was built with TF-IDF statistics from the whole
# training split, so the CV folds are not fully independent of each other;
# treat this number as a possibly optimistic estimate.
rf_scores = cross_val_score(rf_classifier, X_train_vec, y_train, cv=5)
rf_accuracy = rf_scores.mean()

# Held-out test metrics. Accuracy is computed on the same predictions as
# precision and recall so the three numbers are directly comparable.
# Precision/recall are macro-averaged: every class counts equally.
rf_predictions = rf_classifier.predict(X_test_vec)
rf_test_accuracy = accuracy_score(y_test, rf_predictions)
rf_precision = precision_score(y_test, rf_predictions, average='macro')
rf_recall = recall_score(y_test, rf_predictions, average='macro')
print("Random Forest Accuracy:", rf_accuracy)  # cross-validated, training split
print("Random Forest Test Accuracy:", rf_test_accuracy)
print("Random Forest Precision:", rf_precision)
print("Random Forest Recall:", rf_recall)
