In [6]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score

# Read dataset
temp1 = pd.read_csv('egmo_combined.csv')
temp2 = pd.read_csv('imo_combined.csv')
df = pd.concat([temp1, temp2], axis=0)
df.reset_index(drop=True, inplace=True)

df = df.sample(frac=1)
df.reset_index(drop=True, inplace=True)

df.shape

In [None]:
X, y = df["post_canonical"], df["score"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.02, random_state=42)

# Creating a pipeline with TF-IDF vectorizer and MLP classifier
clf = make_pipeline(
    TfidfVectorizer(),
    MLPRegressor(
        hidden_layer_sizes=(300,), 
        activation="relu",
        solver="adam",
        alpha=0.0001,
        learning_rate_init=0.0001,
        max_iter=1000, 
        random_state=42)
)

# Training the classifier
clf.fit(X_train, y_train)

# Predicting labels for the test set
y_pred = clf.predict(X_test)

# Evaluating the model
# print(classification_report(y_test, y_pred))


In [17]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("MAE:", mae)
print("MSE:", mse)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

percentage_performance = 100 - (mse / (sum(y_test) / len(y_test))) * 10
print("\nPercentage of Regression Performance:", percentage_performance, "%")

MAE: 2.6435426444337145
MSE: 10.605275256525001
Root Mean Squared Error: 3.2565741595309943
R-squared: -0.7990747175265558

Percentage of Regression Performance: 77.90486935887131 %


In [23]:
print(f"{list(y_test)}\n{list(y_pred)}")

[4.0625, 7.012987012987013, 2.21483942414175, 1.5102040816326523, 7.894345238095238, 0.8333333333333337, 6.555023923444976, 7.405741957800069, 5.706666666666667, 4.802604802604803]
[5.0220357547388295, 5.289297656301426, 7.955672357085305, 3.0113522190867505, 4.858094436160839, 6.957863219149323, 3.17744314262454, 6.332957679684216, 8.471576569636623, 4.93676941546324]
