In [7]:
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import neural_network
from sklearn.metrics import accuracy_score

# Utils

In [8]:
def clean_data(review):
    if isinstance(review, str):
        no_punc = re.sub(r'[^\w\s]', '', review)
        return ''.join([i for i in no_punc if not i.isdigit()])
    return ''

def read_dataset():
    df = pd.read_csv("./data/emotions_labeled.csv")
    return df

emotions_label = {
    0: 'sadness',
    1: 'joy',
    2: 'love', 
    3: 'anger', 
    4: 'fear',
    5: 'surprise'
}

# Train test split

In [20]:
vectorizer = TfidfVectorizer(max_features=500)
dataset = read_dataset()

x_train, x_test, y_train, y_test = train_test_split(dataset['text'], dataset['label'],  test_size=0.2, random_state=21)

x_train = vectorizer.fit_transform(x_train)
x_test = vectorizer.transform(x_test)

# Train model

In [33]:
classifier = neural_network.MLPClassifier(hidden_layer_sizes=(64,), max_iter=50, early_stopping=True, n_iter_no_change=5, validation_fraction=.1, random_state=42)
classifier.fit(x_train, y_train)

# Test model

In [34]:
y_predict = classifier.predict(x_test)

print(f"Accuracy: {accuracy_score(y_test, y_predict)}\n")
conf_matrix = confusion_matrix(y_test, y_predict)
emotions = [emotions_label[i] for i in sorted(emotions_label)]
conf_df = pd.DataFrame(conf_matrix, index=emotions, columns=emotions)
print(conf_df)

Accuracy: 0.5706197068208536

          sadness    joy  love  anger  fear  surprise
sadness     16511   5616   196    993   721        61
joy          5822  20514   879    479   388       128
love         1478   3140  2131    184   101        11
anger        4180   2541    87   3801   647        23
fear         3081   2010    42    426  3880       253
surprise      633    867    13     57   737       731


# ANN32 -> ANN64

| Model | Accuracy |
| ----- | -------- |
| ANN32 | **0.56** |
| ANN64 | **0.57** |


---

| True \ Pred  | Sadness | Joy    | Love  | Anger | Fear  | Surprise |
| ------------ | ------- | ------ | ----- | ----- | ----- | -------- |
| **Sadness**  | 16,236  | 5,994  | 223   | 932   | 607   | 106      |
| **Joy**      | 5,812   | 20,390 | 1,076 | 402   | 331   | 199      |
| **Love**     | 1,484   | 2,942  | 2,417 | 129   | 59    | 14       |
| **Anger**    | 4,100   | 2,743  | 124   | 3,758 | 518   | 36       |
| **Fear**     | 3,069   | 2,190  | 55    | 386   | 3,524 | 468      |
| **Surprise** | 602     | 820    | 13    | 54    | 494   | 1,055    |

---

| True \ Pred  | Sadness | Joy    | Love  | Anger | Fear  | Surprise |
| ------------ | ------- | ------ | ----- | ----- | ----- | -------- |
| **Sadness**  | 16,511  | 5,616  | 196   | 993   | 721   | 61       |
| **Joy**      | 5,822   | 20,514 | 879   | 479   | 388   | 128      |
| **Love**     | 1,478   | 3,140  | 2,131 | 184   | 101   | 11       |
| **Anger**    | 4,180   | 2,541  | 87    | 3,801 | 647   | 23       |
| **Fear**     | 3,081   | 2,010  | 42    | 426   | 3,880 | 253      |
| **Surprise** | 633     | 867    | 13    | 57    | 737   | 731      |





# Given the text, the model predicted
By choosing a bike over a car, I’m reducing my environmental footprint. Cycling promotes eco-friendly transportation, and I’m proud to be part of that movement..

In [35]:
phrase = 'By choosing a bike over a car, I’m reducing my environmental footprint. Cycling promotes eco-friendly transportation, and I’m proud to be part of that movement..'

new_data = pd.DataFrame({'text': [phrase]})
new_data['text'] = new_data['text'].apply(clean_data)

x_new = vectorizer.transform(new_data['text'])

predictions = classifier.predict(x_new)

print("Predicted emotion: ", emotions[predictions[0]])

Predicted emotion:  joy
