# Multi-layer Perceptron Classifier

Source: https://towardsdatascience.com/building-a-speech-emotion-recognizer-using-python-4c1c7c89d713

This model optimizes the log-loss function using LBFGS or stochastic gradient descent.

This notebook relies on the "Preprocessing" notebook, which preprocesses the audio and stores the results in a JSON file.

In [23]:
import glob
import json
import os
import pickle

import joblib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score, precision_score, classification_report, confusion_matrix, plot_confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.neural_network import MLPClassifier

from ipynb.fs.full.basemodel import BaseModel

# Gridsearch

In [27]:
class MLP(BaseModel):
    """
        Multi-layer perceptron classifier built on top of BaseModel.

        A single MLPClassifier is stored on the class (``model``) and is
        fitted by ``train``; ``grid_search`` runs a GridSearchCV over fresh
        MLPClassifier instances and does not touch ``model``.
    """
    # RECALL - GRIDSEARCH RESULTS
    model = MLPClassifier(hidden_layer_sizes=(200, 300,),
                          activation='logistic',
                          learning_rate='adaptive',
                          max_iter=400,
                          solver='sgd',
                         )

    # ACCURACY - GRIDSEARCH RESULT (alternative configuration, kept for reference)
    # model = MLPClassifier(hidden_layer_sizes=(100,),
    #                       activation='logistic',
    #                       learning_rate='adaptive',
    #                       max_iter=400,
    #                       solver='adam',
    #                      )

    @classmethod
    def train(cls, dataset_name):
        """
            Fit the class-level model on the pre-split dataset and report accuracy.

            The 'train' and 'test' splits of ``dataset_name`` are read through
            BaseModel.read_dataset. The model is fitted and evaluated twice:
            first on the 'OriginalData' variant, then on the 'Augmented'
            variant. ``cls.model`` is the same estimator object both times,
            so after this call it holds the fit on the augmented data.

            Args:
                dataset_name: name of the dataset, forwarded to read_dataset.
        """
        train_dataset = super().read_dataset(dataset_type='train', dataset_name=dataset_name)
        test_dataset = super().read_dataset(dataset_type='test', dataset_name=dataset_name)

        # (printed title, key of the data variant inside the dataset dicts)
        variants = (
            ("ORIGINAL", 'OriginalData'),
            ("AUGMENTED", 'Augmented'),
        )

        for title, variant in variants:
            X_train = train_dataset[variant]['X']
            y_train = train_dataset[variant]['y']
            X_test = test_dataset[variant]['X']
            y_test = test_dataset[variant]['y']

            cls.model.fit(X_train, y_train)

            print(f"\n{title} MODEl: {dataset_name}")
            super().model_accuracy(cls.model, X_train, X_test, y_train, y_test)

    @classmethod
    def grid_search(cls, dataset_name, is_augmented=False, scoring='recall'):
        """
            Run a 3-fold GridSearchCV over MLPClassifier hyper-parameters.

            Args:
                dataset_name: name of the dataset, forwarded to read_dataset.
                is_augmented: when True the search uses the 'Augmented'
                    training data, otherwise the 'OriginalData' training data.
                scoring: scorer passed to GridSearchCV. The default 'recall'
                    is scikit-learn's binary-average recall scorer; for
                    multiclass labels pass an averaged variant such as
                    'recall_macro' (or 'accuracy').

            Returns:
                The fitted GridSearchCV object.
        """
        train_dataset = super().read_dataset(dataset_type='train', dataset_name=dataset_name)

        # The flag selects the augmented data when it is set (the two
        # branches used to be swapped).
        variant = 'Augmented' if is_augmented else 'OriginalData'
        X_train = train_dataset[variant]['X']
        y_train = train_dataset[variant]['y']

        param_grid = [
            {
                'activation': ['logistic', 'tanh', 'relu'],
                'solver': ['sgd', 'adam'],
                'hidden_layer_sizes': [
                    (100,), (200,), (300,)
                ],
                'learning_rate': ['adaptive'],
                'max_iter': [400, 500, 600, 700],
            }
        ]

        clf = GridSearchCV(MLPClassifier(), param_grid, cv=3, scoring=scoring)
        clf.fit(X_train, y_train)

        print("Best parameters set found on development set:")
        print(clf.best_params_)
        print(clf.best_estimator_)

        return clf

In [None]:
# Fit and evaluate the MLP on the "ravdess" dataset.
MLP.train(dataset_name="ravdess")

## GridSearch CV

### Use only for testing new models! This takes A LOT of time.

In [None]:
# Search space for the manual GridSearchCV run. The 'lbfgs' solver and the
# 'identity' activation are intentionally not part of the grid.
param_grid = [
    dict(
        activation=['logistic', 'tanh', 'relu'],
        solver=['sgd', 'adam'],
        hidden_layer_sizes=[(200,)],
        learning_rate=['adaptive'],
        max_iter=[400, 500, 600],
        # warm_start=[True, False],
    ),
]

In [None]:
# --- In Testing ---
# Exhaustive 3-fold search over param_grid, ranking candidates by accuracy.
# NOTE(review): X_train / y_train are not assigned in any visible cell;
# they must already exist in the session - confirm before running.
clf = GridSearchCV(
    estimator=MLPClassifier(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
)
clf.fit(X_train, y_train)

print(
    "Best parameters set found on development set:",
    clf.best_params_,
    clf.best_estimator_,
    sep="\n",
)

### Accuracy of GridSearchCV

In [None]:
# Score the best estimator found by the grid search on both splits.
# NOTE(review): X_train / X_test / y_train / y_test are not assigned in any
# visible cell; they must already exist in the session.
best_model = clf.best_estimator_

# Accuracy on the training split
y_pred = best_model.predict(X_train)
accuracy = accuracy_score(y_true=y_train, y_pred=y_pred)
print("Train accuracy is: {}".format(accuracy))

# Accuracy on the held-out test split
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test accuracy is: {}".format(accuracy))

# _Optional_ Store model through Joblib

In [None]:
# Persist the estimator to disk with joblib.
# NOTE(review): `model` and `MODEL_FILENAME` are not defined in any visible
# cell - confirm they exist in the session before running.
joblib.dump(value=model, filename=MODEL_FILENAME)

In [None]:
# Reload the persisted estimator and re-check its accuracy on both splits.
# NOTE(review): MODEL_FILENAME and the X_*/y_* arrays are not defined in any
# visible cell; they must already exist in the session.
joblib_model = joblib.load(filename=MODEL_FILENAME)

# Accuracy on the training split
y_pred = joblib_model.predict(X_train)
accuracy = accuracy_score(y_true=y_train, y_pred=y_pred)
print("Train accuracy is: {}".format(accuracy))

# Accuracy on the held-out test split
y_pred = joblib_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test accuracy is: {}".format(accuracy))

## Old Cross-Validation

In [None]:
# Manual k-fold cross-validation: refit `model` on every fold and record the
# train and test accuracy of each fold.
# KFold comes from sklearn.model_selection (imported in the first cell).
# NOTE(review): `x`, `y` and `model` are not defined in any visible cell.
# x and y are indexed with the integer index arrays produced by kf.split,
# so they are presumably NumPy arrays - confirm.
n_fold = 5
kf = KFold(n_splits=n_fold, shuffle=True)
acc_test_score = []
acc_train_score = []

for train_index, test_index in kf.split(x):
    X_train, X_test = x[train_index, :], x[test_index, :]
    y_train, y_test = y[train_index], y[test_index]

    model.fit(X_train, y_train)
    pred_test_values = model.predict(X_test)
    pred_train_values = model.predict(X_train)

    # accuracy_score expects (y_true, y_pred); accuracy itself is symmetric,
    # but the documented order keeps the call correct if the metric changes.
    acc_test = accuracy_score(y_test, pred_test_values)
    acc_test_score.append(acc_test)

    acc_train = accuracy_score(y_train, pred_train_values)
    acc_train_score.append(acc_train)

# One score is appended per fold, so dividing by n_fold yields the mean.
avg_test_acc_score = sum(acc_test_score) / n_fold
avg_train_acc_score = sum(acc_train_score) / n_fold

In [None]:
# Report the per-fold and mean accuracies: training figures first, then a
# blank separator, then the test figures.
train_report = (
    'Train accuracy of each fold - {}'.format(acc_train_score),
    'Avg train accuracy : {}'.format(avg_train_acc_score),
)
print(*train_report, sep="\n")

print("\n")

test_report = (
    'Test accuracy of each fold - {}'.format(acc_test_score),
    'Avg test accuracy : {}'.format(avg_test_acc_score),
)
print(*test_report, sep="\n")

In [None]:
def plot_accuracy(train, test):
    """
        Draw the train and test accuracy series on one figure and show it.

        Args:
            train: sequence of training accuracies, one value per run.
            test: sequence of test accuracies, one value per run.
    """
    # Both curves go on the same axes so they can be compared directly.
    curves = (
        (train, "train accuracy"),
        (test, "test accuracy"),
    )
    for values, legend_label in curves:
        plt.plot(values, label=legend_label)

    plt.title("Accuracy")
    plt.xlabel("Amount of times")
    plt.ylabel("Accuracy")
    plt.legend(loc="lower right")

    plt.show()

In [None]:
# Plot the per-fold accuracies collected by the cross-validation cell.
plot_accuracy(train=acc_train_score, test=acc_test_score)