In [1]:
import pickle

import numpy as np
import librosa, librosa.display
import tensorflow.keras as keras
from tensorflow.keras.utils import to_categorical
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Class names indexed by numeric label. Labels run from 1 to 8, so index 0
# holds a dummy entry that keeps labels_names[label] aligned with the label.
labels_names = np.array([
    'place holder',
    'neutral',
    'calm',
    'happy',
    'sad',
    'angry',
    'fearful',
    'disgust',
    'surprise',
])

In [3]:
def _mfcc_batch(signals, sr=44100, n_mfcc=13, n_fft=2048, hop_length=512):
    """Return the MFCCs of every row of `signals`, stacked into one array."""
    # The audio is passed as `y=`: recent librosa releases made this argument
    # keyword-only, and the keyword form is also accepted by older releases.
    return np.array([
        librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc,
                             n_fft=n_fft, hop_length=hop_length)
        for signal in signals
    ])


def _stft_magnitude_batch(signals, n_fft=4096, hop_length=2048):
    """Return the STFT magnitude of every row of `signals`, stacked into one array."""
    return np.array([
        np.abs(librosa.stft(y=signal, n_fft=n_fft, hop_length=hop_length))
        for signal in signals
    ])


def _fuse_predictions(pred_total, knn_pred, threshold):
    """Choose one class per sample from the summed CNN scores and the KNN labels."""
    # argsort is ascending, so the last columns hold the best-scoring classes.
    # It is computed once instead of once per rank.
    ranking = pred_total.argsort()
    top1, top2, top3 = ranking[:, -1], ranking[:, -2], ranking[:, -3]

    fused = []
    for i, knn_choice in enumerate(knn_pred):
        best, second, third = top1[i], top2[i], top3[i]
        if pred_total[i][best] > threshold * pred_total[i][second]:
            # Clear win: the best score is strictly more than `threshold`
            # times the runner-up, so the ensemble decides alone.
            fused.append(best)
        elif knn_choice == second:
            fused.append(second)
        elif knn_choice == third:
            fused.append(third)
        else:
            # KNN either agrees with the ensemble's first choice or points
            # outside its top three; both cases keep the first choice.
            fused.append(best)
    return fused


def test(X_test, y_test, threshold=4, n_learners=20,
         ensemble_dir='Ensemble_Learners', knn_path='model_knn'):
    """Classify audio clips with the saved CNN ensemble plus KNN and score the result.

    The softmax outputs of the saved Keras models are summed per sample. When
    the top class does not clearly dominate, the prediction of a pickled KNN
    model is used to pick among the ensemble's top three classes.

    Parameters
    ----------
    X_test : 2-D array
        One audio signal per row.
    y_test : array-like
        True labels, compared against the predictions with `accuracy_score`.
    threshold : number, default 4
        The ensemble's first choice is accepted outright when its summed score
        is strictly greater than `threshold` times the second-best score.
    n_learners : int, default 20
        Number of models to load, named `model0.h5` .. `model{n_learners-1}.h5`.
    ensemble_dir : str, default 'Ensemble_Learners'
        Directory containing the saved Keras models.
    knn_path : str, default 'model_knn'
        Path of the pickled KNN model. Only load files you trust: unpickling
        can execute arbitrary code.

    Returns
    -------
    (float, list)
        The accuracy (also printed) and the predicted class index per sample.
    """
    # Summed scores have 9 columns. NOTE(review): presumably one per entry of
    # labels_names (placeholder at index 0 plus labels 1-8) - confirm against
    # the saved models' output layer.
    n_classes = 9

    # ---- CNN ensemble ----
    # Min-max scale each clip to [0, 1]. MinMaxScaler works column-wise, hence
    # the transposes.
    X_scaled = preprocessing.MinMaxScaler(feature_range=(0, 1)).fit_transform(X_test.T).T

    # Trailing axis of size 1 is the channel dimension expected by the CNNs.
    cnn_input = _mfcc_batch(X_scaled)[..., np.newaxis]

    pred_total = np.zeros((X_scaled.shape[0], n_classes))
    for k in range(n_learners):
        model = keras.models.load_model(ensemble_dir + '/model' + str(k) + '.h5')
        pred_total += model.predict(cnn_input)

    # ---- KNN ----
    # Features come from the unscaled signals: time-averaged MFCCs followed by
    # time-averaged STFT magnitudes.
    mfcc_mean = np.mean(_mfcc_batch(X_test), axis=2)
    stft_mean = np.mean(_stft_magnitude_batch(X_test), axis=2)
    knn_features = np.concatenate((mfcc_mean, stft_mean), axis=1)

    # The context manager closes the file handle, which the bare
    # pickle.load(open(...)) form left open.
    with open(knn_path, 'rb') as model_file:
        knn_model = pickle.load(model_file)
    knn_pred = knn_model.predict(knn_features)

    # ---- Fusion and scoring ----
    combined_class_pred = _fuse_predictions(pred_total, knn_pred, threshold)

    combined_acc_score = accuracy_score(y_test, combined_class_pred)
    print(combined_acc_score)

    # output accuracy and prediction labels
    return combined_acc_score, combined_class_pred

In [4]:
# Directory that holds the .npy dataset files.
# Change this to your own location. Keep the trailing slash: file names are
# appended to it by plain string concatenation.
path = 'C:/Users/catia/Dropbox (UFL)/Teaching/2020 Fall/EEE 4773 Fundamentals of Machine Learning/GitHub/Final-Project/Code and Reports/Dataset/'

# Training split
X_train, y_train = np.load(path + 'data_training.npy'), np.load(path + 'labels_training.npy')

# Test split
X_test, y_test = np.load(path + 'data_test.npy'), np.load(path + 'labels_test.npy')

# Hard test split: the data file is loaded with allow_pickle=True and then
# cast to float.
X_hardtest = np.load(path + 'data_hardtest.npy', allow_pickle=True).astype('float')
y_hardtest = np.load(path + 'labels_hardtest.npy')

In [7]:
# Shapes of the loaded arrays, in order: train X/y, test X/y, hard-test X/y.
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test, X_hardtest, y_hardtest))

((2400, 100000), (2400,), (640, 100000), (640,), (340, 100000), (340,))

In [6]:
# Run the combined classifier on the training split. test() prints the accuracy
# and returns it together with the per-sample predictions.
# NOTE(review): presumably this is the data the saved models were fitted on, so
# read the score as a sanity check rather than a generalization estimate - confirm.
acc_score, pred_label = test(X_train, y_train)



0.9775


In [8]:
# Evaluate on the test split (data_test.npy / labels_test.npy).
# acc_score and pred_label are globals shared by every evaluation cell; this
# call replaces whatever an earlier test() call stored in them.
acc_score, pred_label = test(X_test, y_test)



0.853125


In [17]:
# test() returns the predictions as a Python list; convert them to an array so
# they can be used to index labels_names.
# pred_label belongs to whichever test() call ran last.
pred_label = np.array(pred_label)
# Display the numeric predictions alongside their class names.
pred_label, labels_names[pred_label]

(array([1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 1, 1, 2, 2, 2,
        1, 1, 1, 4, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 2, 6,
        7, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 6,
        2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 1, 4, 2, 2, 1, 2, 2, 2,
        2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 6, 8, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 2,
        4, 6, 4, 7, 3, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 6, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 3, 4,
        4, 4, 4, 4, 2, 4, 1, 4, 4, 2, 

In [18]:
# Confusion matrix for the test split: rows are true labels, columns are
# predicted labels. confusion_matrix is imported in the imports cell at the top
# of the notebook.
# pred_label must hold the predictions returned by test(X_test, y_test); re-run
# that cell first if another evaluation has overwritten it.
confusion_matrix(y_test, pred_label)

array([[68,  8,  2,  2,  0,  0,  0,  0],
       [ 9, 62,  0,  5,  0,  2,  2,  0],
       [ 0,  0, 75,  0,  1,  2,  0,  2],
       [ 1,  4,  3, 68,  0,  3,  1,  0],
       [ 0,  0,  1,  1, 70,  3,  1,  4],
       [ 0,  0,  2,  1,  2, 72,  0,  3],
       [ 0,  2,  1,  0,  5,  2, 69,  1],
       [ 0,  0,  7,  1,  5,  5,  0, 62]], dtype=int64)

In [9]:
# Evaluate on the "hard" test split (data_hardtest.npy / labels_hardtest.npy).
# This overwrites acc_score and pred_label from the previous evaluation.
acc_score, pred_label = test(X_hardtest, y_hardtest)



0.75


In [38]:
# Confusion matrix for the hard test split (rows: true labels, columns:
# predicted labels). Requires confusion_matrix to have been imported by an
# earlier cell.
# pred_label must hold the predictions returned by test(X_hardtest, y_hardtest);
# every evaluation cell overwrites it, so re-run that cell first if needed.
confusion_matrix(y_hardtest, pred_label)

array([[ 0,  0,  1,  0, 10,  0,  1,  8,  0],
       [ 0, 61, 11,  3,  3,  1,  1,  0,  0],
       [ 0,  8, 60,  1,  6,  0,  2,  3,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  1,  2,  2, 72,  0,  3],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  7,  2,  3,  6,  0, 62]], dtype=int64)