In [1]:
import numpy as np
import librosa, librosa.display
import tensorflow.keras as keras
from tensorflow.keras.utils import to_categorical
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
import pickle
from sklearn.metrics import accuracy_score

In [2]:
# Class labels in the data run from 1 to 8. A dummy entry occupies index 0 so
# that a label value can be used directly as an index into this list.
labels_names = ['place holder'] + [
    'neutral',   # 1
    'calm',      # 2
    'happy',     # 3
    'sad',       # 4
    'angry',     # 5
    'fearful',   # 6
    'disgust',   # 7
    'surprise',  # 8
]

In [5]:
def _mfcc_per_clip(signals, sr, n_mfcc, n_fft, hop_length):
    """Stack the MFCC matrix of every row of ``signals`` into one array."""
    # The audio is passed as ``y=...``: librosa >= 0.10 made it keyword-only,
    # and older releases accept the keyword form as well.
    return np.array([
        librosa.feature.mfcc(y=clip, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
        for clip in signals
    ])


def _combine_votes(pred_total, knn_pred, threshold):
    """Merge summed ensemble scores with KNN predictions into one label per clip."""
    # Sort once; the last three columns are the 1st, 2nd and 3rd ensemble choices.
    ranking = pred_total.argsort()

    combined = []
    for i, knn_label in enumerate(knn_pred):
        first, second, third = ranking[i, -1], ranking[i, -2], ranking[i, -3]

        # A "clear win": the best score is strictly more than `threshold`
        # times the runner-up score, so the ensemble decides on its own.
        clear_win = pred_total[i][first] > threshold * pred_total[i][second]

        if clear_win or knn_label == first:
            combined.append(first)
        elif knn_label == second:
            combined.append(second)
        elif knn_label == third:
            combined.append(third)
        else:
            # KNN disagrees with all of the top-3 choices: keep the ensemble's best.
            combined.append(first)
    return combined


def test(X_test, y_test):
    """Evaluate the CNN ensemble + KNN combination on a set of audio clips.

    Parameters
    ----------
    X_test : 2-D array
        One clip per row (rows are passed one at a time to librosa).
        The sample rate is taken to be 44100 Hz -- TODO confirm against the data.
    y_test : array-like
        True class label of each clip, compared against the predicted class
        indices with ``accuracy_score``.

    Returns
    -------
    (combined_acc_score, combined_class_pred)
        The accuracy and the list of predicted class indices, one per clip.

    Side effects
    ------------
    Loads ``Ensemble_Learners/model0.h5`` ... ``model19.h5`` and the pickled
    KNN model ``model_knn`` from the working directory, and prints the accuracy.
    """
    # Feature-extraction settings shared by the CNN and the KNN inputs.
    sr = 44100
    n_mfcc = 13
    n_fft_mfcc = 2048
    hop_length_mfcc = 512
    n_fft_stft = 4096
    hop_length_stft = 2048

    NUM_LEARNERS = 20
    NUM_CLASSES = 9   # width of the summed-score matrix (labels 1-8 plus unused index 0)
    THRESHOLD = 4

    # ---- ensemble learners -------------------------------------------------
    # Min-max normalise every clip on its own: the scaler works column-wise,
    # so the matrix is transposed before scaling and transposed back after.
    scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    X_test_scaled = scaler.fit_transform(X_test.T).T

    mfcc_scaled = _mfcc_per_clip(X_test_scaled, sr, n_mfcc, n_fft_mfcc, hop_length_mfcc)

    # Add a depth of 1 so the data can be used in CNN
    X_test1 = mfcc_scaled[..., np.newaxis]

    # Run each learner and accumulate its softmax output.
    pred_total = np.zeros((X_test_scaled.shape[0], NUM_CLASSES))
    for i in range(NUM_LEARNERS):
        model = keras.models.load_model(f'Ensemble_Learners/model{i}.h5')
        pred_total += model.predict(X_test1)

    # ---- knn ---------------------------------------------------------------
    # The KNN features are computed from the *unscaled* clips and averaged
    # over the time-frame axis.
    mfcc_mean = np.mean(
        _mfcc_per_clip(X_test, sr, n_mfcc, n_fft_mfcc, hop_length_mfcc), axis=2
    )
    stft_mean = np.mean(
        np.array([
            np.abs(librosa.stft(y=clip, n_fft=n_fft_stft, hop_length=hop_length_stft))
            for clip in X_test
        ]),
        axis=2,
    )
    FeatureMatrix = np.concatenate((mfcc_mean, stft_mean), axis=1)

    # Load the KNN model and make predictions. The context manager closes the
    # file handle. NOTE(review): pickle executes arbitrary code on load -- only
    # use a 'model_knn' file from a trusted source.
    with open('model_knn', 'rb') as knn_file:
        loaded_model = pickle.load(knn_file)
    pred_yp_knn_s1 = loaded_model.predict(FeatureMatrix)

    # ---- combine -----------------------------------------------------------
    combined_class_pred = _combine_votes(pred_total, pred_yp_knn_s1, THRESHOLD)

    combined_acc_score = accuracy_score(y_test, combined_class_pred)
    print(combined_acc_score)

    # output accuracy and prediction labels
    return combined_acc_score, combined_class_pred

In [6]:
# Read the evaluation set from comma-separated files in the working directory.
# X_test is consumed row by row by test(); y_test holds the matching labels.
X_test = np.genfromtxt(fname='X_test.csv', delimiter=',')
y_test = np.genfromtxt(fname='y_test.csv', delimiter=',')

In [7]:
# Run the full evaluation; test() prints the accuracy and returns it together
# with the predicted class index of every clip.
acc_score, pred_label = test(X_test=X_test, y_test=y_test)

0.8666666666666667
