In [1]:
import librosa

In [3]:
import os
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import numpy as np
from tqdm import tqdm

In [5]:
# Root directory of the dataset; the cells below treat each entry found
# directly under it as one class label and read audio files from inside it.
path = "./data/"

In [19]:
# Each sub-directory of `path` is one emotion class. os.listdir() returns
# entries in arbitrary order and also lists stray files, so keep directories
# only and sort them: the label -> index mapping is then stable across runs
# and machines.
labels = sorted(
    entry for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)
# Integer class id for each label, aligned with the order of `labels`.
label_indices = np.arange(0, len(labels))

In [20]:
# Show the class names discovered under `path`.
labels

['angry', 'calm', 'disgust', 'fearful', 'happy', 'neutral', 'sad', 'surprised']

In [21]:
# Show the integer id assigned to each label (position in `labels`).
label_indices

array([0, 1, 2, 3, 4, 5, 6, 7])

In [22]:
# Preview the one-hot encoding of the class ids (one row per class).
to_categorical(label_indices)

array([[1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)

In [24]:
def wav2mfcc(file_path, max_len=11):
    """Load an audio file and return its MFCC matrix with a fixed frame count.

    The file is read as mono at its native sample rate, converted to MFCCs,
    and then zero-padded on the right or truncated along the frame axis
    (axis 1) so the result always has exactly ``max_len`` columns.

    Parameters
    ----------
    file_path : str
        Path of the audio file, handed to ``librosa.load``.
    max_len : int, optional
        Number of frames (columns) in the returned matrix. Defaults to 11.

    Returns
    -------
    numpy.ndarray
        2-D array with ``max_len`` columns; the number of rows is whatever
        ``librosa.feature.mfcc`` produces with its default settings.
    """
    wave, sr = librosa.load(file_path, mono=True, sr=None)
    # Pass the sample rate the audio was actually loaded at. A hard-coded
    # rate that differs from the file's native rate builds the mel filter
    # bank for the wrong frequency range. `y` is passed by keyword because
    # recent librosa releases no longer accept it positionally.
    mfcc = librosa.feature.mfcc(y=wave, sr=sr)

    n_frames = mfcc.shape[1]
    if n_frames < max_len:
        # Too short: append zero-valued frames on the right.
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, max_len - n_frames)), mode='constant')
    else:
        # Long enough: keep only the first `max_len` frames.
        mfcc = mfcc[:, :max_len]

    return mfcc

In [26]:
# Extract fixed-length MFCC features for every recording and cache them on
# disk as one '<label>.npy' file per class, written to the working directory.
for label in labels:
    label_dir = os.path.join(path, label)
    # os.listdir() order is arbitrary; sort so the rows of the saved array,
    # and hence the seeded train/test split built from them, are reproducible.
    wavfiles = [label_dir + '/' + wavfile for wavfile in sorted(os.listdir(label_dir))]

    mfcc_vectors = [
        wav2mfcc(wavfile, max_len=11)
        for wavfile in tqdm(wavfiles, "Saving vectors of label - '{}'".format(label))
    ]
    np.save(label + '.npy', mfcc_vectors)

Saving vectors of label - 'angry': 100%|███████████████████████████████████████████████| 40/40 [00:06<00:00, 10.86it/s]
Saving vectors of label - 'calm': 100%|████████████████████████████████████████████████| 40/40 [00:03<00:00, 12.08it/s]
Saving vectors of label - 'disgust': 100%|█████████████████████████████████████████████| 40/40 [00:03<00:00, 12.37it/s]
Saving vectors of label - 'fearful': 100%|█████████████████████████████████████████████| 40/40 [00:03<00:00, 14.17it/s]
Saving vectors of label - 'happy': 100%|███████████████████████████████████████████████| 40/40 [00:03<00:00, 12.99it/s]
Saving vectors of label - 'neutral': 100%|█████████████████████████████████████████████| 20/20 [00:01<00:00, 12.31it/s]
Saving vectors of label - 'sad': 100%|█████████████████████████████████████████████████| 40/40 [00:03<00:00, 12.65it/s]
Saving vectors of label - 'surprised': 100%|███████████████████████████████████████████| 40/40 [00:03<00:00, 12.62it/s]


In [40]:
# Fraction of samples kept for training; the remaining (1 - split_ratio)
# becomes the test set.
split_ratio=0.6
random_state=42

# Load each cached '<label>.npy' file once and concatenate a single time.
# Growing X and y inside the loop re-copies the accumulated arrays on every
# iteration, and special-casing the first label is unnecessary.
features_per_label = [np.load(label + '.npy') for label in labels]
X = np.concatenate(features_per_label, axis=0)
# Class id of a sample is the position of its label in `labels`.
y = np.concatenate([
    np.full(features.shape[0], fill_value=index, dtype=int)
    for index, features in enumerate(features_per_label)
])

assert X.shape[0] == len(y), "feature/label count mismatch"

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=(1 - split_ratio), random_state=random_state, shuffle=True
)


In [41]:
# Inspect the training features produced by train_test_split.
X_train

array([[[-8.74629176e+02, -8.72982794e+02, -8.72592381e+02, ...,
         -8.76932359e+02, -8.76834552e+02, -8.74402056e+02],
        [ 2.73635428e+00,  4.43310862e+00,  5.06180390e+00, ...,
          0.00000000e+00,  1.38309896e-01,  3.47560559e+00],
        [ 3.04621724e+00,  4.92454809e+00,  5.22903952e+00, ...,
          0.00000000e+00,  1.38278652e-01,  3.18162524e+00],
        ...,
        [ 2.17407838e+00,  1.65408703e+00,  1.64118550e+00, ...,
          0.00000000e+00,  1.35321163e-01,  2.89158727e+00],
        [ 2.06444737e+00,  1.43994524e+00,  1.55393288e+00, ...,
          0.00000000e+00,  1.34959422e-01,  2.68811315e+00],
        [ 1.67859795e+00,  7.54502202e-01,  7.26899003e-01, ...,
          0.00000000e+00,  1.34577357e-01,  2.33491836e+00]],

       [[-8.07400740e+02, -8.07400740e+02, -8.06950028e+02, ...,
         -8.04941144e+02, -8.05082085e+02, -8.06230287e+02],
        [ 0.00000000e+00,  0.00000000e+00,  6.37354401e-01, ...,
          3.47755507e+00,  3.27817867e

In [42]:
# Inspect the held-out test features produced by train_test_split.
X_test

array([[[-7.08675855e+02, -7.06437731e+02, -7.01128365e+02, ...,
         -7.22149835e+02, -7.30053356e+02, -7.29788760e+02],
        [ 1.29526732e+01,  1.54961244e+01,  1.67360381e+01, ...,
          2.07007500e+01,  2.60217435e+01,  2.28029093e+01],
        [-3.89711069e+01, -3.99064277e+01, -4.37571698e+01, ...,
         -3.42698741e+01, -3.35942095e+01, -3.48558989e+01],
        ...,
        [ 1.05605925e+01,  4.32394658e+00, -4.21822378e+00, ...,
         -1.54344522e+00, -2.83682426e-02, -1.52277343e-01],
        [-3.91026662e+00, -5.48782119e+00, -1.11650906e+01, ...,
         -7.54397262e+00, -1.02764628e+01, -9.87906946e+00],
        [-2.13054195e+00,  2.47208987e+00,  3.57275686e+00, ...,
          2.36269667e+00, -4.56929539e-01,  5.71148675e-01]],

       [[-7.46467642e+02, -7.46467642e+02, -7.46467642e+02, ...,
         -7.46467642e+02, -7.46467642e+02, -7.46467642e+02],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e

In [43]:
# Inspect the integer class ids of the training samples.
y_train

array([1, 4, 4, 4, 3, 6, 0, 3, 4, 7, 3, 2, 4, 2, 7, 7, 3, 4, 4, 0, 2, 3,
       4, 3, 5, 7, 3, 1, 0, 2, 7, 2, 6, 3, 7, 6, 6, 1, 4, 2, 5, 3, 0, 0,
       0, 3, 7, 6, 4, 1, 2, 5, 6, 1, 2, 6, 3, 0, 7, 3, 5, 0, 7, 7, 2, 6,
       6, 4, 2, 0, 1, 3, 6, 5, 6, 0, 7, 7, 0, 5, 0, 3, 0, 5, 4, 5, 7, 3,
       1, 3, 3, 7, 0, 1, 7, 1, 1, 7, 3, 1, 3, 7, 5, 3, 0, 7, 2, 2, 5, 5,
       0, 6, 7, 6, 4, 0, 1, 2, 7, 6, 7, 5, 4, 1, 5, 3, 4, 6, 6, 0, 2, 1,
       3, 0, 3, 2, 1, 2, 5, 0, 2, 1, 6, 0, 2, 4, 7, 3, 7, 6, 1, 1, 4, 4,
       7, 4, 4, 1, 1, 6, 6, 0, 4, 7, 4, 6, 3, 3, 3, 2, 2, 5, 3, 7, 0, 4,
       1, 2, 7, 2])

In [44]:
# Inspect the integer class ids of the test samples.
y_test

array([5, 7, 3, 0, 6, 6, 4, 2, 0, 4, 6, 1, 5, 1, 4, 6, 7, 5, 3, 4, 1, 2,
       6, 6, 2, 1, 7, 7, 3, 6, 0, 4, 0, 0, 5, 2, 0, 2, 1, 1, 2, 1, 7, 1,
       1, 1, 6, 6, 2, 6, 4, 3, 6, 1, 7, 7, 2, 2, 4, 0, 6, 1, 0, 4, 4, 0,
       0, 6, 1, 4, 2, 3, 6, 4, 3, 7, 2, 2, 1, 7, 4, 3, 0, 3, 1, 2, 2, 1,
       0, 0, 4, 2, 1, 6, 0, 0, 4, 3, 7, 1, 6, 1, 0, 4, 7, 6, 3, 0, 4, 2,
       3, 7, 2, 6, 6, 2, 3, 7, 4, 5])

In [45]:

# Collect, per label, the list of audio file paths and the full-length MFCC
# matrix of each file (no padding or truncation is applied here).
data = {}
for label in labels:
    label_dir = os.path.join(path, label)
    # Sort the listing: os.listdir() order is arbitrary, and 'path' and
    # 'mfcc' must stay index-aligned in a reproducible order.
    wav_paths = [label_dir + '/' + wavfile for wavfile in sorted(os.listdir(label_dir))]

    vectors = []
    for wavfile in wav_paths:
        wave, sr = librosa.load(wavfile, mono=True, sr=None)
        # Use the rate the file was actually loaded at rather than a fixed
        # 16000, and pass `y` by keyword as recent librosa releases require.
        mfcc = librosa.feature.mfcc(y=wave, sr=sr)
        vectors.append(mfcc)

    data[label] = {'path': wav_paths, 'mfcc': vectors}


{'angry': {'mfcc': [array([[-708.35756888, -708.35756888, -708.35756888, ..., -708.35756888,
           -708.35756888, -708.35756888],
          [   0.        ,    0.        ,    0.        , ...,    0.        ,
              0.        ,    0.        ],
          [   0.        ,    0.        ,    0.        , ...,    0.        ,
              0.        ,    0.        ],
          ...,
          [   0.        ,    0.        ,    0.        , ...,    0.        ,
              0.        ,    0.        ],
          [   0.        ,    0.        ,    0.        , ...,    0.        ,
              0.        ,    0.        ],
          [   0.        ,    0.        ,    0.        , ...,    0.        ,
              0.        ,    0.        ]]),
   array([[-7.56025038e+02, -7.55896647e+02, -7.72531252e+02, ...,
           -6.90721126e+02, -6.81817701e+02, -7.01484149e+02],
          [ 3.75269590e+01,  4.04252156e+01,  3.39128461e+01, ...,
            8.84402894e+01,  9.38956241e+01,  9.22302898e+01]

In [46]:
# Inspect the entry collected for the 'angry' label.
data['angry']

{'mfcc': [array([[-708.35756888, -708.35756888, -708.35756888, ..., -708.35756888,
          -708.35756888, -708.35756888],
         [   0.        ,    0.        ,    0.        , ...,    0.        ,
             0.        ,    0.        ],
         [   0.        ,    0.        ,    0.        , ...,    0.        ,
             0.        ,    0.        ],
         ...,
         [   0.        ,    0.        ,    0.        , ...,    0.        ,
             0.        ,    0.        ],
         [   0.        ,    0.        ,    0.        , ...,    0.        ,
             0.        ,    0.        ],
         [   0.        ,    0.        ,    0.        , ...,    0.        ,
             0.        ,    0.        ]]),
  array([[-7.56025038e+02, -7.55896647e+02, -7.72531252e+02, ...,
          -6.90721126e+02, -6.81817701e+02, -7.01484149e+02],
         [ 3.75269590e+01,  4.04252156e+01,  3.39128461e+01, ...,
           8.84402894e+01,  9.38956241e+01,  9.22302898e+01],
         [ 1.20341822e+0