In [13]:
import json
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D, BatchNormalization, Dropout
import tensorflow.keras as keras
import numpy as np

In [14]:
# Pre-extracted MFCC features and their labels, stored as a single JSON document.
JSON_PATH = "./data/mfcc_data_16sec_nfft255_nmfcc11.json"

# JSON text is UTF-8 by specification; pass the encoding explicitly so the read
# does not depend on the platform's locale default (e.g. cp1252 on Windows).
with open(JSON_PATH, "r", encoding="utf-8") as fp:
    data_l = json.load(fp)

In [3]:
def prepare_datasets(test_size, validation_size, random_state=None, stratify=False, data=None):
    """Splits the MFCC data into train, validation and test sets.

    The test split is carved out first; the validation split is then taken from
    what remains, so ``validation_size`` is a fraction of the post-test training
    data, not of the full data set.

    :param test_size (float): Value in [0, 1] indicating percentage of data set to allocate to test split
    :param validation_size (float): Value in [0, 1] indicating percentage of train set to allocate to validation split
    :param random_state (int or None): Seed forwarded to both ``train_test_split`` calls so the
        partition is reproducible. ``None`` (default) keeps the original unseeded behaviour.
    :param stratify (bool): If True, both splits are stratified on the labels so each split keeps
        the class proportions. ``False`` (default) keeps the original behaviour.
    :param data (dict or None): Mapping with ``"mfcc"`` and ``"labels"`` entries. ``None`` (default)
        falls back to the module-level ``data_l`` loaded earlier in the notebook.
    :return X_train (ndarray): Input training set
    :return X_val (ndarray): Input validation set
    :return X_test (ndarray): Input test set
    :return y_train (ndarray): Target training set
    :return y_val (ndarray): Target validation set
    :return y_test (ndarray): Target test set
    """

    # fall back to the notebook-level dict only when no explicit data is given
    source = data_l if data is None else data

    # load data
    X = np.array(source["mfcc"])
    y = np.array(source["labels"])

    # create train, validation and test split
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y if stratify else None,
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train,
        y_train,
        test_size=validation_size,
        random_state=random_state,
        stratify=y_train if stratify else None,
    )

    # add a trailing axis to the input sets (Conv2D expects a channel dimension)
    X_train = X_train[..., np.newaxis]
    X_val = X_val[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_val, X_test, y_train, y_val, y_test

In [4]:
# Seed NumPy's global RNG: train_test_split draws from it when no random_state
# is passed, so without this every kernel restart yields a different partition
# and the reported metrics cannot be reproduced.
SEED = 42
np.random.seed(SEED)

# 25% of the data is held out for testing; 20% of the remainder is used for
# validation (roughly 60/15/25 train/val/test overall).
X_train, X_val, X_test, y_train, y_val, y_test = prepare_datasets(test_size=0.25, validation_size=0.2)

In [5]:
# sanity check: shapes of the train / validation / test inputs, in that order
tuple(split.shape for split in (X_train, X_val, X_test))

((84, 2757, 11, 1), (21, 2757, 11, 1), (35, 2757, 11, 1))

In [6]:
# sanity check: shapes of the train / validation / test targets, in that order
tuple(labels.shape for labels in (y_train, y_val, y_test))

((84,), (21,), (35,))

In [7]:
# Shape of a single sample: drop the leading (sample-count) axis of X_train.
# This is what the first layer of each model receives as `input_shape`.
input_shape = X_train.shape[1:]
input_shape

(2757, 11, 1)

In [20]:
NUM_LABELS = 4

# First CNN: three conv -> max-pool -> batch-norm stages followed by a small
# dense head with dropout and a softmax over NUM_LABELS classes.
model = Sequential([
    # 1st conv layer
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
    BatchNormalization(),

    # 2nd conv layer
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
    BatchNormalization(),

    # 3rd conv layer (smaller 2x2 kernel and pooling window)
    Conv2D(32, (2, 2), activation='relu'),
    MaxPooling2D((2, 2), strides=(2, 2), padding='same'),
    BatchNormalization(),

    # flatten output and feed it into dense layer
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),

    # output layer: one probability per label
    Dense(NUM_LABELS, activation='softmax'),
])

In [21]:
# compile model
LEARNING_RATE = 1e-4
optimiser = keras.optimizers.Adam(learning_rate=LEARNING_RATE)

# sparse categorical cross-entropy takes the 1-D label array as-is
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimiser,
    metrics=['accuracy'],
)

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 2755, 9, 32)       320       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 1378, 5, 32)      0         
 2D)                                                             
                                                                 
 batch_normalization_6 (Batc  (None, 1378, 5, 32)      128       
 hNormalization)                                                 
                                                                 
 conv2d_7 (Conv2D)           (None, 1376, 3, 32)       9248      
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 688, 2, 32)       0         
 2D)                                                             
                                                      

In [22]:
# train the first CNN, tracking loss/accuracy on the validation split each epoch
EPOCHS = 30
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=EPOCHS,
    batch_size=32,
    validation_data=(X_val, y_val),
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [23]:
# score the first CNN on the held-out test split; returns the loss and the accuracy metric
test_loss, test_acc = model.evaluate(x=X_test, y=y_test, verbose=2)

2/2 - 0s - loss: 1.8308 - accuracy: 0.2571 - 142ms/epoch - 71ms/step


# CNN v2

In [28]:
# Second CNN: two stacked 3x3 convolutions with no pooling, batch norm or dropout.
# Because nothing downsamples the feature map, the flattened vector that feeds
# the first dense layer is very large and that layer holds nearly all of the
# model's parameters (see the summary printed after compiling).
model_v2 = Sequential([
    Conv2D(16, (3,3), activation='relu', input_shape=input_shape),
    Conv2D(16, (3,3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(NUM_LABELS, activation='softmax'),
])

In [29]:
# compile model
# The optimizer is given by name, so Adam runs with its default settings here.
model_v2.compile(
    optimizer='Adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model_v2.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_11 (Conv2D)          (None, 2755, 9, 16)       160       
                                                                 
 conv2d_12 (Conv2D)          (None, 2753, 7, 16)       2320      
                                                                 
 flatten_4 (Flatten)         (None, 308336)            0         
                                                                 
 dense_8 (Dense)             (None, 128)               39467136  
                                                                 
 dense_9 (Dense)             (None, 4)                 516       
                                                                 
Total params: 39,470,132
Trainable params: 39,470,132
Non-trainable params: 0
_________________________________________________________________


In [30]:
# train the second CNN
# NOTE: this rebinds EPOCHS and history, replacing the values left by the first model's run.
EPOCHS = 15
history = model_v2.fit(
    x=X_train,
    y=y_train,
    epochs=EPOCHS,
    batch_size=32,
    validation_data=(X_val, y_val),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [31]:
# score the second CNN on the held-out test split
# NOTE: test_loss / test_acc are rebound here, replacing the first model's results.
test_loss, test_acc = model_v2.evaluate(x=X_test, y=y_test, verbose=2)

2/2 - 0s - loss: 16.1412 - accuracy: 0.2286 - 107ms/epoch - 54ms/step
