In [1]:
# Importing Required Packages
import keras, librosa, numpy as np, os
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.models import load_model, Sequential
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tqdm import tqdm

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Function that takes a folder path and returns a tuple of the labels, the indices of the labels and the one-hot encoded labels
def get_labels(path):
    """Derive the class labels from the entries of a dataset folder.

    Parameters
    ----------
    path : str
        Folder whose entries are named after the classes.

    Returns
    -------
    tuple
        ``(labels, label_indices, one_hot)`` where ``labels`` is the sorted
        list of entry names found in ``path``, ``label_indices`` is
        ``np.arange(len(labels))`` and ``one_hot`` is
        ``to_categorical(label_indices)``.
    """
    # os.listdir returns entries in arbitrary order; sorting pins the
    # label -> index mapping so it is identical on every run and platform.
    labels = sorted(os.listdir(path))
    label_indices = np.arange(0, len(labels))
    return labels, label_indices, to_categorical(label_indices)

# Function to convert .wav files to MFCC
def wav2mfcc(file_path, max_len = 32):
    """Load an audio file and return its MFCCs with a fixed number of frames.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load.
    max_len : int
        Number of frames (columns) in the returned array.

    Returns
    -------
    numpy.ndarray
        2-D MFCC array whose second axis has exactly ``max_len`` entries:
        shorter results are zero-padded on the right, longer ones truncated.
        The first axis is whatever ``librosa.feature.mfcc`` produces with its
        default settings (the rest of this notebook assumes 20).
    """
    # sr = None keeps the file's native sampling rate, so that same rate must
    # be handed to the MFCC computation instead of a hard-coded 16000.
    wave, sample_rate = librosa.load(file_path, mono = True, sr = None)
    # The signal is passed by keyword: newer librosa releases no longer accept
    # it positionally.
    mfcc = librosa.feature.mfcc(y = wave, sr = sample_rate)
    n_frames = mfcc.shape[1]
    if max_len > n_frames:
        # Too short: pad the time axis with zeros up to max_len
        mfcc = np.pad(mfcc, pad_width = ((0, 0), (0, max_len - n_frames)), mode = 'constant')
    else:
        # Long enough: keep only the first max_len frames
        mfcc = mfcc[:, :max_len]
    return mfcc

# Function to save the MFCC in arrays
def save_data_to_array(path, max_len = 32):
    """Compute the MFCCs of every file under each label folder and save them.

    For each label returned by ``get_labels(path)``, every file inside
    ``path/label`` is converted with ``wav2mfcc`` and the resulting list is
    written to ``<label>.npy`` in the current working directory.

    Parameters
    ----------
    path : str
        Dataset folder containing one sub-folder per label.
    max_len : int
        Number of MFCC frames kept per file (forwarded to ``wav2mfcc``).
    """
    labels, _, _ = get_labels(path)
    for label in labels:
        # os.path.join builds valid paths whether or not `path` ends with a
        # separator; plain string concatenation only worked with a trailing '/'.
        label_dir = os.path.join(path, label)
        # Sorted so the row order inside each saved array is deterministic
        wavfiles = [os.path.join(label_dir, wavfile) for wavfile in sorted(os.listdir(label_dir))]
        mfcc_vectors = [
            wav2mfcc(wavfile, max_len = max_len)
            for wavfile in tqdm(wavfiles, "Saving vectors of label - '{}'".format(label))
        ]
        np.save(label + '.npy', mfcc_vectors)

# Function to create a CNN model
def get_model():
    """Build and compile the CNN classifier.

    Reads the module-level ``feature_dim_1``, ``feature_dim_2``, ``channel``
    and ``num_classes`` for the input shape and the size of the softmax
    output layer.

    Returns
    -------
    keras.models.Sequential
        Model compiled with categorical cross-entropy loss, the Adam
        optimizer and the ``categorical_accuracy`` metric.
    """
    input_shape = (feature_dim_1, feature_dim_2, channel)
    layers = [
        # Three stacked 2x2 convolutions followed by a single pooling step
        Conv2D(32, kernel_size = (2, 2), activation = 'relu', input_shape = input_shape),
        Conv2D(64, kernel_size = (2, 2), activation = 'relu'),
        Conv2D(128, kernel_size = (2, 2), activation = 'relu'),
        MaxPooling2D(pool_size = (2, 2)),
        Flatten(),
        # Fully connected head with dropout after each hidden layer
        Dense(256, activation = 'relu'),
        Dropout(0.125),
        Dense(512, activation = 'relu'),
        Dropout(0.25),
        Dense(num_classes, activation = 'softmax'),
    ]
    cnn = Sequential(layers)
    cnn.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['categorical_accuracy'])
    return cnn

# Function to split the data in subsets for training and testing
def get_train_test(path, split_ratio = 0.8, random_state = None):
    """Load the saved per-label MFCC arrays and split them into train/test sets.

    Parameters
    ----------
    path : str
        Dataset folder; its labels (via ``get_labels``) name the
        ``<label>.npy`` files loaded from the current working directory.
    split_ratio : float
        Fraction of the samples assigned to the training subset.
    random_state : int or None
        Forwarded to ``train_test_split``; pass an integer to make the
        shuffle reproducible. ``None`` keeps the previous behaviour.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as returned by
        ``train_test_split``. Targets are float class indices following the
        order of the labels.

    Raises
    ------
    ValueError
        If ``path`` yields no labels.
    """
    # Get available labels
    labels, _, _ = get_labels(path)
    if not labels:
        raise ValueError("No labels found in '{}'".format(path))
    # Load every label's array once and stack them in a single call instead of
    # re-copying the growing array on each loop iteration.
    features = [np.load(label + '.npy') for label in labels]
    # float fill value keeps y's dtype float64, as before
    targets = [np.full(x.shape[0], fill_value = float(i)) for i, x in enumerate(features)]
    X = np.concatenate(features, axis = 0)
    y = np.concatenate(targets)
    assert X.shape[0] == len(y)
    return train_test_split(X, y, test_size = (1 - split_ratio), shuffle = True, random_state = random_state)

In [3]:
# Fixing some global parameters for all situations of digit classification
# Number of classes
num_classes = 10

# Specifying feature dimensions for CNN model
feature_dim_1 = 20
feature_dim_2 = 32
channel = 1

# Setting callbacks: one checkpoint file per model, keeping only the best epoch
checkpoint_correct = ModelCheckpoint(filepath = 'model_correct.h5', save_best_only = True, verbose = 1)
checkpoint_perturbed = ModelCheckpoint(filepath = 'model_perturbed.h5', save_best_only = True, verbose = 1)
checkpoint_mixed = ModelCheckpoint(filepath = 'model_mixed.h5', save_best_only = True, verbose = 1)
earlystop = EarlyStopping(min_delta = 0.001, patience = 5, verbose = 1)

# Save modified data to array file first
save_data_to_array(path = "./correct data/")
save_data_to_array(path = "./perturbed data/")
save_data_to_array(path = "./mixed data/")

# Loading dataset
X_train_correct, X_test_correct, y_train_correct, y_test_correct = get_train_test("./correct data/")
X_train_perturbed, X_test_perturbed, y_train_perturbed, y_test_perturbed = get_train_test("./perturbed data/")
X_train_mixed, X_test_mixed, y_train_mixed, y_test_mixed = get_train_test("./mixed data/")

# Performing one hot encoding
# num_classes is passed explicitly: without it to_categorical sizes the
# encoding from the largest label present, so a split that happens to miss the
# highest class would yield fewer columns than the model's output layer.
y_train_correct_hot = to_categorical(y_train_correct, num_classes = num_classes)
y_test_correct_hot = to_categorical(y_test_correct, num_classes = num_classes)
y_train_perturbed_hot = to_categorical(y_train_perturbed, num_classes = num_classes)
y_test_perturbed_hot = to_categorical(y_test_perturbed, num_classes = num_classes)
y_train_mixed_hot = to_categorical(y_train_mixed, num_classes = num_classes)
y_test_mixed_hot = to_categorical(y_test_mixed, num_classes = num_classes)

# Reshaping to perform 2D convolution
# -1 lets NumPy infer the sample count, so each line only names its array once
cnn_input_shape = (-1, feature_dim_1, feature_dim_2, channel)
X_train_correct_reshaped = X_train_correct.reshape(cnn_input_shape)
X_test_correct_reshaped = X_test_correct.reshape(cnn_input_shape)
X_train_perturbed_reshaped = X_train_perturbed.reshape(cnn_input_shape)
X_test_perturbed_reshaped = X_test_perturbed.reshape(cnn_input_shape)
X_train_mixed_reshaped = X_train_mixed.reshape(cnn_input_shape)
X_test_mixed_reshaped = X_test_mixed.reshape(cnn_input_shape)

Saving vectors of label - 'correct eight': 100%|████████████████████████████████████| 150/150 [00:00<00:00, 178.31it/s]
Saving vectors of label - 'correct five': 100%|█████████████████████████████████████| 150/150 [00:00<00:00, 184.81it/s]
Saving vectors of label - 'correct four': 100%|█████████████████████████████████████| 150/150 [00:00<00:00, 184.81it/s]
Saving vectors of label - 'correct nine': 100%|█████████████████████████████████████| 150/150 [00:00<00:00, 178.86it/s]
Saving vectors of label - 'correct one': 100%|██████████████████████████████████████| 150/150 [00:00<00:00, 185.73it/s]
Saving vectors of label - 'correct seven': 100%|████████████████████████████████████| 150/150 [00:00<00:00, 183.68it/s]
Saving vectors of label - 'correct six': 100%|██████████████████████████████████████| 150/150 [00:00<00:00, 183.68it/s]
Saving vectors of label - 'correct three': 100%|████████████████████████████████████| 150/150 [00:00<00:00, 180.37it/s]
Saving vectors of label - 'correct two':

In [4]:
# One freshly initialised CNN per training set
model_correct, model_perturbed, model_mixed = get_model(), get_model(), get_model()

# Fit settings shared by all three training runs
fit_options = dict(batch_size = 100, epochs = 100, validation_split = 0.25, verbose = 0)

# Train each model on its own data; each run writes to its own checkpoint file
model_correct.fit(X_train_correct_reshaped, y_train_correct_hot, callbacks = [checkpoint_correct, earlystop], **fit_options)
model_perturbed.fit(X_train_perturbed_reshaped, y_train_perturbed_hot, callbacks = [checkpoint_perturbed, earlystop], **fit_options)
model_mixed.fit(X_train_mixed_reshaped, y_train_mixed_hot, callbacks = [checkpoint_mixed, earlystop], **fit_options)

# Replace the in-memory models with the checkpoints saved during training
model_correct = load_model('model_correct.h5')
model_perturbed = load_model('model_perturbed.h5')
model_mixed = load_model('model_mixed.h5')

# Score every model on every test set, in the order correct, perturbed, mixed
test_sets = (
    (X_test_correct_reshaped, y_test_correct_hot),
    (X_test_perturbed_reshaped, y_test_perturbed_hot),
    (X_test_mixed_reshaped, y_test_mixed_hot),
)
correct_correct_scores, correct_perturbed_scores, correct_mixed_scores = [
    model_correct.evaluate(features, targets, verbose = 0) for features, targets in test_sets
]
perturbed_correct_scores, perturbed_perturbed_scores, perturbed_mixed_scores = [
    model_perturbed.evaluate(features, targets, verbose = 0) for features, targets in test_sets
]
mixed_correct_scores, mixed_perturbed_scores, mixed_mixed_scores = [
    model_mixed.evaluate(features, targets, verbose = 0) for features, targets in test_sets
]


Epoch 00001: val_loss improved from inf to 1.91944, saving model to model_correct.h5

Epoch 00002: val_loss improved from 1.91944 to 1.37791, saving model to model_correct.h5

Epoch 00003: val_loss improved from 1.37791 to 0.72662, saving model to model_correct.h5

Epoch 00004: val_loss improved from 0.72662 to 0.48506, saving model to model_correct.h5

Epoch 00005: val_loss improved from 0.48506 to 0.30126, saving model to model_correct.h5

Epoch 00006: val_loss improved from 0.30126 to 0.21960, saving model to model_correct.h5

Epoch 00007: val_loss did not improve from 0.21960

Epoch 00008: val_loss did not improve from 0.21960

Epoch 00009: val_loss did not improve from 0.21960

Epoch 00010: val_loss improved from 0.21960 to 0.17739, saving model to model_correct.h5

Epoch 00011: val_loss did not improve from 0.17739

Epoch 00012: val_loss improved from 0.17739 to 0.14422, saving model to model_correct.h5

Epoch 00013: val_loss did not improve from 0.14422

Epoch 00014: val_loss d

In [5]:
# Report the accuracy (index 1 of each evaluate() result) of every
# model / test-set pairing as a percentage
report = [
    ("Unperturbed Model performance on Unperturbed Data : %.2f%%", correct_correct_scores),
    ("Unperturbed Model performance on Perturbed Data : %.2f%%", correct_perturbed_scores),
    ("Unperturbed Model performance on Mixed Data : %.2f%%", correct_mixed_scores),
    ("Perturbed Model performance on Unperturbed Data : %.2f%%", perturbed_correct_scores),
    ("Perturbed Model performance on Perturbed Data : %.2f%%", perturbed_perturbed_scores),
    ("Perturbed Model performance on Mixed Data : %.2f%%", perturbed_mixed_scores),
    ("Mixed Model performance on Unperturbed Data : %.2f%%", mixed_correct_scores),
    ("Mixed Model performance on Perturbed Data : %.2f%%", mixed_perturbed_scores),
    ("Mixed Model performance on Mixed Data : %.2f%%", mixed_mixed_scores),
]
for template, scores in report:
    print(template % (scores[1] * 100))

Unperturbed Model performance on Unperturbed Data : 97.33%
Unperturbed Model performance on Perturbed Data : 7.07%
Unperturbed Model performance on Mixed Data : 22.11%
Perturbed Model performance on Unperturbed Data : 6.00%
Perturbed Model performance on Perturbed Data : 89.33%
Perturbed Model performance on Mixed Data : 78.39%
Mixed Model performance on Unperturbed Data : 97.67%
Mixed Model performance on Perturbed Data : 90.67%
Mixed Model performance on Mixed Data : 88.50%
