<h1><center> Convolutional Neural Networks for Speech Recognition </center></h1>

## Import required libraries

In [114]:
import os
import numpy as np
import librosa as lb
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import tensorflow as tf

## Preprocessing

### Audio file directory path

In [115]:
# Root folder that holds one sub-folder of audio files per class.
# The SPEECH_DATA_DIR environment variable overrides the default location so
# the notebook can run on machines other than the one it was written on.
dir_path = os.environ.get('SPEECH_DATA_DIR', r'C:\Users\vjkri\Desktop\IoT\P6')
dirs = os.listdir(dir_path)

In [116]:
# Map each class name (sub-folder name) to its full path. Plain files sitting
# next to the class folders are skipped: os.listdir() is later called on every
# value of this dict and would fail on anything that is not a directory.
all_folder = {
    name: os.path.join(dir_path, name)
    for name in dirs
    if os.path.isdir(os.path.join(dir_path, name))
}

In [117]:
# For every class folder, record the names of the entries it contains.
all_audio_files = {
    key: os.listdir(value)
    for key, value in all_folder.items()
}

### Embed audio signals to MFCC features

Extract MFCC features and save them in .npz format

In [118]:
# Settings shared by every class; defined once because they never change
# between iterations.
nsamples_in_class = 1500  # maximum number of clips kept per class
sampling_rate = 16000     # rate reported to librosa when computing the MFCCs
n_frames = 11             # fixed number of MFCC frames (time steps) per clip

for key, value in all_audio_files.items():
    class_arr = []
    for audio_file in value:
        audio_file_path = os.path.join(dir_path, key, audio_file)
        # sr=None keeps the file's native sampling rate.
        ys, sr = lb.load(audio_file_path, mono=True, sr=None)
        # Keep every third sample to shorten the signal.
        # NOTE(review): no low-pass filter is applied and the MFCC call below is
        # still told `sampling_rate` — confirm this is intended.
        ys = ys[::3]
        # The signal must be passed by keyword: recent librosa releases no
        # longer accept it as a positional argument.
        mfcc = lb.feature.mfcc(y=ys, sr=sampling_rate)
        # Force exactly n_frames columns: truncate longer clips first so the
        # padding amount can never be negative, then zero-pad shorter ones.
        mfcc = mfcc[:, :n_frames]
        pad_width = n_frames - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
        class_arr.append(mfcc)
    class_arr = np.array(class_arr)
    # Shuffle along the first axis so the subset kept below is a random one.
    np.random.shuffle(class_arr)
    class_arr = class_arr[: nsamples_in_class]
    # Written to "<class name>.npz"; the array is stored under the key 'arr_0'.
    np.savez(str(key), class_arr)

Load all saved arrays

In [119]:
# Read the per-class feature arrays back from disk. np.load() on an .npz
# archive returns a file-backed NpzFile, so each archive is opened in a `with`
# block to make sure its file handle is closed once the array is extracted.
with np.load('off.npz') as off_npz:
    off_arr = off_npz['arr_0']
with np.load('on.npz') as on_npz:
    on_arr = on_npz['arr_0']

Concatenate all class arrays to a single feature matrix

In [120]:
# Stack the class arrays along the sample axis: all "off" clips first,
# followed by all "on" clips.
train_x = np.concatenate((off_arr, on_arr), axis=0)

In [121]:
# Sanity check: dimensions of the combined feature array.
print(np.shape(train_x))

(3000, 20, 11)


Reshape the arrays into the (samples, height, width, channels) layout expected by the CNN

In [122]:
# Keep the first three dimensions and append a trailing axis of length 1.
train_x = train_x.reshape(tuple(train_x.shape[axis] for axis in range(3)) + (1,))

In [123]:
# Sanity check: dimensions of the feature array after adding the extra axis.
print(np.shape(train_x))

(3000, 20, 11, 1)


Create numerical target variables

In [124]:
# Build one list of integer labels per class, in class order (0 first, then 1),
# so that the labels line up with features stacked in the same class order.
nClasses = 2
samples_per_class = 1500  # number of labels generated for each class

target_var = list(range(nClasses))
target = [[label] * samples_per_class for label in target_var]

In [125]:
# Collapse the per-class label lists into one flat label vector.
target = np.asarray(target).reshape(-1)

In [126]:
# Turn the label vector into a single column. -1 lets NumPy infer the row
# count from the data instead of relying on a hard-coded 3000.
target = np.reshape(target, (-1, 1))

Encode target classes

In [127]:
# LabelEncoder expects a 1-D array of labels. Passing the (n, 1) column vector
# triggers a DataConversionWarning, so the labels are flattened first.
enc = LabelEncoder()
target = enc.fit_transform(np.ravel(target))

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Shuffle the dataset

In [128]:
# Shuffle features and labels in unison. A fixed random_state makes the
# resulting order reproducible across kernel restarts.
train_x, target = shuffle(train_x, target, random_state=42)

Split dataset for training and validation

In [129]:
# Hold out part of the data for validation (train_test_split's default split).
# random_state makes the split reproducible; stratify keeps the class ratio
# the same in the training and validation parts.
# NOTE: train_x is overwritten here, so re-running this cell alone would split
# the already-reduced training set again — re-run from the top instead.
train_x, test_x, train_y, test_y = train_test_split(
    train_x, target, random_state=42, stratify=target
)

## Convolutional Neural Networks

CNN architecture

In [130]:
def cnn_arch(shape=None):
    """Build the (uncompiled) CNN used to classify the MFCC feature maps.

    Architecture: two 3x3 convolutions (32 then 64 filters, ReLU), 2x2 max
    pooling, dropout, flatten, two 220-unit ReLU dense layers each followed by
    dropout, and a single sigmoid output unit.

    Parameters
    ----------
    shape : tuple, optional
        Shape of one input sample, handed to the first layer as
        ``input_shape``. When omitted, the module-level ``input_shape``
        variable is used, which must then exist before this function is called.

    Returns
    -------
    tf.keras.models.Sequential
        The assembled model; compiling and fitting are left to the caller.
    """
    if shape is None:
        # Backward-compatible fallback for callers that rely on the global.
        shape = input_shape

    layers = tf.keras.layers
    model = tf.keras.models.Sequential([
        # Convolutional feature extractor.
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=shape),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.25),
        layers.Flatten(),
        # Fully connected classifier head.
        layers.Dense(220, activation='relu'),
        layers.Dropout(0.25),
        layers.Dense(220, activation='relu'),
        layers.Dropout(0.25),
        # One sigmoid unit: output is a single value in [0, 1].
        layers.Dense(1, activation='sigmoid'),
    ])

    return model

In [131]:
# Sanity check: dimensions of the training portion of the feature array.
print(np.shape(train_x))

(2250, 20, 11, 1)


Define the input shape and build the model

In [133]:
# Per-sample shape handed to the first layer: axes 1 to 3 of train_x
# (axis 0 is the sample axis and is left out).
input_shape = tuple(train_x.shape[axis] for axis in (1, 2, 3))
cnn = cnn_arch()

Summary of CNN architecture

In [134]:
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line after the table.
cnn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_22 (Conv2D)           (None, 18, 9, 32)         320       
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 16, 7, 64)         18496     
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 8, 3, 64)          0         
_________________________________________________________________
dropout_36 (Dropout)         (None, 8, 3, 64)          0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 1536)              0         
_________________________________________________________________
dense_37 (Dense)             (None, 220)               338140    
_________________________________________________________________
dropout_37 (Dropout)         (None, 220)               0         
__________

Compile the model

In [135]:
# Configure training: Adam optimiser, binary cross-entropy loss, and accuracy
# as the reported metric.
cnn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

Fit the model

In [136]:
# Train for 100 epochs, evaluating on the held-out split after every epoch.
# The returned History object is kept so the per-epoch metrics stay available
# (in `history.history`) once training has finished.
history = cnn.fit(
    train_x,
    train_y,
    epochs=100,
    verbose=1,
    validation_data=(test_x, test_y),
)

Train on 2250 samples, validate on 750 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100


Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x18f172657b8>

Save the model architecture and weights

In [137]:
# Persist the network in two parts: the architecture as JSON text and the
# learned weights as an HDF5 file.
# NOTE(review): newer Keras releases may require a ".weights.h5" suffix for
# save_weights — confirm against the installed version.
cnn_json = cnn.to_json()
with open("cnn.json", "w") as architecture_file:
    architecture_file.write(cnn_json)

cnn.save_weights("cnn.h5")