<h1><center> Convolutional Neural Networks for Speech Recognition </center></h1>

## Import required libraries

In [39]:
import os
import numpy as np
import librosa as lb
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import tensorflow as tf

## Preprocessing

### Audio file directory path

In [40]:
dir_path = r'C:\Users\vjkri\Desktop\IoT\P6'
dirs = os.listdir(dir_path)

In [41]:
all_folder = {}

for name in dirs:
    all_folder[name] = os.path.join(dir_path,name)

In [42]:
all_audio_files = {}

for key, value in all_folder.items():
    path_directory = os.listdir(value)
    all_audio_files.setdefault(key, [])
    [all_audio_files[key].append(files) for files in path_directory]

### Embed audio signals to MFCC features

Extract MFCC features and save them in .npz format

In [43]:
for key, value in all_audio_files.items():
    nsamples_in_class = 1500
    sampling_rate = 16000
    class_arr = []
    for audio_file in value:
        audio_file_path = os.path.join(dir_path, key, audio_file)
        ys, sr = lb.load(audio_file_path, mono=True, sr=None)
        ys = ys[::3]
        mfcc = lb.feature.mfcc(ys, sr= sampling_rate)
        pad_width = 11 - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
        class_arr.append(mfcc)
    class_arr = np.array(class_arr)
    np.random.shuffle(class_arr)
    class_arr = class_arr[: nsamples_in_class]
    np.savez(str(key), class_arr)

Load all saved arrays

In [44]:
off_arr = np.load('off.npz')['arr_0']
on_arr = np.load('on.npz')['arr_0']

Concatenate all class arrays to a single feature matrix

In [45]:
train_x = np.array([])
train_x = np.concatenate((on_arr, off_arr), axis = 0)

In [46]:
print(train_x.shape)

(3000, 20, 11)


Reshape the arrays conventional to CNN

In [47]:
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], train_x.shape[2], 1))

In [48]:
print(train_x.shape)

(3000, 20, 11, 1)


Create numerical target variables

In [49]:
target = []
nClasses = 2

target_var = [0,1]

for i in range(0,2):
    target.append([target_var[i]] * 1500)

In [50]:
target = np.array(target)
target = np.reshape(target, (3000,1))

In [51]:
print(target.shape)

(3000, 1)


Encode target classes

In [58]:
enc = LabelEncoder()
enc.fit(target)
target = enc.transform(target)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Shuffle the dataset

In [52]:
train_x, target = shuffle(train_x, target)

Split dataset for training and validation

In [53]:
train_x, test_x, train_y, test_y = train_test_split(train_x, target)

## Convolutional Neural Networks

CNN architecture

In [54]:
def cnn_arch():
    model = tf.keras.models.Sequential()
    
    model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(tf.keras.layers.Dropout(0.25))
    
    model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.Dropout(0.25))
  
    model.add(tf.keras.layers.Flatten())
    
    model.add(tf.keras.layers.Dense(220, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.15))
    
    model.add(tf.keras.layers.Dense(220, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.15))
    
    model.add(tf.keras.layers.Dense(nClasses-1, activation='sigmoid'))

    return model

Define input shape and classes

In [55]:
input_shape = (train_x.shape[1], train_x.shape[2], train_x.shape[3])
cnn = cnn_arch()

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Summary of CNN architecture

In [56]:
print(cnn.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 18, 9, 32)         320       
_________________________________________________________________
dropout (Dropout)            (None, 18, 9, 32)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 7, 64)         18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 8, 3, 64)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 8, 3, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1536)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 220)               338140    
__________

Compile the model

In [59]:
cnn.compile(optimizer='nadam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

TypeError: Input 'y' of 'Equal' Op has type float32 that does not match type int32 of argument 'x'.

Fit the model

In [30]:
cnn.fit(train_x, train_y, epochs = 100, verbose = 1, validation_data = (test_x, test_y))

NameError: name 'cnn' is not defined

Evaluate against Validation data

In [303]:
test_acc = cnn.evaluate(test_x, test_y)



In [304]:
print('Accuracy of CNN in recognizing the words is :', test_acc[1])

Accuracy of CNN in recognizing the words is : 0.764


## Recurrent Neural Networks

In [33]:
def rnn_arch():
    model = tf.keras.models.Sequential()
    
    model.add(tf.keras.layers.GRU(200, input_shape = (20, 11)))
    model.add(tf.keras.layers.Dense(1, activation = 'sigmoid'))
    
    return model    

In [34]:
model = rnn_arch()

In [35]:
model.compile(optimizer = 'adam',  loss = 'binary_crossentropy', metrics = ['categorical_accuracy'])

In [36]:
model.fit(train_x, train_y, batch_size = 60, epochs= 10, verbose = 1, validation_data = (test_x, test_y))

Train on 2250 samples, validate on 750 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1d2d51b0f98>

In [27]:
train_score = model.evaluate(train_x, train_y, batch_size= 60)
validation_score = model.evaluate(test_x, test_y, batch_size=60)



In [38]:
rnn_json = model.to_json()
with open("rnn.json", "w") as json_file:
    json_file.write(rnn_json)
# serialize weights to HDF5
model.save_weights("rnn.h5")
print("Saved model to disk")

Saved model to disk
