In [1]:
# importing all the library 
import os
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display



In [2]:
# Defining the File Directory
DATASET_PATH = "./speech_commands/"
DATA_DIR = pathlib.Path(DATASET_PATH)

In [3]:
# getting the dataset from out file directory
train_dataset, validation_dataset = tf.keras.utils.audio_dataset_from_directory(
    directory=DATA_DIR,
    batch_size=64,
    validation_split=0.2,
    seed = 0,
    output_sequence_length=16000,
    subset="both"
)

print(train_dataset)

Found 16471 files belonging to 7 classes.
Using 13177 files for training.
Using 3294 files for validation.
<_BatchDataset element_spec=(TensorSpec(shape=(None, 16000, None), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>


In [4]:
# defining the label names
LABEL_NAMES = np.array(train_dataset.class_names)
print(LABEL_NAMES)

['down' 'left' 'off' 'on' 'right' 'stop' 'up']


In [5]:
# as we can that the size of shape of the audio array has one useless dimension 
for audio, labels in train_dataset.take(1) : 
    print(audio.shape)
    for label in labels :
        print(label) 

# ! data definition
# * audio = the audio file represented in numbers
# * labels = number of the index in the LABEL_NAMES

(64, 16000, 1)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, 

In [6]:
# ! so we are going to squeeze the array
def squeeze_array(audio, label) : 
    audio = tf.squeeze(audio, axis=-1) # we are going to delete the last dimension of the array
    print()
    print()
    return audio, label

train_dataset = train_dataset.map(squeeze_array, tf.data.AUTOTUNE)
validation_dataset = validation_dataset.map(squeeze_array, tf.data.AUTOTUNE)

print(train_dataset)





<_ParallelMapDataset element_spec=(TensorSpec(shape=(None, 16000), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>


In [7]:
# The array has been squeezed
for audio, labels in train_dataset.take(1) :
    print(audio.shape)

(64, 16000)


In [8]:
## creating the test dataset

test_dataset = validation_dataset.shard(num_shards=2, index=0)
validation_dataset = validation_dataset.shard(num_shards=2, index=1)


> Now that our dataset has been preprocessed and split into train, validation, test data set. We are going to turn it to spectrogram.


In [9]:
# function for turning the audio data set into spectrogram
def get_spectrogram(audio) : # turning the audio dataset into a spectrogram.
    spectrogram = tf.signal.stft(audio, frame_length=255, frame_step=128)
    spectrogram = tf.abs(spectrogram)
    spectrogram = spectrogram[..., tf.newaxis]
    print(spectrogram.shape)
    return spectrogram

def transform_dataset_into_spectrogram (dataset): 
    return dataset.map(map_func = lambda audio, label : (get_spectrogram(audio), label), 
                       num_parallel_calls = tf.data.AUTOTUNE)

In [10]:
# Using the function above to turn the dataset into spectrogram.
train_dataset_spec = transform_dataset_into_spectrogram(train_dataset)
test_dataset_spec = transform_dataset_into_spectrogram(test_dataset)
validation_dataset_spec = transform_dataset_into_spectrogram(validation_dataset)

print(train_dataset_spec)

# Next we are going to cache the dataset into our memory making the process more faster.
train_dataset_spec = train_dataset_spec.cache().shuffle(10000).prefetch(tf.data.AUTOTUNE)
train_dataset_spec = train_dataset_spec.cache().prefetch(tf.data.AUTOTUNE)
validation_dataset_spec = validation_dataset_spec.cache().prefetch(tf.data.AUTOTUNE)

(None, 124, 129, 1)
(None, 124, 129, 1)
(None, 124, 129, 1)
<_ParallelMapDataset element_spec=(TensorSpec(shape=(None, 124, 129, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>


> We are going to do preparation for making our model

In [11]:
for SAMPLE_SPECTROGRAM, SAMPLE_LABELS in train_dataset_spec :
    print(SAMPLE_SPECTROGRAM)
    break

tf.Tensor(
[[[[3.31344232e-02]
   [5.03309630e-02]
   [5.34809232e-02]
   ...
   [3.53633630e-04]
   [3.47502792e-04]
   [1.99779868e-04]]

  [[3.65861766e-02]
   [1.74628887e-02]
   [3.47451381e-02]
   ...
   [1.22331970e-04]
   [1.51530476e-04]
   [4.82164323e-05]]

  [[3.08959633e-02]
   [3.43851745e-02]
   [6.44320548e-02]
   ...
   [2.55096791e-04]
   [4.09472595e-05]
   [1.84332021e-04]]

  ...

  [[1.65800428e+00]
   [3.36848831e+00]
   [2.68951297e+00]
   ...
   [2.00758368e-04]
   [3.37230304e-04]
   [3.24428082e-04]]

  [[2.18381858e+00]
   [2.50447154e+00]
   [3.50471091e+00]
   ...
   [5.31728438e-04]
   [3.98737466e-04]
   [7.84397125e-05]]

  [[1.10471213e+00]
   [1.94156039e+00]
   [3.59231162e+00]
   ...
   [4.21403674e-04]
   [4.04695136e-04]
   [1.96337700e-04]]]


 [[[6.38546228e-01]
   [3.39604288e-01]
   [1.65407043e-02]
   ...
   [5.58451065e-05]
   [7.68117898e-05]
   [1.25795603e-04]]

  [[9.61357474e-01]
   [5.03650844e-01]
   [4.59427610e-02]
   ...
   [9.9262

In [12]:
INPUT_SHAPE = SAMPLE_SPECTROGRAM.shape[1:]
LABEL_AMOUNT = len(LABEL_NAMES)

## CREATING THE NORMALIZATION LAYER
NORMALIZATION_LAYER = layers.Normalization()
NORMALIZATION_LAYER.adapt(data = train_dataset_spec.map(map_func = lambda spec, label : spec))

MODEL = models.Sequential(
    [
        layers.Input(shape=INPUT_SHAPE),
        layers.Resizing(32,32),
        NORMALIZATION_LAYER,
        layers.Conv2D(32, 3, activation='relu'),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(LABEL_AMOUNT)
    ]
)


In [13]:
MODEL.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resizing (Resizing)         (None, 32, 32, 1)         0         
                                                                 
 normalization (Normalizatio  (None, 32, 32, 1)        3         
 n)                                                              
                                                                 
 conv2d (Conv2D)             (None, 30, 30, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 64)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 14, 14, 64)        0

In [14]:
MODEL.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [15]:
EPOCH = 20

MODEL_FIT = MODEL.fit(
    train_dataset_spec,
    validation_data = validation_dataset_spec,
    epochs = EPOCH,
    callbacks = tf.keras.callbacks.EarlyStopping(verbose = 1, patience = 2)
)



Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 9: early stopping


In [16]:
metrics = MODEL_FIT.history
VALIDATION_ACCURACY= metrics["accuracy"]
print(f"Highest Accuracy of the model training : {np.max(np.array(VALIDATION_ACCURACY))}")

Highest Accuracy of the model training : 0.9312438368797302


In [17]:
## doing prediction with our test dataset
print(test_dataset_spec)
prediction = MODEL.predict(test_dataset_spec)
prediction = tf.argmax(prediction, axis=1)

<_ParallelMapDataset element_spec=(TensorSpec(shape=(None, 124, 129, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>


In [18]:
def convert_to_label(labels) : 
    return [LABEL_NAMES[label.numpy()] for label in labels]

prediction = convert_to_label(prediction)
true_prediction = convert_to_label(tf.concat([label for _, label in test_dataset_spec], axis=0))


for index, (true_label, pred_label) in enumerate(zip(true_prediction, prediction)) :
    print(f"Data {index} : Original Label {true_label}, Prediction Label : {pred_label}")

Data 0 : Original Label stop, Prediction Label : stop
Data 1 : Original Label down, Prediction Label : down
Data 2 : Original Label up, Prediction Label : up
Data 3 : Original Label right, Prediction Label : right
Data 4 : Original Label stop, Prediction Label : stop
Data 5 : Original Label off, Prediction Label : off
Data 6 : Original Label stop, Prediction Label : stop
Data 7 : Original Label down, Prediction Label : stop
Data 8 : Original Label on, Prediction Label : on
Data 9 : Original Label left, Prediction Label : left
Data 10 : Original Label up, Prediction Label : up
Data 11 : Original Label down, Prediction Label : down
Data 12 : Original Label down, Prediction Label : down
Data 13 : Original Label on, Prediction Label : on
Data 14 : Original Label up, Prediction Label : up
Data 15 : Original Label up, Prediction Label : up
Data 16 : Original Label down, Prediction Label : down
Data 17 : Original Label on, Prediction Label : on
Data 18 : Original Label off, Prediction Label :

In [19]:
prediction = np.array(prediction)
true_prediction= np.array(true_prediction)

test_accuracy = np.mean(prediction == true_prediction)

print(f"Accuracy from the validation : {np.max(VALIDATION_ACCURACY)}")
print(f"Accuracy of the test : {test_accuracy}")

Accuracy from the validation : 0.9312438368797302
Accuracy of the test : 0.9092548076923077
