In [194]:
# Import Python packages
import os

import numpy as np
import tensorflow as tf

In [195]:
# Create the alphabetically sorted list of all targets in the dataset: one entry
# per sub-directory of the dataset folder, except for the background noise folder.
# The index of a word in this list is what gets compared against the stored labels.
dataset_path = os.path.abspath('speech_commands_v0.02')

# The noise folder is filtered out here (instead of list.remove) so the cell does
# not raise ValueError when '_background_noise_' is missing from the dataset.
targets = sorted(
    entry
    for entry in os.listdir(dataset_path)
    if entry != '_background_noise_'
    and os.path.isdir(os.path.join(dataset_path, entry))
)

In [196]:
# Load features and labels from .npz file.
# np.load returns a lazy NpzFile that keeps the archive open, so the arrays are
# pulled out inside a `with` block, which closes the file handle once they are read.
with np.load(os.path.join(os.getcwd(), 'mfcc_features.npz')) as mfcc_features:
    x_train = mfcc_features['x_train']
    y_train = mfcc_features['y_train']
    x_validation = mfcc_features['x_validation']
    y_validation = mfcc_features['y_validation']
    x_test = mfcc_features['x_test']
    y_test = mfcc_features['y_test']

In [197]:
# Binarise the labels: 1.0 where the label is the index of the wake word "go"
# in `targets`, 0.0 for every other label
wake_word_index = targets.index('go')

y_train, y_validation, y_test = (
    (labels == wake_word_index).astype(np.float64)
    for labels in (y_train, y_validation, y_test)
)

In [198]:
# Give every feature array a trailing channel axis of size 1.
# The first three dimensions are kept as they are, so re-running this cell on
# arrays that already carry the channel axis leaves their shape unchanged.
x_train, x_validation, x_test = (
    features.reshape(*features.shape[:3], 1)
    for features in (x_train, x_validation, x_test)
)

In [199]:
# Build machine learning model: three Conv2D + MaxPooling2D stages followed by a
# small dense classifier that ends in a single sigmoid unit.

# Seed Python, NumPy and TensorFlow before any weights are created so that weight
# initialisation, dropout and batch shuffling are repeatable between runs
# (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

model = tf.keras.models.Sequential([
    # The input shape is declared with an explicit Input object rather than the
    # `input_shape` layer argument, which Keras 3 warns against. The shape is one
    # sample of the training features (every dimension after the batch axis).
    tf.keras.Input(shape=x_train.shape[1:]),

    tf.keras.layers.Conv2D(32, (2, 2), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    tf.keras.layers.Conv2D(32, (2, 2), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    tf.keras.layers.Conv2D(64, (2, 2), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    # Classifier head: flatten the feature maps, one hidden layer, dropout of
    # half the hidden activations during training, then one sigmoid output.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

In [200]:
# Configure the model for training: RMSprop optimizer, binary cross-entropy loss,
# and accuracy reported as the metric
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [201]:
# Train the model for 30 epochs in batches of 100 samples, scoring it on the
# validation split after every epoch.
# The returned History object is kept in `history` so the per-epoch loss and
# accuracy values stay available (history.history) instead of only showing up
# as a bare object repr in the cell output.
history = model.fit(
    x_train,
    y_train,
    epochs=30,
    batch_size=100,
    validation_data=(x_validation, y_validation),
)

Epoch 1/30
[1m775/775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9628 - loss: 0.1526 - val_accuracy: 0.9732 - val_loss: 0.0939
Epoch 2/30
[1m775/775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9678 - loss: 0.0985 - val_accuracy: 0.9754 - val_loss: 0.0740
Epoch 3/30
[1m775/775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9715 - loss: 0.0840 - val_accuracy: 0.9760 - val_loss: 0.0739
Epoch 4/30
[1m775/775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9746 - loss: 0.0763 - val_accuracy: 0.9773 - val_loss: 0.0755
Epoch 5/30
[1m775/775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9766 - loss: 0.0716 - val_accuracy: 0.9805 - val_loss: 0.0616
Epoch 6/30
[1m775/775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9775 - loss: 0.0680 - val_accuracy: 0.9760 - val_loss: 0.0731
Epoch 7/30
[1m775/775[0m 

<keras.src.callbacks.history.History at 0x177784770>

In [202]:
# Score the trained model on the test split; the cell displays the returned
# [loss, accuracy] pair
model.evaluate(x=x_test, y=y_test)

[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 672us/step - accuracy: 0.9777 - loss: 0.0795


[0.08281411230564117, 0.9770150780677795]

In [203]:
# Write the trained model to the file model.keras (path is relative to the
# current working directory)
model.save(filepath="model.keras")