In [1]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import pandas as pd
from datetime import datetime
from termcolor import colored

# Timer.
startTime = datetime.now()

# Path to created json file from mel preprocess and feature extraction script.
DATA_PATH = "E:/Acoustic/mel_data.json"

# Path to save model.
MODEL_SAVE = 'E:/Acoustic/model_1_RNN.h5'

# Path to save training history and model accuracy performance at end of training.
HISTORY_SAVE = "E:/Acoustic/history_1_RNN.csv"
ACC_SAVE = "E:/Acoustic/models_acc_1_RNN.json"

def load_data(data_path):
    """Loads training dataset from json file.
        :param data_path (str): Path to json file containing data
        :return X (ndarray): Inputs
        :return y (ndarray): Targets
    """

    with open(data_path, "r") as fp:
        data = json.load(fp)

    # Convert lists to numpy arrays.
    X = np.array(data["mel"]) # The name in brackets is changed to "mfccs" if MFCC features are used to train.
    y = np.array(data["labels"])
    return X, y

def prepare_datasets(test_size, validation_size):
    # Load extracted features and labels data.
    X, y = load_data(DATA_PATH)

    # Create train/test split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)

    # Create train/validation split.
    X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=validation_size)

    return X_train, X_validation, X_test, y_train, y_validation, y_test


def build_model(input_shape):
    # Create model.
    model = keras.Sequential()

    # 3 LSTM layers
    model.add(keras.layers.Bidirectional(keras.layers.LSTM(100, return_sequences=True), input_shape=input_shape))
    model.add(keras.layers.Bidirectional(keras.layers.LSTM(100, return_sequences=True), input_shape=(input_shape, 1)))
    model.add(keras.layers.Bidirectional(keras.layers.LSTM(100)))

    # Dense layer
    model.add(keras.layers.Dense(100, activation='relu'))
    model.add(keras.layers.Dropout(0.5))

    # Output layer
    model.add(keras.layers.Dense(2, activation='softmax'))

    return model


if __name__ == "__main__":
    # Create train, validation and test sets.
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

    # Early stopping.
    callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

    # Checkpoint.
    checkpoint = keras.callbacks.ModelCheckpoint(MODEL_SAVE, monitor='val_loss',
                                                  mode='min', save_best_only=True, verbose=1)

    # Build the RNN network.
    input_shape = (X_train.shape[1], X_train.shape[2])
    model = build_model(input_shape)

    # Compile the network.
    optimiser = keras.optimizers.Adam(learning_rate=0.0005)
    model.compile(optimizer=optimiser,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # Train the RNN.
    history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=64, epochs=1000,
                          callbacks=[callback, checkpoint])

    # Save history.
    hist = pd.DataFrame(history.history)

    # Save to csv:
    hist_csv = HISTORY_SAVE
    with open(hist_csv, mode='w') as f:
        hist.to_csv(f)

    print(colored("Paper 8's model has been trained and its training history has been saved to {}.".format(hist_csv), "green"))

    # Evaluate the RNN on the test set.
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy on test set is: {}".format(test_accuracy))

    # Timer output.
    time = datetime.now() - startTime
    print(time)

    # Save model accuracies on test set.
    accuracy = {
        "model_acc": [],
        "total_train_time": [],
    }

    accuracy["model_acc"].append(test_accuracy)
    accuracy["total_train_time"].append(str(time))

    with open(ACC_SAVE, "w") as fp:
        json.dump(accuracy, fp, indent=4)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 44, 200)          152800    
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 44, 200)          240800    
 nal)                                                            
                                                                 
 bidirectional_2 (Bidirectio  (None, 200)              240800    
 nal)                                                            
                                                                 
 dense (Dense)               (None, 100)               20100     
                                                                 
 dropout (Dropout)           (None, 100)               0         
                                                        

Epoch 22/1000
Epoch 22: val_loss improved from 0.00002 to 0.00002, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 23/1000
Epoch 23: val_loss improved from 0.00002 to 0.00001, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 24/1000
Epoch 24: val_loss improved from 0.00001 to 0.00001, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 25/1000
Epoch 25: val_loss improved from 0.00001 to 0.00001, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 26/1000
Epoch 26: val_loss improved from 0.00001 to 0.00001, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 27/1000
Epoch 27: val_loss improved from 0.00001 to 0.00001, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 28/1000
Epoch 28: val_loss improved from 0.00001 to 0.00001, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 29/1000
Epoch 29: val_loss improved from 0.00001 to 0.00001, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 30/1000
Epoch 30: val_loss improved from 0.00001 to 0.00001, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 31/1

Epoch 47/1000
Epoch 47: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 48/1000
Epoch 48: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 49/1000
Epoch 49: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 50/1000
Epoch 50: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 51/1000
Epoch 51: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 52/1000
Epoch 52: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 53/1000
Epoch 53: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 54/1000
Epoch 54: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 55/1000
Epoch 55: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 56/1

Epoch 72/1000
Epoch 72: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 73/1000
Epoch 73: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 74/1000
Epoch 74: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 75/1000
Epoch 75: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 76/1000
Epoch 76: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 77/1000
Epoch 77: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 78/1000
Epoch 78: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 79/1000
Epoch 79: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 80/1000
Epoch 80: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 81/1

Epoch 97/1000
Epoch 97: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 98/1000
Epoch 98: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 99/1000
Epoch 99: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 100/1000
Epoch 100: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 101/1000
Epoch 101: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 102/1000
Epoch 102: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 103/1000
Epoch 103: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 104/1000
Epoch 104: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 105/1000
Epoch 105: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h

Epoch 147: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 148/1000
Epoch 148: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 149/1000
Epoch 149: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 150/1000
Epoch 150: val_loss did not improve from 0.00000
Epoch 151/1000
Epoch 151: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 152/1000
Epoch 152: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 153/1000
Epoch 153: val_loss did not improve from 0.00000
Epoch 154/1000
Epoch 154: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 155/1000
Epoch 155: val_loss did not improve from 0.00000
Epoch 156/1000
Epoch 156: val_loss improved from 0.00000 to 0.00000, saving model to E:/Acoustic\model_1_RNN.h5
Epoch 157/1000
Epoch 157: val_loss did 

In [2]:
# Make prediction on a random sample.
X = X_test[55]
y = y_test[55]
predict(model, X, y)

    # Save model accuracies on test set (for weight calculations later on).
accuracy = {
    "model_acc": [],
    "model_loss": [],
    "total_train_time": [],
}

accuracy["model_acc"].append(test_accuracy)
accuracy["model_loss"].append(test_error)
accuracy["total_train_time"].append(str(time))

with open(ACC_SAVE, "w") as fp:
    json.dump(accuracy, fp, indent=4)

NameError: name 'predict' is not defined

In [1]:
import os
import json
import librosa
import tensorflow as tf
import numpy as np
from termcolor import colored

In [None]:
# Read and save parameters.
DATASET_PATH = ""  # Path of folder with testing audios.
SAVED_MODEL_PATH = "E:/Acoustic/model_1_RNN.h5"  # Path of trained model
SAMPLE_RATE = 22050  # Sample rate in Hz.
DURATION = 1  # Length of audio files fed. Measured in seconds.
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION

# Predictions (1 or 0)
JSON_PATH = ".../predictions.json"
# Performance scores (accuracy, precision, recall, f1 score)
JSON_PERFORMANCE = ".../model_scores.json"

# Prediction of fed audio
class _Class_Predict_Service:
    """Singleton class for keyword spotting inference with trained models.
    :param model: Trained model
    """

    model = None
    _mapping = [
        1,
        0
    ]
    _instance = None

    # Predict hard values (1 or 0).
    def predict(self, file_path):
        """
        :param file_path (str): Path to audio file to predict
        :return predicted_keyword (str): Keyword predicted by the model
        """

        # Extract mels from testing audio.
        log_mel = self.preprocess(file_path)

        log_mel = log_mel[np.newaxis, ...]

        # Get the predicted label.
        predictions = self.model.predict(log_mel)
        predicted_index = np.argmax(predictions)
        predicted_class = self._mapping[predicted_index]
        return predicted_class

    # Outputs certainty values for soft voting (1-0).
    def preprocess(self, file_path, n_mels=90, n_fft=2048, hop_length=512, num_segments=1):
        """Extract MFCCs from audio file.
        :param file_path (str): Path of audio file
        :param n_mels (int): # of mels to extract
        :param n_fft (int): Interval we consider to apply STFT. Measured in # of samples
        :param hop_length (int): Sliding window for STFT. Measured in # of samples
        """

        num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)

        # Load audio file.
        signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

        # Process segments extracting mels and storing data.
        for s in range(num_segments):
            start_sample = num_samples_per_segment * s  # s=0 --> 0
            finish_sample = start_sample + num_samples_per_segment  # s=0 --> num_samples_per_segment

            # Extract mel specs.
            mel = librosa.feature.melspectrogram(signal[start_sample:finish_sample], sr=sr, n_mels=n_mels, n_fft=n_fft,
                                                 hop_length=hop_length)
            log_mel = librosa.power_to_db(mel)

        return log_mel.T


def Keyword_Spotting_Service():
    """Factory function for Keyword_Spotting_Service class.
    :return _Keyword_Spotting_Service._instance (_Keyword_Spotting_Service):
    """

    # Ensure an instance is created only the first time the factory function is called.
    if _Class_Predict_Service._instance is None:
        _Class_Predict_Service._instance = _Class_Predict_Service()
        _Class_Predict_Service.model = tf.keras.models.load_model(SAVED_MODEL_PATH)
    return _Class_Predict_Service._instance


# Saving results into a json file.
def save_mfcc(dataset_path, json_path):

    # Dictionary to store data.
    data = {
        "mapping": [],  # Maps different class labels --> background is mapped to 0.
        "names": [],
        "results": [],  # mels are the training input, labels are the target.
    }

    # Loop through all the classes.
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):

        # Ensure that we're not at the root level.
        if dirpath is not dataset_path:

            # Save the semantic label.
            dirpath_components = dirpath.split("/")  # class/background => ["class", "background"]
            semantic_label = dirpath_components[-1]  # considering only the last value
            data["mapping"].append(semantic_label)
            print("\nProcessing {}".format(semantic_label))

            # Process files for a specific class.
            for f in filenames:
                file_path = os.path.join(dirpath, f)

                # Create 2 instances of the keyword spotting service.
                kss = Keyword_Spotting_Service()
                kss1 = Keyword_Spotting_Service()

                # Check that different instances of the keyword spotting service point back to the same object.
                assert kss is kss1

                # Classify unseen audio.
                keyword = kss.predict(file_path)

                # Store mel for segment if it has the expected length.
                data["names"].append(f)
                data["results"].append(keyword)
                print("{}".format(file_path))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)


# Calculating performance scores (accuracy, precision, recall, f-score).
def performance_calcs(performance_path):
    # Dictionary to store model performance results.
    perfromance = {
        "TP": [],
        "FN": [],
        "TN": [],
        "FP": [],
        "Accuracy": [],
        "Precision": [],
        "Recall": [],
        "F1 Score": [],
    }

    with open(JSON_PATH, "r") as fp:
        data = json.load(fp)

    # Convert lists to numpy arrays.
    y = np.array(data["results"])

    a = float(sum(y[0:int(len(y) / 2)]))
    b = float(sum(y[int(len(y) / 2):int(len(y))]))

    # Calculating TP, TN, FP, FN.
    TP = a
    FN = int(len(y) / 2) - a
    FP = b
    TN = int(len(y) / 2) - b

    # Performance result calcs.
    Accuracy = (TP + TN) / (TP + TN + FN + FP)
    Precision = TP / (TP + FP)
    Recall = TP / (TP + FN)
    F1 = (2 * Precision * Recall) / (Precision + Recall)

    perfromance["TP"].append(TP)
    perfromance["FN"].append(FN)
    perfromance["TN"].append(TN)
    perfromance["FP"].append(FP)
    perfromance["Accuracy"].append(Accuracy)
    perfromance["Precision"].append(Precision)
    perfromance["Recall"].append(Recall)
    perfromance["F1 Score"].append(F1)

    with open(performance_path, "w") as fp:
        json.dump(perfromance, fp, indent=4)


if __name__ == "__main__":
    save_mfcc(DATASET_PATH, JSON_PATH)
    performance_calcs(JSON_PERFORMANCE)

    print(
        colored("Paper 8's model performance scores have been saved to {}.".format(JSON_PERFORMANCE), "green"))