In [None]:
# ! pip install --user librosa

In [None]:
from pathlib import Path
from scipy.io import wavfile
import scipy.signal
import pandas as pd
from tqdm.auto import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

## Mount Drive

In [None]:
def is_running_on_colab():
    """Return True when the `google.colab` package can be imported, else False."""
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    else:
        return True


# Evaluated once; later cells branch on this flag.
ON_COLAB = is_running_on_colab()
ON_COLAB

In [None]:
# Choose where the intermediate artefacts live: a relative data directory
# for local runs, or a mounted Google Drive folder when running on Colab.
if not ON_COLAB:
    intermediate_folder = Path('..', 'data', 'intermediate')
else:
    # Imported here because the package is only importable on Colab.
    from google.colab import drive

    drive.mount('/content/gdrive')
    intermediate_folder = Path('/content/gdrive/MyDrive/Temp/Speech recognition project')

## Read data

In [None]:
# Load the stored training MFCC array and swap its last two axes.
# NOTE(review): presumably the result is (samples, frames, coefficients) -- confirm against the file.
X_train = np.transpose(
    np.load(intermediate_folder / 'train_main_1_sec_audio_mfcc.npy'),
    (0, 2, 1),
)
X_train.shape

(33566, 20, 32)

In [None]:
# Load the stored validation MFCC array and swap its last two axes,
# mirroring what is done for the training array.
X_val = np.transpose(
    np.load(intermediate_folder / 'val_main_1_sec_audio_mfcc.npy'),
    (0, 2, 1),
)
X_val.shape

(4619, 20, 32)

In [None]:
# X_test = np.load(intermediate_folder / 'test_main_1_sec_audio_mfcc.npy').transpose(0, 2, 1)
# X_test.shape

(4689, 20, 32)

In [None]:
# The label CSVs are read without a header row; column 0 is kept as a Series.
y_train_labels = pd.read_csv(
    intermediate_folder / 'train_main_1_sec_labels.csv',
    header=None,
    index_col=False,
)[0]
y_val_labels = pd.read_csv(
    intermediate_folder / 'val_main_1_sec_labels.csv',
    header=None,
    index_col=False,
)[0]
# Test split is intentionally left disabled:
# y_test_labels = pd.read_csv(intermediate_folder / 'test_main_1_sec_labels.csv', header=None, index_col=False)[0]
(y_train_labels.shape, y_val_labels.shape)
# (y_train_labels.shape, y_val_labels.shape, y_test_labels.shape)

((33566,), (4619,), (4689,))

In [None]:
# Fit the encoder on the training labels only, then apply the same mapping
# to the training and validation labels.
le = LabelEncoder().fit(y_train_labels)

y_train, y_val = (
    le.transform(split_labels) for split_labels in (y_train_labels, y_val_labels)
)
# y_test = le.transform(y_test_labels)
(y_train.shape, y_val.shape)
# (y_train.shape, y_val.shape, y_test.shape)

In [None]:
# Number of training samples per encoded class id, ordered by class id.
pd.Series(y_train).value_counts(sort=False).sort_index()

## RNN - LSTM with 50 units

In [None]:
# Single-layer recurrent classifier: an LSTM with 50 units followed by a
# softmax layer with one output per label class.
#
# The per-sample input shape and the number of classes are taken from the
# data instead of being hard-coded, so the model cannot drift out of sync
# with X_train or with the fitted label encoder.
n_timesteps, n_features = X_train.shape[1:]
n_classes = len(le.classes_)

lstm = Sequential([
    # Explicit Input layer: declares (timesteps, features) for each sample.
    tf.keras.Input(shape=(n_timesteps, n_features)),
    LSTM(50),
    Dense(n_classes, activation='softmax'),
])
lstm.summary()

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.4min finished


              precision    recall  f1-score   support

        down       0.05      0.04      0.05       247
       eight       0.04      0.03      0.03       226
        five       0.06      0.07      0.07       224
        four       0.07      0.06      0.07       261
          go       0.03      0.03      0.03       231
        left       0.03      0.04      0.04       233
        nine       0.06      0.07      0.06       215
          no       0.05      0.04      0.04       235
         off       0.05      0.06      0.05       232
          on       0.03      0.03      0.03       234
         one       0.04      0.03      0.03       209
       right       0.05      0.06      0.05       235
       seven       0.05      0.04      0.05       240
         six       0.07      0.09      0.08       241
        stop       0.05      0.04      0.05       225
       three       0.06      0.07      0.07       225
         two       0.05      0.06      0.05       214
          up       0.05    

In [None]:
# Display the model's declared input and output shapes side by side.
(lstm.input_shape, lstm.output_shape)

In [None]:
# Early-stopping callback watching the validation loss with a patience of 5.
# NOTE(review): no visible cell passes this callback to a fit call -- confirm it is used.
early_stopping = EarlyStopping(
    patience=5,
    monitor='val_loss',
)

In [None]:
learning_rate = 1e-3

# Targets are integer class ids (output of LabelEncoder), hence the sparse
# variant of categorical cross-entropy.
lstm.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=learning_rate),
    metrics=['accuracy'],
)

## Report (validation set)

In [None]:
# Classification report for the LSTM on the validation split.
#
# X_val is already a 3-D array (it is loaded with a 3-axis transpose), which
# is the rank the LSTM consumes. The previous `X_val[:, :, None]` inserted an
# extra axis and produced a 4-D array that the model cannot accept.
# NOTE(review): no `lstm.fit(...)` call is visible before this cell -- confirm
# the model has been trained before trusting these numbers.
y_pred_lstm = lstm.predict(X_val)

# predict() returns per-class probabilities; argmax picks the predicted class id.
# Passing labels/target_names prints the original label strings instead of ids.
print(classification_report(
    y_val,
    y_pred_lstm.argmax(axis=1),
    labels=np.arange(len(le.classes_)),
    target_names=le.classes_,
))