In [1]:
# ! pip install --user librosa

In [2]:
from pathlib import Path
from scipy.io import wavfile
import scipy.signal
import pandas as pd
from tqdm.auto import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
import librosa

## Read data

In [3]:
intermediate_folder = Path('..') / 'data' / 'intermediate'

In [4]:
X_train = np.load(intermediate_folder / 'train_main_1_sec_audio_mfcc.npy')
X_train.shape

(33566, 20, 32)

In [5]:
X_val = np.load(intermediate_folder / 'val_main_1_sec_audio_mfcc.npy')
X_val.shape

(4619, 20, 32)

In [6]:
X_test = np.load(intermediate_folder / 'test_main_1_sec_audio_mfcc.npy')
X_test.shape

(4689, 20, 32)

In [7]:
y_train = pd.read_csv(intermediate_folder / 'train_main_1_sec_labels.csv', header=None, index_col=False)[0]
y_val = pd.read_csv(intermediate_folder / 'val_main_1_sec_labels.csv', header=None, index_col=False)[0]
y_test = pd.read_csv(intermediate_folder / 'test_main_1_sec_labels.csv', header=None, index_col=False)[0]
y_train.shape, y_val.shape, y_test.shape

((33566,), (4619,), (4689,))

In [9]:
X_train.shape, X_train.reshape(X_train.shape[0], -1).shape

((33566, 20, 32), (33566, 640))

## Flat LogisticRegression

In [22]:
lr_def = LogisticRegression(verbose=1, max_iter=300)
lr_def.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_lr = lr_def.predict(X_val.reshape(X_val.shape[0], -1))
print(classification_report(y_val, y_pred_lr))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.4min finished


              precision    recall  f1-score   support

        down       0.05      0.04      0.05       247
       eight       0.04      0.03      0.03       226
        five       0.06      0.07      0.07       224
        four       0.07      0.06      0.07       261
          go       0.03      0.03      0.03       231
        left       0.03      0.04      0.04       233
        nine       0.06      0.07      0.06       215
          no       0.05      0.04      0.04       235
         off       0.05      0.06      0.05       232
          on       0.03      0.03      0.03       234
         one       0.04      0.03      0.03       209
       right       0.05      0.06      0.05       235
       seven       0.05      0.04      0.05       240
         six       0.07      0.09      0.08       241
        stop       0.05      0.04      0.05       225
       three       0.06      0.07      0.07       225
         two       0.05      0.06      0.05       214
          up       0.05    

In [26]:
lr_def = LogisticRegression(verbose=1, max_iter=1000)
lr_def.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_lr = lr_def.predict(X_val.reshape(X_val.shape[0], -1))
print(classification_report(y_val, y_pred_lr))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  6.0min finished


              precision    recall  f1-score   support

        down       0.07      0.07      0.07       247
       eight       0.03      0.02      0.02       226
        five       0.07      0.08      0.07       224
        four       0.06      0.05      0.06       261
          go       0.06      0.06      0.06       231
        left       0.04      0.04      0.04       233
        nine       0.04      0.05      0.04       215
          no       0.07      0.07      0.07       235
         off       0.05      0.05      0.05       232
          on       0.05      0.04      0.04       234
         one       0.04      0.03      0.04       209
       right       0.05      0.05      0.05       235
       seven       0.07      0.06      0.07       240
         six       0.06      0.07      0.07       241
        stop       0.04      0.03      0.03       225
       three       0.05      0.05      0.05       225
         two       0.04      0.05      0.04       214
          up       0.03    

## Flat Random Forest

In [27]:
rf = RandomForestClassifier(n_jobs=-1, verbose=1, min_impurity_decrease=.001)
rf.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_rf = rf.predict(X_val.reshape(X_val.shape[0], -1))
print(classification_report(y_val, y_pred_rf))

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    6.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.0s finished
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        down       0.00      0.00      0.00       247
       eight       0.00      0.00      0.00       226
        five       0.00      0.00      0.00       224
        four       0.00      0.00      0.00       261
          go       0.00      0.00      0.00       231
        left       0.00      0.00      0.00       233
        nine       0.00      0.00      0.00       215
          no       0.00      0.00      0.00       235
         off       0.00      0.00      0.00       232
          on       0.00      0.00      0.00       234
         one       0.00      0.00      0.00       209
       right       0.00      0.00      0.00       235
       seven       0.00      0.00      0.00       240
         six       0.00      0.00      0.00       241
        stop       0.00      0.00      0.00       225
       three       0.00      0.00      0.00       225
         two       0.00      0.00      0.00       214
          up       0.00    

  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
rf = RandomForestClassifier(n_jobs=-1, verbose=1, min_impurity_decrease=.00015)
rf.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_rf = rf.predict(X_val.reshape(X_val.shape[0], -1))
print(classification_report(y_val, y_pred_rf))

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    6.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:   17.4s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.2s finished


              precision    recall  f1-score   support

        down       0.14      0.02      0.03       247
       eight       0.00      0.00      0.00       226
        five       0.03      0.01      0.01       224
        four       0.02      0.00      0.01       261
          go       0.03      0.00      0.01       231
        left       0.12      0.01      0.02       233
        nine       0.06      0.11      0.08       215
          no       0.15      0.01      0.02       235
         off       0.00      0.00      0.00       232
          on       0.00      0.00      0.00       234
         one       0.00      0.00      0.00       209
       right       0.00      0.00      0.00       235
       seven       0.07      0.04      0.05       240
         six       0.06      0.09      0.07       241
        stop       0.08      0.06      0.07       225
       three       0.06      0.01      0.02       225
         two       0.10      0.01      0.02       214
          up       0.09    

In [24]:
rf = RandomForestClassifier(n_jobs=-1, verbose=1, min_impurity_decrease=.0001)
rf.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_rf = rf.predict(X_val.reshape(X_val.shape[0], -1))
print(classification_report(y_val, y_pred_rf))

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  2.8min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.0s finished


              precision    recall  f1-score   support

        down       0.06      0.06      0.06       247
       eight       0.05      0.05      0.05       226
        five       0.07      0.07      0.07       224
        four       0.03      0.03      0.03       261
          go       0.05      0.05      0.05       231
        left       0.05      0.04      0.04       233
        nine       0.05      0.07      0.06       215
          no       0.04      0.03      0.03       235
         off       0.05      0.04      0.04       232
          on       0.06      0.06      0.06       234
         one       0.04      0.04      0.04       209
       right       0.04      0.04      0.04       235
       seven       0.07      0.08      0.08       240
         six       0.10      0.10      0.10       241
        stop       0.05      0.06      0.06       225
       three       0.05      0.05      0.05       225
         two       0.05      0.05      0.05       214
          up       0.08    

In [31]:
rf = RandomForestClassifier(n_jobs=-1, verbose=1, min_impurity_decrease=.00005)
rf.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_rf = rf.predict(X_val.reshape(X_val.shape[0], -1))
print(classification_report(y_val, y_pred_rf))

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  3.6min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.1s finished


              precision    recall  f1-score   support

        down       0.07      0.06      0.07       247
       eight       0.03      0.03      0.03       226
        five       0.06      0.05      0.05       224
        four       0.05      0.04      0.04       261
          go       0.08      0.07      0.08       231
        left       0.05      0.06      0.06       233
        nine       0.04      0.05      0.05       215
          no       0.08      0.07      0.07       235
         off       0.06      0.06      0.06       232
          on       0.06      0.06      0.06       234
         one       0.03      0.03      0.03       209
       right       0.07      0.07      0.07       235
       seven       0.07      0.08      0.08       240
         six       0.06      0.07      0.07       241
        stop       0.05      0.06      0.06       225
       three       0.05      0.05      0.05       225
         two       0.06      0.06      0.06       214
          up       0.06    

In [25]:
rf = RandomForestClassifier(n_jobs=-1, verbose=1, min_impurity_decrease=.00001)
rf.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_rf = rf.predict(X_val.reshape(X_val.shape[0], -1))
print(classification_report(y_val, y_pred_rf))

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  2.8min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.1s finished


              precision    recall  f1-score   support

        down       0.04      0.05      0.05       247
       eight       0.05      0.06      0.05       226
        five       0.07      0.09      0.08       224
        four       0.06      0.06      0.06       261
          go       0.05      0.05      0.05       231
        left       0.05      0.06      0.05       233
        nine       0.05      0.07      0.06       215
          no       0.04      0.04      0.04       235
         off       0.09      0.09      0.09       232
          on       0.04      0.04      0.04       234
         one       0.05      0.04      0.04       209
       right       0.06      0.06      0.06       235
       seven       0.05      0.05      0.05       240
         six       0.02      0.02      0.02       241
        stop       0.04      0.04      0.04       225
       three       0.04      0.04      0.04       225
         two       0.08      0.07      0.07       214
          up       0.06    

In [32]:
rf = RandomForestClassifier(n_jobs=-1, verbose=1, min_impurity_decrease=.000001)
rf.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_rf = rf.predict(X_val.reshape(X_val.shape[0], -1))
print(classification_report(y_val, y_pred_rf))

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  4.1min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.2s finished


              precision    recall  f1-score   support

        down       0.05      0.06      0.05       247
       eight       0.04      0.05      0.04       226
        five       0.05      0.07      0.06       224
        four       0.08      0.08      0.08       261
          go       0.05      0.05      0.05       231
        left       0.05      0.06      0.05       233
        nine       0.06      0.08      0.07       215
          no       0.05      0.05      0.05       235
         off       0.07      0.06      0.07       232
          on       0.04      0.04      0.04       234
         one       0.08      0.08      0.08       209
       right       0.04      0.04      0.04       235
       seven       0.05      0.04      0.05       240
         six       0.05      0.05      0.05       241
        stop       0.06      0.06      0.06       225
       three       0.03      0.03      0.03       225
         two       0.03      0.03      0.03       214
          up       0.06    