# Training and Evaluating the Model

This notebook trains and evaluates a bagged ensemble of Extra-Trees classifiers on featurized data. Each fitted model is then saved to a pickle file.

In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import librosa
import librosa.display

from IPython import display
from matplotlib import pyplot
from sklearn.ensemble import BaggingClassifier, ExtraTreesClassifier

In [2]:
# A simple wrapper class for (1-channel) audio data
# data is a 1-D NumPy array containing the data
# rate is a number expressing the samples per second
class Audio:
    """Single-channel audio clip: a sample array together with its sample rate.

    ``data`` is stored as given (a 1-D NumPy array of samples) and ``rate`` is
    the number of samples per second.
    """

    def __init__(self, data, rate):
        self.data = data
        self.rate = rate

    def play(self):
        """Return an ``IPython.display.Audio`` object for the clip."""
        return display.Audio(self.data, rate=self.rate)

    def plot_wave(self):
        """Plot the time-domain waveform with librosa."""
        # librosa 0.10 removed ``waveplot`` in favour of ``waveshow``. Keep
        # calling ``waveplot`` where it still exists so older installs render
        # exactly as before, and fall back only when it is gone.
        draw = getattr(librosa.display, 'waveplot', None)
        if draw is None:
            draw = librosa.display.waveshow
        draw(self.data, sr=self.rate)

    def plot_spectrum(self):
        """Plot a linear-frequency dB spectrogram using 1/20 s FFT windows."""
        n_fft = int(self.rate / 20)
        # Use one integer hop for both the STFT and the display so the time
        # axis matches the frames actually computed (n_fft / 4 would be a
        # float and can disagree with the STFT's n_fft // 4 default).
        hop_length = n_fft // 4
        # n_fft is passed by keyword: newer librosa releases no longer accept
        # it positionally.
        spectrum = librosa.stft(self.data, n_fft=n_fft, hop_length=hop_length)
        D = librosa.amplitude_to_db(np.abs(spectrum), ref=np.max)
        librosa.display.specshow(D, y_axis='linear', sr=self.rate, hop_length=hop_length)

    @classmethod
    def fromfile(cls, fn):
        """Build an ``Audio`` from file ``fn``, keeping the file's own sample rate."""
        return cls(*librosa.load(fn, sr=None))

In [3]:
# Credit to Yihui Xiong, 2017, 
# https://github.com/xiongyihui/tdoa/blob/master/gcc_phat.py
def gcc_phat(sig, refsig, fs=1, max_tau=None, interp=16):
    '''
    Estimate the offset between the signal sig and the reference signal refsig
    using the Generalized Cross Correlation - Phase Transform (GCC-PHAT) method.

    Parameters
    ----------
    sig, refsig : 1-D NumPy arrays
        The signal and the reference it is compared against.
    fs : number
        Sampling rate. The returned tau is expressed in units of 1/fs.
    max_tau : number or None
        Largest absolute delay to search, in the same units as tau. None
        (or 0) searches the full correlation range.
    interp : int
        Upsampling factor applied to the cross-correlation.

    Returns
    -------
    tau : float
        Estimated delay of sig relative to refsig.
    cc : ndarray
        Cross-correlation values for lags -max_shift..max_shift, in
        interpolated samples (2 * max_shift + 1 entries).
    '''

    # make sure the length for the FFT is larger or equal than len(sig) + len(refsig)
    n = sig.shape[0] + refsig.shape[0]

    # Generalized Cross Correlation Phase Transform
    SIG = np.fft.rfft(sig, n=n)
    REFSIG = np.fft.rfft(refsig, n=n)
    R = SIG * np.conj(REFSIG)

    # PHAT weighting keeps only the phase of each bin. A bin with zero
    # magnitude has no phase; leave it at 0 rather than computing 0/0 = NaN,
    # which would turn the entire inverse transform into NaN.
    magnitude = np.abs(R)
    weighted = np.zeros_like(R)
    np.divide(R, magnitude, out=weighted, where=magnitude != 0)

    cc = np.fft.irfft(weighted, n=(interp * n))

    max_shift = int(interp * n / 2)
    if max_tau:
        max_shift = np.minimum(int(interp * fs * max_tau), max_shift)

    # Use an explicit start index for the negative lags: cc[-max_shift:] would
    # return the whole array when max_shift is 0.
    cc = np.concatenate((cc[cc.size - max_shift:], cc[:max_shift + 1]))

    # find max cross correlation index
    shift = np.argmax(np.abs(cc)) - max_shift

    tau = shift / float(interp * fs)

    return tau, cc

In [4]:
# load training and testing data
# Read the featurized training and testing tables, in that order.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/featurized/featurized_train_data.csv',
        'data/featurized/featurized_test_data.csv',
    )
)

Facing defined as -90° to 90° (`dov` in {0, 45, 90, 270, 315})

In [5]:
# Binary target: 1 when `dov` is one of 0, 45, 90, 270 or 315, otherwise 0.
y_df = train_df.loc[:, "dov"]
y_df = y_df.map(lambda dov: 1 if dov in (0, 45, 90, 270, 315) else 0)
y = y_df.values

# Feature matrix: every column except 'sample', 'trial', 'angle_pos' and the 'dov' label.
X_df = train_df.loc[:, [col not in ('sample', 'trial', 'angle_pos', 'dov') for col in train_df.columns]]
X = X_df.values
# Replace missing feature values with 0, in place.
X[np.isnan(X)] = 0

In [18]:
# Base learner is a 1000-tree Extra-Trees forest; the bagging wrapper around it
# is what gets fitted on (X, y) and kept as the model.
tc = ExtraTreesClassifier(n_estimators=1000)
tc_fitted = BaggingClassifier(tc)
tc_fitted = tc_fitted.fit(X, y)

In [19]:
# Residual between predicted and true labels on the training set. With 0/1
# labels each squared residual is 1 for a miss and 0 otherwise, so the mean
# squared residual below is the misclassification rate.
Z = tc_fitted.predict(X) - y

# training error
np.dot(Z, Z) / Z.size

0.001736111111111111

In [8]:
# Binary target for the test set: 1 when `dov` is one of 0, 45, 90, 270 or 315, otherwise 0.
y_df_t = test_df.loc[:, "dov"]
y_df_t = y_df_t.map(lambda dov: 1 if dov in (0, 45, 90, 270, 315) else 0)
yt = y_df_t.values

# Test feature matrix: every column except 'sample', 'trial', 'angle_pos' and the 'dov' label.
X_df_t = test_df.loc[:, [col not in ('sample', 'trial', 'angle_pos', 'dov') for col in test_df.columns]]
Xt = X_df_t.values
# Replace missing feature values with 0, in place.
Xt[np.isnan(Xt)] = 0

In [9]:
# Residual between predicted and true labels on the test set. With 0/1 labels
# the mean squared residual is the error rate, so one minus it is the accuracy.
Zt = tc_fitted.predict(Xt) - yt

# testing accuracy
1 - np.dot(Zt, Zt) / Zt.size

0.796875

In [10]:
# Persist the fitted -90..90 model. The `with` block closes the file handle
# (flushing the pickle to disk) even if dumping raises.
filename = 'models/model-90to90.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(tc_fitted, model_file)

Facing defined as -45° to 45° (`dov` in {0, 45, 315})

In [11]:
# Binary target: 1 when `dov` is one of 0, 45 or 315, otherwise 0.
y_df = train_df.loc[:, "dov"]
y_df = y_df.map(lambda dov: 1 if dov in (0, 45, 315) else 0)
y = y_df.values

# Feature matrix: every column except 'sample', 'trial', 'angle_pos' and the 'dov' label.
X_df = train_df.loc[:, [col not in ('sample', 'trial', 'angle_pos', 'dov') for col in train_df.columns]]
X = X_df.values
# Replace missing feature values with 0, in place.
X[np.isnan(X)] = 0

In [12]:
# Base learner is a 1000-tree Extra-Trees forest; the bagging wrapper around it
# is what gets fitted on (X, y) and kept as the model.
tc = ExtraTreesClassifier(n_estimators=1000)
tc_fitted = BaggingClassifier(tc)
tc_fitted = tc_fitted.fit(X, y)

In [13]:
# Residual between predicted and true labels on the training set. With 0/1
# labels each squared residual is 1 for a miss and 0 otherwise, so the mean
# squared residual below is the misclassification rate.
Z = tc_fitted.predict(X) - y

# training error
np.dot(Z, Z) / Z.size

0.0008680555555555555

In [14]:
# Binary target for the test set: 1 when `dov` is one of 0, 45 or 315, otherwise 0.
y_df_t = test_df.loc[:, "dov"]
y_df_t = y_df_t.map(lambda dov: 1 if dov in (0, 45, 315) else 0)
yt = y_df_t.values

# Test feature matrix: every column except 'sample', 'trial', 'angle_pos' and the 'dov' label.
X_df_t = test_df.loc[:, [col not in ('sample', 'trial', 'angle_pos', 'dov') for col in test_df.columns]]
Xt = X_df_t.values
# Replace missing feature values with 0, in place.
Xt[np.isnan(Xt)] = 0

In [15]:
# Residual between predicted and true labels on the test set. With 0/1 labels
# the mean squared residual is the error rate, so one minus it is the accuracy.
Zt = tc_fitted.predict(Xt) - yt

# testing accuracy
1 - np.dot(Zt, Zt) / Zt.size

0.8194444444444444

In [16]:
# Persist the fitted -45..45 model. The `with` block closes the file handle
# (flushing the pickle to disk) even if dumping raises.
filename = 'models/model-45to45.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(tc_fitted, model_file)