In [8]:
import numpy as np
import matplotlib.pyplot as plt
import os, json, random, glob
import librosa
from argparse import Namespace


In [9]:
# Central run configuration: file locations, model dimensions and
# training settings, all reachable as attributes of ``args``.
args = Namespace(
    # Data and path information
    data_path="Data/mfcc_samples.json",
    save_dir="Model/",
    # Model hyper parameter
    hidden_size=64,
    input_size=32,
    num_classes=5,
    # Training hyper parameter
    num_epochs=1000,
    learning_rate=0.001,
    seed=1337,
)

# LSTM_Cell stacks each input frame with the previous hidden state before
# the gate multiplications, so the effective input width grows by
# ``hidden_size``.
args.input_size = args.hidden_size + args.input_size

# Seed both the stdlib and the NumPy global generators with the same value.
random.seed(args.seed)
np.random.seed(args.seed)


In [11]:
def normalize_audio(audio):
    """Scale each entry along the first axis to a peak magnitude of 1.

    Every row (``audio[i]``) is divided by the largest absolute value found
    in that row, so each row ends up in the range [-1, 1].

    Args:
        audio: array-like whose first axis indexes the rows to normalise
            (in this notebook: one row per MFCC frame).

    Returns:
        A new ndarray with the same shape as ``audio``. The input is not
        modified. Integer input is promoted to floating point by the
        division. A row whose peak is 0 is returned as all zeros instead of
        NaN.
    """
    frames = np.asarray(audio)
    # Reduce over every axis except the first: one peak per row.
    row_axes = tuple(range(1, frames.ndim))
    peaks = np.max(np.abs(frames), axis=row_axes, keepdims=True)
    # An all-zero row has peak 0; dividing by 1 instead keeps it at zero
    # rather than producing 0/0 = NaN.
    safe_peaks = np.where(peaks == 0, 1, peaks)
    return frames / safe_peaks


In [12]:
file_name = 'Test_Voices/' # folder dir
file_ext = '*.wav'
n_mfcc = 32
samples = []   # (clip name, normalised MFCC matrix) pairs
longest = 0    # largest first-axis length seen among the MFCC matrices

# glob returns matches in arbitrary order; sorting makes the order of
# `samples` (and everything derived from it) reproducible across runs.
for fn in sorted(glob.glob(os.path.join(file_name, file_ext))):
    sound_clip, s = librosa.load(fn)
    mfcc = librosa.feature.mfcc(y=sound_clip, sr=s, n_mfcc=n_mfcc)
    # Transposed before normalisation so that normalize_audio works on the
    # rows of mfcc.T (presumably one row per time frame -- confirm against
    # the librosa.feature.mfcc return shape).
    mfcc = normalize_audio(mfcc.T)
    # basename() strips the directory with the platform's own separator;
    # splitting on "/" leaves the folder in the name on Windows.
    # The name is everything before the first dot of the file name.
    name = os.path.basename(fn).split(".")[0]
    longest = max(longest, mfcc.shape[0])
    samples.append((name, mfcc))


In [27]:
# Load the trained parameters from JSON. The context manager closes the
# file handle as soon as parsing finishes (the bare open() never did).
with open('Model/RNNModel1.json', 'r') as inFile:
    weights = json.load(inFile)


# Weight matrices, named after how LSTM_Cell and the prediction loop use
# them: forget gate, input gate, candidate cell value, output gate, and the
# final projection applied to the last hidden state.
wf = weights['wf']
wi = weights['wi']
wc = weights['wc']
wo = weights['wo']
wy = weights['wy']

# Matching bias terms, added after each of the products above.
bf = weights['bf']
bi = weights['bi']
bc = weights['bc']
bo = weights['bo']
by = weights['by']

In [41]:
def softmax(arr, axis=1):
    """Numerically stable softmax along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating. This
    leaves the result mathematically unchanged, keeps every exponent <= 0
    (so ``exp`` cannot overflow), and preserves the differences between
    large scores. Clipping the raw scores instead would map e.g. 1000 and
    900 to the same value and return equal probabilities.

    Args:
        arr: array-like of scores.
        axis: axis along which the probabilities sum to 1. Defaults to 1,
            i.e. one distribution per row of a 2-D array.

    Returns:
        ndarray of the same shape as ``arr`` whose entries along ``axis``
        are non-negative and sum to 1.
    """
    scores = np.asarray(arr)
    if not np.issubdtype(scores.dtype, np.floating):
        scores = scores.astype(np.float64)
    # Replace +/-inf by the largest finite value so that the shift below
    # never evaluates inf - inf (which would be NaN).
    largest = np.finfo(scores.dtype).max
    scores = np.clip(scores, -largest, largest)
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=axis, keepdims=True)

def sigmoid(arr):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    The input is first limited to [-700, 700] so that the exponential stays
    within the finite float64 range.
    """
    bounded = np.clip(arr, -700, 700)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def tanh(arr):
    """Element-wise hyperbolic tangent.

    Delegates to ``np.tanh``, which saturates to -1 / 1 for large inputs
    without overflowing, so no clipping is required. It also stays accurate
    near zero, where the algebraic form ``2 / (1 + exp(-2x)) - 1`` cancels
    catastrophically and returns 0 for very small ``x``.

    Args:
        arr: array-like of real values.

    Returns:
        ndarray (or NumPy scalar for scalar input) with values in [-1, 1].
    """
    return np.tanh(arr)


def LSTM_Cell(input_val):
    """Run the LSTM forward pass over every time step of ``input_val``.

    Uses the module-level gate parameters ``wf, wi, wo, wc`` / ``bf, bi,
    bo, bc`` and ``args.hidden_size``. Cell and hidden state start at zero.

    Args:
        input_val: array iterated along axis 0 (one slice per time step);
            ``input_val.shape[1]`` is taken as the batch size and each slice
            is column-stacked with the previous hidden state.

    Returns:
        A ``(caches, states)`` tuple.
        ``caches`` holds one ``[x, hf, hi, ho, hc]`` list per time step,
        where ``x`` is the stacked input and the rest are the forget, input
        and output gate activations and the candidate cell value.
        ``states`` holds ``[c, h]`` pairs: the initial zero state followed
        by one pair per time step, so ``states[-1]`` is the final state.
    """
    batch_num = input_val.shape[1]

    # The parameters come from JSON as nested lists. Converting them once
    # here avoids a list -> ndarray conversion in every gate of every step.
    w_forget, w_input, w_output, w_cand = (np.asarray(w) for w in (wf, wi, wo, wc))
    b_forget, b_input, b_output, b_cand = (np.asarray(b) for b in (bf, bi, bo, bc))

    caches = []
    states = []
    states.append([np.zeros([batch_num, args.hidden_size]), np.zeros([batch_num, args.hidden_size])])
    for x in input_val:
        c_prev, h_prev = states[-1]
        # Each gate sees the current input and the previous hidden state.
        x = np.column_stack([x, h_prev])

        hf = sigmoid(np.matmul(x, w_forget) + b_forget)
        hi = sigmoid(np.matmul(x, w_input) + b_input)
        ho = sigmoid(np.matmul(x, w_output) + b_output)
        hc = tanh(np.matmul(x, w_cand) + b_cand)

        # Keep part of the old cell state, add part of the new candidate.
        c = hf * c_prev + hi * hc
        h = ho * tanh(c)

        states.append([c, h])
        caches.append([x, hf, hi, ho, hc])

    return caches, states


In [42]:
# Classify every clip: run the LSTM over its frames and project the final
# hidden state to class probabilities.
outputs = []
for name, mfcc in samples:
    # Add a leading batch axis of size 1, then swap it with the first axis
    # of mfcc so that LSTM_Cell iterates over that axis with batch size 1.
    X = mfcc[np.newaxis, ...]
    Xt = X.transpose(1, 0, 2)

    caches, states = LSTM_Cell(Xt)
    # Only the state after the last step feeds the classifier.
    c, h = states[-1]

    out = np.dot(h, wy) + by
    pred = softmax(out)
    predicted = pred.argmax(axis=1)

    outputs.append((name, pred))


In [48]:
# Write one line per clip ("<name> - p0, p1, ...") to predictions.txt and
# report the accuracy against labels derived from the clip names.
correct, total = 0, 0
# The context manager closes the file even if a sample raises mid-loop.
with open('predictions.txt', 'w') as f:
    for name, out in outputs:
        out = out[0]  # single row: the batch size used for prediction is 1
        total += 1
        # The label is the index of the first digit among '1'..'4' that
        # occurs anywhere in the name; names with none of them get class 4.
        # NOTE(review): this is a substring test, so a name containing
        # several of these digits takes the lowest one -- confirm this
        # matches the file naming scheme.
        label = next((idx for idx, digit in enumerate('1234') if digit in name), 4)
        correct += int(label == np.argmax(out))
        out = [str(round(num, 4)) for num in out]
        # join() writes every class probability instead of hard-coding five.
        text = name + ' - ' + ', '.join(out) + '\n'
        f.write(text)

if total:
    print('Accuracy on test - {}%'.format(correct / total * 100))
else:
    # No samples were found: avoid dividing by zero.
    print('Accuracy on test - n/a (no samples)')

Accuracy on test - 70.53571428571429%
