In [72]:
import numpy as np
import matplotlib.pyplot as plt
import os, json, random
from argparse import Namespace

In [73]:
args = Namespace(
    # Data and path information
    data_path="Data/mfcc_samples.json",
    save_dir="Model/",
    # Model hyper parameter
    hidden_size = 64,
    input_size = 32,
    num_classes = 5,
    # Training hyper parameter
    num_epochs=100,
    learning_rate=0.001,
    seed=1337
)

args.input_size += args.hidden_size

np.random.seed(args.seed)
random.seed(args.seed)



In [74]:
f = open(args.data_path, 'r')
samples = json.load(f)
f.close()
longest = 0
for num in samples.keys():
    for ind, mfcc in enumerate(samples[num]):
        longest = max(longest, len(mfcc))
        samples[num][ind] = np.asarray(mfcc)


In [69]:
s = set()
for elem in samples:
    print(elem)
    for arr in samples[elem]:
        if (len(arr) == 11):
            print(elem)
        s.add(len(arr))


1
1
2
3
4
5


In [75]:
train_samples, test_samples = [], []
train_size = 0.8
for num in samples:
    mfccs = samples[num]
    random.shuffle(mfccs)
    size = len(mfccs)
    train = mfccs[:int(size * train_size)]
    train = [(vec, num) for vec in train]
    test = mfccs[int(size * train_size):]
    test = [(vec, num) for vec in test]
    train_samples.extend(train)
    test_samples.extend(test)

random.shuffle(train_samples)
random.shuffle(test_samples)


In [76]:
# Model Weights

wf = np.random.randn(args.input_size, args.hidden_size) / np.sqrt(args.input_size / 2)
wi = np.random.randn(args.input_size, args.hidden_size) / np.sqrt(args.input_size / 2)
wc = np.random.randn(args.input_size, args.hidden_size) / np.sqrt(args.input_size / 2)
wo = np.random.randn(args.input_size, args.hidden_size) / np.sqrt(args.input_size / 2)
wy = np.random.randn(args.hidden_size, args.num_classes) / np.sqrt(args.hidden_size / 2)

bf = np.zeros(args.hidden_size)
bi = np.zeros(args.hidden_size)
bc = np.zeros(args.hidden_size)
bo = np.zeros(args.hidden_size)
by = np.zeros(args.num_classes)

# Initialize delta values

dwf = np.zeros_like(wf)
dwi = np.zeros_like(wi)
dwc = np.zeros_like(wc)
dwo = np.zeros_like(wo)
dwy = np.zeros_like(wy)

dbf = np.zeros_like(bf)
dbi = np.zeros_like(bi)
dbc = np.zeros_like(bc)
dbo = np.zeros_like(bo)
dby = np.zeros_like(by)

In [77]:
def softmax(arr):
    c = np.clip(arr, -700, 700) # float64 maximum expotentiable value
    e = np.exp(c)
    return e / np.sum(e, axis=1, keepdims=True)

def cross_entropy(out, label):
    entropy = label * np.log(out + 1e-6) # to prevent log value overflow
    return -np.sum(entropy, axis=1, keepdims=True)

def sigmoid(arr):
    c = np.clip(arr, -700, 700)
    return 1 / (1 + np.exp(-c))

def deriv_sigmoid(out):
    return out * (1 - out)

def tanh(arr):
    c = np.clip(arr, -350, 350)
    return 2 / (1 + np.exp(-2 * c)) - 1

def deriv_tanh(out):
    return 1 - np.square(out)

In [78]:
def LSTM_Cell(input_val):
    batch_num = input_val.shape[1]
    caches = []
    states = []
    states.append([np.zeros([batch_num, args.hidden_size]), np.zeros([batch_num, args.hidden_size])])
    
    for x in input_val:
        c_prev, h_prev = states[-1]
    
        x = np.column_stack([x, h_prev])
        hf = sigmoid(np.matmul(x, wf) + bf)
        zd = np.matmul(x, wf)
        hi = sigmoid(np.matmul(x, wi) + bi)
        ho = sigmoid(np.matmul(x, wo) + bo)
        hc = tanh(np.matmul(x, wc) + bc)
    
        c = hf * c_prev + hi * hc
        h = ho * tanh(c)
    
        states.append([c, h])
        caches.append([x, hf, hi, ho, hc])
        
    return caches, states
    


In [79]:
def predict(sample):
    
    input_val = np.transpose(sample, [1, 0, 2])
    
    caches, states = LSTM_Cell(input_val)
    c, h = states[-1]
    
    pred = softmax(np.dot(h, wy) + by)
    label = np.argmax(pred)
    
    return label

In [80]:
for i in range(args.num_epochs):
    loss, correct = 0, 0
    size = len(train_samples)
    for sample, target in train_batched:
        X = sample
        Y = np.zeros((target.shape[0], args.num_classes))
        target = [int(t)-1 for t in target]
        for ind, num in enumerate(target):
            Y[ind][num] = 1

        Xt = np.transpose(X, [1, 0, 2])

        caches, states = LSTM_Cell(Xt)
        c, h = states[-1]
        #print(h[3], h[5])
        out = np.dot(h, wy) + by
        pred = softmax(out)
        loss += np.sum(cross_entropy(pred, Y))
        predicted = np.argmax(pred, axis=1)
        # calculate accuracy
        correct += sum(predicted == target)
        

        # Backpropagation Through Time
        dout = pred - Y
        dwy = np.dot(h.T, dout)
        dby = np.sum(dout, axis=0)

        dc_next = np.zeros_like(c)
        dh_next = np.zeros_like(h)

        for t in range(Xt.shape[0]):
            c, h = states[-t-1]
            c_prev, h_prev = states[-t-2]

            x, hf, hi, ho, hc = caches[-t-1]

            tc = tanh(c)
            dh = np.dot(dout, wy.T) + dh_next

            dc = dh * ho * deriv_tanh(tc)
            dc = dc + dc_next

            dho = dh * tc 
            dho = dho * deriv_sigmoid(ho)

            dhf = dc * c_prev 
            dhf = dhf * deriv_sigmoid(hf)

            dhi = dc * hc 
            dhi = dhi * deriv_sigmoid(hi)

            dhc = dc * hi 
            dhc = dhc * deriv_tanh(hc)

            dwf += np.dot(x.T, dhf)
            dbf += np.sum(dhf, axis=0)
            dXf = np.dot(dhf, wf.T)

            dwi += np.dot(x.T, dhi)
            dbi += np.sum(dhi, axis=0)
            dXi = np.dot(dhi, wi.T)

            dwo += np.dot(x.T, dho)
            dbo += np.sum(dho, axis=0)
            dXo = np.dot(dho, wo.T)

            dwc += np.dot(x.T, dhc)
            dbc += np.sum(dhc, axis=0)
            dXc = np.dot(dhc, wc.T)

            dX = dXf + dXi + dXo + dXc

            dc_next = hf * dc
            dh_next = dX[:, -args.hidden_size:]

            # Update weights        
            """
            rame = args.learning_rate * dwf
            rame = rame.tolist()
            for ind, ram in enumerate(rame):
                rame[ind] = [round(r, 5) for r in ram]
            print(rame)
            """
            wf -= args.learning_rate * dwf
            wi -= args.learning_rate * dwi
            wc -= args.learning_rate * dwc
            wo -= args.learning_rate * dwo
            wy -= args.learning_rate * dwy

            bf -= args.learning_rate * dbf
            bi -= args.learning_rate * dbi
            bc -= args.learning_rate * dbc
            bo -= args.learning_rate * dbo
            by -= args.learning_rate * dby

            # Initialize delta values
            dwf *= 0
            dwi *= 0
            dwc *= 0
            dwo *= 0
            dwy *= 0

            dbf *= 0
            dbi *= 0
            dbc *= 0
            dbo *= 0
            dby *= 0


    if (i+1) % 10 == 0:
        print('Iteration', i)
        print('Training Loss', round(loss / size, 5))
        print('Training Accuracy', round(correct / size * 100, 3), '%')
    """
    loss, correct = 0, 0
    size = len(test_samples)
    for sample, target in test_samples:
        X = np.expand_dims(sample, axis=0)
        Y = np.zeros((args.num_classes,))
        target = int(target) - 1
        Y[target] = 1
        Xt = np.transpose(X, [1, 0, 2])

        caches, states = LSTM_Cell(Xt)
        c, h = states[-1]

        out = np.dot(h, wy) + by
        pred = softmax(out)
        loss += np.sum(cross_entropy(pred, Y))
        predicted = np.argmax(pred)
        
    print('Validation Loss', round(loss / size, 5))
    print('Validation Accuracy', round(correct / size * 100, 3), '%')
    print('----------')
    """

Iteration 9
Training Loss 1.64797
Training Accuracy 20.27 %
Iteration 19
Training Loss 1.64264
Training Accuracy 18.919 %
Iteration 29
Training Loss 1.64091
Training Accuracy 20.27 %
Iteration 39
Training Loss 1.64119
Training Accuracy 18.919 %
Iteration 49
Training Loss 1.64327
Training Accuracy 17.568 %
Iteration 59
Training Loss 1.64759
Training Accuracy 17.568 %
Iteration 69
Training Loss 1.64799
Training Accuracy 14.865 %
Iteration 79
Training Loss 1.6408
Training Accuracy 16.216 %
Iteration 89
Training Loss 1.64746
Training Accuracy 16.216 %
Iteration 99
Training Loss 1.64902
Training Accuracy 16.216 %
