In [1]:
import os, random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import numpy as np
import json
from argparse import Namespace


In [7]:
class VoiceDataset(Dataset):
    def __init__(self, samples, longest, train_size=0.8):
        super(VoiceDataset, self).__init__()
        self.longest = longest
        self.train_size = train_size
        train_samples, test_samples = self.train_test_split(samples)
        self.lookup_dict = {'train' : (train_samples, len(train_samples)), 
                           'test' : (test_samples, len(test_samples))}
        self.set_split('train')
        
    def train_test_split(self, samples):
        train_samples, test_samples = [], []
        for num in samples:
            mfccs = samples[num]
            #random.shuffle(mfccs)
            size = len(mfccs)
            train = mfccs[:int(size * self.train_size)]
            train = [(vec, length, num) for vec, length in train]
            test = mfccs[int(size * self.train_size):]
            test = [(vec, length, num) for vec, length in test]
            train_samples.extend(train)
            test_samples.extend(test)
        
        return train_samples, test_samples
    
    def set_split(self, split='train'):
        self.samples, self.length = self.lookup_dict[split]
    
    def __len__(self):
        return self.length

    def __getitem__(self, index):
        mfcc, length, num = self.samples[index]
        num = int(num)-1
        length = int(length)
        return {'x_data' : mfcc,
            'y_target' : num,
               'x_len' : length}
    
    @classmethod
    def create_dataset(cls, path):
        data_path = os.path.join('Data', 'mfcc_samples.json')
        f = open(data_path, 'r')
        samples = json.load(f)
        f.close()
        longest = 0
        for num in samples.keys():
            for ind, mfcc in enumerate(samples[num]):
                longest = max(longest, len(mfcc))
                samples[num][ind] = np.asarray(mfcc)
        
        dim = samples['1'][0].shape[1]
        
        for num in samples.keys():
            for ind, mfcc in enumerate(samples[num]):
                length = len(mfcc)
                to_pad = np.zeros((longest-length, dim))
                samples[num][ind] = (np.vstack((mfcc, to_pad)), length)                
        
        return cls(samples, longest)

    def get_num_batches(self, batch_size):
        return len(self) // batch_size
    
def generate_batches(dataset, batch_size, shuffle=True, drop_last=True, device="cpu"):
    """
        Generates batches using Pytorch's DataLoader and sets the device location to each tensor
    """
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
    for data_dict in dataloader:
        out_data_dict = {}
        for name, tensor in data_dict.items():
            out_data_dict[name] = data_dict[name].to(device)
        yield data_dict

        

In [8]:
def gather_columns(y_out, x_lengths):
    """
    Gets a vector, that's at the position indicated 
    by the corresponding value in x_lengths, from each batch datapoint in y_out.
    """
    x_lengths = x_lengths.long().detach().cpu().numpy() - 1
    out = []
    for batch_index, column_index in enumerate(x_lengths):
        out.append(y_out[batch_index, column_index])

    return torch.stack(out)


class RNN(nn.Module):
    def __init__(self, input_size, num_classes, hidden_size, dropout_rate=0.1, batch_first=True):
        super(RNN, self).__init__()
        
        self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, batch_first=batch_first)
        self.linear1 = nn.Linear(in_features=hidden_size, out_features=32)
        self.linear2 = nn.Linear(in_features=32, out_features=num_classes)
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.relu = nn.ReLU()
        
    def forward(self, x_in, x_lengths=None, apply_softmax=False):
        out, _ = self.rnn(x_in)
        
        if x_lengths is not None:
            output = gather_columns(out, x_lengths)
        else:
            output = out[:, -1, :]
            
        output = self.linear1(output)
        output = self.dropout(output)
        output = self.relu(output)
        output = self.linear2(output)
        if apply_softmax:
            output = self.softmax(output)
        return output

    

In [9]:
def set_seed_everywhere(seed, cuda):
    """
        Sets seed for reproducibility
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    random.seed(seed)
    if cuda:
        torch.cuda.manual_seed_all(seed)
        
def handle_dirs(dirpath):
    """
        Creates directories
    """
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)



In [10]:
args = Namespace(
    # Data and path information
    data_path="Data/mfcc_samples.json",
    save_dir="Model/",
    model_state_file="Model/model.pth",
    # Model hyper parameter
    hidden_size = 64,
    input_size = 32,
    num_classes = 5,
    # Training hyper parameter
    num_epochs=100,
    learning_rate=0.01,
    batch_size=64,
    early_stopping_criteria=3,
    seed=1337,
    # Runtime hyper parameter
    cuda=False
)

if not torch.cuda.is_available():
    args.cuda = False

args.device = torch.device('cuda' if args.cuda else 'cpu')
print('Using cuda : {}'.format(args.cuda))
set_seed_everywhere(args.seed, args.cuda)

handle_dirs(args.save_dir)


Using cuda : False


In [11]:
def make_train_state(args):
    return {'stop_early': False,
            'early_stopping_step': 0,
            'best_val': 1e8,
            'iteration': 0,
            'train_loss': [],
            'train_acc': [],
            'test_loss': [],
            'test_acc': [],
            'model_filename': args.model_state_file}

def update_train_state(args, model, train_state):
    # Updates train state after each epoch, 
    # coordinates early stopping and saves the best model
    if train_state['iteration'] == 0:
        torch.save(model.state_dict(), train_state['model_filename'])
        
    elif train_state['iteration'] > 0:
        last_loss, loss = train_state['test_loss'][-2:]

        if last_loss < loss:
            train_state['early_stopping_step'] += 1
            train_state['early_stop'] = \
                train_state['early_stopping_step'] >= args.early_stopping_criteria
        else:
            if loss < train_state['best_val']:
                torch.save(model.state_dict(), train_state['model_filename'])
                train_state['best_val'] = loss
            train_state['early_stopping_step'] = 0

    return train_state


In [12]:
dataset = VoiceDataset.create_dataset(path = args.data_path)
model = RNN(input_size = args.input_size, num_classes = args.num_classes, hidden_size = args.hidden_size)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
train_state = make_train_state(args)

In [13]:
dataset.set_split('train')
train_len = dataset.__len__()
dataset.set_split('test')
test_len = dataset.__len__()

In [14]:
def accuracy(output, actual):
    answers = np.argmax(output.detach().numpy(), axis=1)
    correct = 0
    for pred, ans in zip(actual, answers):
        if pred == ans:
            correct += 1
    return correct

In [15]:
try:
    for epoch_index in range(args.num_epochs):
        train_state['iteration'] = epoch_index

        dataset.set_split('train')
        model.train()
        train_loss, correct_train = 0, 0
        batches = generate_batches(dataset, args.batch_size)
        num_batches = dataset.get_num_batches(args.batch_size)

        for batch_index, batch_dict in enumerate(batches):
            # Clear the gradients
            optimizer.zero_grad()
            # Compute output
            output = model(x_in = batch_dict['x_data'].float(), x_lengths = batch_dict['x_len'])
            correct_train += accuracy(output, batch_dict['y_target'])
            # Calculate loss
            loss = loss_function(output, batch_dict['y_target'])
            train_loss += loss.item()
            # Produce gradients based on loss
            loss.backward()
            # Update gradients
            optimizer.step()

        train_state['train_loss'].append(round((train_loss / num_batches), 4))

        dataset.set_split('test')
        model.eval()
        test_loss, correct_test = 0, 0
        batches = generate_batches(dataset, args.batch_size)
        num_batches = dataset.get_num_batches(args.batch_size)

        for batch_index, batch_dict in enumerate(batches):
            output = model(x_in = batch_dict['x_data'].float(), x_lengths = batch_dict['x_len'])
            correct_test += accuracy(output, batch_dict['y_target'])
            loss = loss_function(output, batch_dict['y_target'])
            test_loss += loss.item()

        train_state['test_loss'].append(round((test_loss / num_batches), 4))

        print('Epoch - {}, train loss - {}, test_loss - {}'.format(epoch_index+1, train_state['train_loss'][-1], 
                                                                   train_state['test_loss'][-1]))
        train_state['train_acc'].append(round(correct_train / train_len * 100, 2))
        train_state['test_acc'].append(round(correct_test / test_len * 100, 2))
        print('Train Acc - {} %  |  Test Acc - {} %'.format(train_state['train_acc'][-1], 
                                                            train_state['test_acc'][-1]))

        train_state = update_train_state(args, model, train_state)
except KeyboardInterrupt:
    print("Exiting loop")


Epoch - 1, train loss - 1.6114, test_loss - 1.6087
Train Acc - 20.25 %  |  Test Acc - 21.06 %
Epoch - 2, train loss - 1.4776, test_loss - 1.1644
Train Acc - 30.48 %  |  Test Acc - 47.72 %
Epoch - 3, train loss - 0.918, test_loss - 0.8864
Train Acc - 59.84 %  |  Test Acc - 63.89 %
Epoch - 4, train loss - 0.584, test_loss - 0.6464
Train Acc - 77.14 %  |  Test Acc - 74.92 %
Epoch - 5, train loss - 0.3495, test_loss - 0.7463
Train Acc - 87.44 %  |  Test Acc - 75.84 %
Epoch - 6, train loss - 0.2176, test_loss - 0.6259
Train Acc - 92.24 %  |  Test Acc - 81.39 %
Epoch - 7, train loss - 0.1797, test_loss - 0.5013
Train Acc - 93.51 %  |  Test Acc - 84.42 %
Epoch - 8, train loss - 0.1612, test_loss - 0.4829
Train Acc - 94.25 %  |  Test Acc - 85.08 %
Epoch - 9, train loss - 0.1026, test_loss - 0.5621
Train Acc - 96.05 %  |  Test Acc - 84.55 %
Epoch - 10, train loss - 0.0788, test_loss - 0.4645
Train Acc - 96.89 %  |  Test Acc - 88.05 %
Epoch - 11, train loss - 0.0474, test_loss - 0.6125
Train Acc