## Notes from meeting on april 8th
Use NN.Embeddings instead of one hot
Randomize the voices

##### What to do:
1. Audio Augmentation
2. Add Noice to Allas output
3. Similarity Augmentation


## By next week

Try Akshat's Model on 2 and 4 intents

#### Try different distance measures: Cosine & Eqculidain



In [29]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import numpy as np
import math
from tqdm import tqdm
from sklearn.metrics import accuracy_score

from utils.data import *
from utils.models import *

# Dataloaders

In [4]:
def get_data(path_to_data, speakers, recordings, voices, classes, feats=False):
        X = []
        Y = []
        for speaker in speakers:
            for record in recordings.keys():
                for voice in voices:
                    x_path = path_to_data+str(speaker)+"/spchdatadir/recording"+str(record)+"/voice_"+str(voice)+".wav"
                    transcript = recognize(x_path, feats)
                    if not feats:
                        x = encode(transcript)
                        x = np.pad(x, ((math.ceil((28-x.shape[0])/2), int((28-x.shape[0])/2)), (0, 0)))
                    else:
                        x = transcript
                    X.append(x)
                    Y.append(classes[recordings[record]])
        return X, Y

In [48]:
class GarboSamples(torch.utils.data.Dataset):

    def __init__(self, path_to_data, speakers, voices, recordings, classes, feats=False): # You can use partition to specify train or dev
        self.Xs, self.Ys = get_data(path_to_data, speakers, recordings, voices, classes, feats)
        assert(len(self.Xs) == len(self.Ys))
        self.length = len(self.Xs)

    def __len__(self):
        return self.length

    def __getitem__(self, ind):

        X = self.Xs[ind]
        Y = self.Ys[ind]

        Yy = torch.tensor(Y, dtype=torch.long).view(-1, 1)

        return torch.from_numpy(X).float(), Yy

    def collate_fn(self, batch):

        batch_x = [x for x, y in batch]
        batch_y = [y for x, y in batch]

        batch_x_pad = pad_sequence(batch_x, batch_first=True)
        lengths_x = [len(x) for x in batch_x]
        
        batch_y_pad = pad_sequence(batch_y, batch_first=True) 
        lengths_y = [len(y) for y in batch_y] 

        return batch_x_pad, batch_y_pad, torch.tensor(lengths_x).type(torch.int), torch.tensor(lengths_y)

# Baseline Model

In [43]:
class Baseline(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(Baseline, self).__init__()

        self.CNN = nn.Sequential(
            nn.Conv1d(in_channels=input_channels,out_channels= 256, kernel_size=3),
            nn.ReLU(inplace=True)
        )
        self.lstm = nn.LSTM(input_size=256, hidden_size=256, bidirectional=False)
        self.classify = nn.Sequential(
            nn.Linear(in_features=int(256*26), out_features=num_classes),
            nn.Softmax()
        )
    def forward(self, x):
        cnn_input = x.permute(0, 2, 1)
        cnn_out = self.CNN(cnn_input)
        lstm_in = cnn_out.permute(0, 2, 1)
        out1, (out2, out3) = self.lstm(lstm_in)
        linear_input = out1.reshape(BATCH_SIZE, -1)
        output = self.classify(linear_input)
        
        return output


In [44]:
path_to_data = "Data/Speakers/pp"
train_speakers = range(2,3)
val_speakers = [9, 10]
test_speakers = [11, 12]
recordings = {29:"Lift",
            1:"Approach"}
classes = {"Lift":0, 
        "Grap":1, 
        "Point":2, 
        "Approach":3}
voices = range(1, 8)
feats = True

"""

            33:"Grap", 
            36:"Point",
"""

'\n\n            33:"Grap", \n            36:"Point",\n'

In [45]:
BATCH_SIZE=2
train_data = GarboSamples(path_to_data,train_speakers, voices, recordings, classes, feats)
val_data = GarboSamples(path_to_data,val_speakers, voices, recordings, classes, feats)
test_data = GarboSamples(path_to_data, test_speakers, voices, recordings, classes, feats)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, collate_fn=train_data.collate_fn)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True, collate_fn=val_data.collate_fn)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, collate_fn=test_data.collate_fn)


In [46]:
print("Train dataset samples = {}, batches = {}".format(train_data.__len__(), len(train_loader)))
print("Val dataset samples = {}, batches = {}".format(val_data.__len__(), len(val_loader)))
print("Test dataset samples = {}, batches = {}".format(test_data.__len__(), len(test_loader)))

Train dataset samples = 14, batches = 7
Val dataset samples = 28, batches = 14
Test dataset samples = 28, batches = 14


In [47]:
for data in val_loader:
    x, y, lx, ly = data # if you face an error saying "Cannot unpack", then you are not passing the collate_fn argument
    print(x.shape, y.shape, lx.shape, ly.shape)
    break

torch.Size([1, 1])
torch.Size([2, 85, 120]) torch.Size([2, 1, 1]) torch.Size([2]) torch.Size([2])


In [351]:
epochs = 50
model=Baseline(229, 4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))

In [303]:
writer = SummaryWriter()
for x, y in train_loader:
    writer.add_graph(model, x)
    break

  return forward_call(*input, **kwargs)
  input = module(input)


## Train

In [352]:
for epoch in range(epochs):
    model.train()
    total_loss = 0
    predictions = []
    trues = []
    for i,(x, y) in enumerate(train_loader):

        optimizer.zero_grad()
        output = model(x)

        loss = criterion(output, y)
        total_loss+=loss
        loss.backward()
        optimizer.step()
        pred = np.argmax(output.detach().numpy())
        predictions.append(pred)
        trues.append(y.flatten()[0].numpy())
        scheduler.step()
        
        del x
        del y
        del loss
    


  input = module(input)


## Validate

In [353]:
batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, leave=False, position=0, desc='Validation')
total_loss=0
predictions = []
trues = []
for i,(x, y) in enumerate(val_loader):

    with torch.no_grad():
        output = model(x)
        
    loss = criterion(output, y)
    total_loss+=loss
    pred = np.argmax(output.detach().numpy())
    predictions.append(pred)
    trues.append(y.flatten()[0].numpy())
    
    
    batch_bar.set_postfix(
        loss="{:.04f}".format(float(total_loss / (i + 1))))
    batch_bar.update()
    del x
    del y
    del loss
print("Validation Loss {:.04f}, Validation Accuracy {:.04f}".format(
        float(total_loss / len(val_loader)),
        float(accuracy_score(predictions, trues))))


Validation:  93%|█████████▎| 13/14 [00:00<00:00, 132.58it/s, loss=0.8865]

Validation Loss 0.8865, Validation Accuracy 1.0000


## Test

In [354]:
predictions = []
trues = []
batch_bar = tqdm(total=len(test_loader), dynamic_ncols=True, leave=False, position=0, desc='Test')
total_loss=0
for i,(x,y) in enumerate(test_loader):
    with torch.no_grad():
        output = model(x)
    
    pred = np.argmax(output.detach().numpy())
    predictions.append(pred)
    trues.append(y.flatten()[0].numpy())
    
    batch_bar.set_postfix(
        progress="{:.04f}".format(float(i / len(test_loader))))
    batch_bar.update()
    del x
    del y
print("Testing Accuracy: {:.04f}".format(accuracy_score(predictions, trues)))


  input = module(input)
Test:  93%|█████████▎| 13/14 [00:00<00:00, 155.83it/s, progress=0.9286]

Testing Accuracy: 0.9286
