In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader
# from torch.utils.tensorboard import SummaryWriter

import numpy as np
import math
from tqdm import tqdm
from sklearn.metrics import accuracy_score

from utils.data import *
import torch.nn.functional as F

from utils.models import ICASSP1CNN, ICASSP2CNN, ICASSP3CNN

In [29]:
def get_data(path_to_data, speakers, recordings, voices, classes, get_close = False, closeness=2):
        X = []
        Y = []
        for speaker in speakers:
            for record in recordings.keys():
                for voice in voices:
                    x_path = path_to_data+str(speaker)+"/spchdatadir/recording"+str(record)+"/voice_"+str(voice)+".wav"
                    if get_close:
                        transcript = get_similar(x_path, closeness)
                    else:
                        transcript = get_transcript(x_path)
                    X.append(transcript)
                    Y.append(classes[recordings[record]])
        return X, Y

In [3]:
class GarboSamples(torch.utils.data.Dataset):

    def __init__(self, path_to_data, speakers, voices, recordings, classes, get_close = False, closeness=2): # You can use partition to specify train or dev
        self.Xs, self.Ys = get_data(path_to_data, speakers, recordings, voices, classes, feats, get_close, closeness)
        assert(len(self.Xs) == len(self.Ys))
        self.length = len(self.Xs)

    def __len__(self):
        return self.length

    def __getitem__(self, ind):

        X = self.Xs[ind]
        Y = self.Ys[ind]

        Yy = torch.tensor(Y, dtype=torch.long).view(-1, 1)
        Xx = torch.from_numpy(X).long()
        return Xx, Yy

    def collate_fn(self, batch):

        batch_x = [x for x, y in batch]
        batch_y = [y for x, y in batch]

        batch_x_pad = pad_sequence(batch_x, batch_first=True)
        lengths_x = [len(x) for x in batch_x]
        
        batch_y_pad = pad_sequence(batch_y, batch_first=True) 
        lengths_y = [len(y) for y in batch_y] 

        return batch_x_pad, batch_y_pad, torch.tensor(lengths_x).type(torch.int), torch.tensor(lengths_y)

In [5]:
path_to_data = "Data/Speakers/pp"
train_speakers = range(2,3)
val_speakers = [9, 10]
test_speakers = [11, 12]
two_intent_recordings = {29:"Lift",
            1:"Approach"}
four_intent_recordings = {
        29:"Lift",
        1:"Approach",
        33:"Grap", 
        36:"Point"
}
classes = {"Lift":0, 
        "Grap":3, 
        "Point":2, 
        "Approach":1}
voices = range(1, 8)
get_close = False
closeness = 2

In [5]:
BATCH_SIZE=2
train_data = GarboSamples(path_to_data,train_speakers, voices, two_intent_recordings, classes, get_close, closeness)
val_data = GarboSamples(path_to_data,val_speakers, voices, two_intent_recordings, classes, get_close, closeness)
test_data = GarboSamples(path_to_data, test_speakers, voices, two_intent_recordings, classes, get_close, closeness)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, collate_fn=train_data.collate_fn)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True, collate_fn=val_data.collate_fn)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, collate_fn=test_data.collate_fn)

In [6]:
print("Train dataset samples = {}, batches = {}".format(train_data.__len__(), len(train_loader)))
print("Val dataset samples = {}, batches = {}".format(val_data.__len__(), len(val_loader)))
print("Test dataset samples = {}, batches = {}".format(test_data.__len__(), len(test_loader)))

Train dataset samples = 14, batches = 7
Val dataset samples = 28, batches = 14
Test dataset samples = 28, batches = 14


In [7]:
for data in train_loader:
    x, y, lx, ly = data # if you face an error saying "Cannot unpack", then you are not passing the collate_fn argument
    print(x.shape, y.shape, lx.shape, ly.shape)
    print(y)
    break


torch.Size([2, 21]) torch.Size([2, 1, 1]) torch.Size([2]) torch.Size([2])
tensor([[[0]],

        [[1]]])


In [8]:
epochs = 50
model=ICASSP1CNN(229,label_size=2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))

In [9]:
for i,(data) in enumerate(train_loader):
    x, y, lx, ly = data
    optimizer.zero_grad()
    print(x.shape)
    output = model(x,lx)#.view(-1, x.size(2)), lx)
    print(output)
    break


torch.Size([2, 21])
tensor([[-0.0254, -0.0210],
        [-0.0047, -0.0105]], grad_fn=<AddmmBackward>)


In [19]:
two_intent = True
for epoch in range(epochs):
    model.train()
    total_loss = 0
    predictions = []
    trues = []
    batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')
    for i, data in enumerate(train_loader):
        x, y, lx, ly = data

        optimizer.zero_grad()
        output = model(x,lx)

        loss = criterion(output, y.reshape(y.shape[0]))
        total_loss+=loss

        loss.backward()
        optimizer.step()

        pred = np.argmax(output.detach().numpy())
        predictions.append(pred)
        trues.append(y.flatten()[0].numpy())

        scheduler.step()
        
        batch_bar.set_postfix(
        loss="{:.04f}".format(float(total_loss / (i + 1))))
        batch_bar.update()
        
        del x
        del y
        del loss
    print("Epoch: {}, Training Loss {:.04f}, Training Accuracy {:.04f}".format(
        epoch+1,
        float(total_loss / len(train_loader)),
        float(accuracy_score(predictions, trues))))
batch_bar.close()
    

Train:  50%|█████     | 7/14 [00:02<00:02,  2.35it/s, loss=0.0004]    

Epoch: 1, Training Loss 0.0004, Training Accuracy 0.1429


Train:  50%|█████     | 7/14 [00:02<00:01,  3.62it/s, loss=0.0003]

Epoch: 2, Training Loss 0.0003, Training Accuracy 0.4286


Train:  50%|█████     | 7/14 [00:02<00:02,  2.87it/s, loss=0.0003]

Epoch: 3, Training Loss 0.0003, Training Accuracy 0.2857


Train:  50%|█████     | 7/14 [00:02<00:02,  2.64it/s, loss=0.0003]

Epoch: 4, Training Loss 0.0003, Training Accuracy 0.1429


Train:  50%|█████     | 7/14 [00:03<00:03,  2.32it/s, loss=0.0003]

Epoch: 5, Training Loss 0.0003, Training Accuracy 0.5714


Train:  50%|█████     | 7/14 [00:02<00:03,  2.30it/s, loss=0.0003]

Epoch: 6, Training Loss 0.0003, Training Accuracy 0.7143


Train:  50%|█████     | 7/14 [00:02<00:02,  2.65it/s, loss=0.0003]

Epoch: 7, Training Loss 0.0003, Training Accuracy 0.4286


Train:  50%|█████     | 7/14 [00:03<00:03,  2.10it/s, loss=0.0003]

Epoch: 8, Training Loss 0.0003, Training Accuracy 0.4286


Train:  50%|█████     | 7/14 [00:02<00:03,  2.16it/s, loss=0.0003]

Epoch: 9, Training Loss 0.0003, Training Accuracy 0.5714


Train:  50%|█████     | 7/14 [00:02<00:02,  3.04it/s, loss=0.0003]

Epoch: 10, Training Loss 0.0003, Training Accuracy 0.4286


Train:  50%|█████     | 7/14 [00:03<00:03,  2.22it/s, loss=0.0002]

Epoch: 11, Training Loss 0.0002, Training Accuracy 0.5714


Train:  50%|█████     | 7/14 [00:02<00:02,  2.63it/s, loss=0.0002]

Epoch: 12, Training Loss 0.0002, Training Accuracy 0.4286


Train:  50%|█████     | 7/14 [00:02<00:02,  3.01it/s, loss=0.0002]

Epoch: 13, Training Loss 0.0002, Training Accuracy 0.2857


Train:  50%|█████     | 7/14 [00:02<00:02,  3.31it/s, loss=0.0002]

Epoch: 14, Training Loss 0.0002, Training Accuracy 0.5714


Train:  50%|█████     | 7/14 [00:02<00:02,  2.46it/s, loss=0.0002]

Epoch: 15, Training Loss 0.0002, Training Accuracy 0.2857


Train:  50%|█████     | 7/14 [00:02<00:02,  3.03it/s, loss=0.0002]

Epoch: 16, Training Loss 0.0002, Training Accuracy 0.5714


Train:  50%|█████     | 7/14 [00:02<00:03,  2.30it/s, loss=0.0002]

Epoch: 17, Training Loss 0.0002, Training Accuracy 0.5714


Train:  50%|█████     | 7/14 [00:02<00:02,  3.24it/s, loss=0.0002]

Epoch: 18, Training Loss 0.0002, Training Accuracy 0.8571


Train:  50%|█████     | 7/14 [00:01<00:01,  3.85it/s, loss=0.0002]

Epoch: 19, Training Loss 0.0002, Training Accuracy 0.8571


Train:  50%|█████     | 7/14 [00:01<00:01,  4.44it/s, loss=0.0002]

Epoch: 20, Training Loss 0.0002, Training Accuracy 0.2857


Train:  50%|█████     | 7/14 [00:01<00:01,  4.07it/s, loss=0.0002]

Epoch: 21, Training Loss 0.0002, Training Accuracy 0.5714


Train:  50%|█████     | 7/14 [00:02<00:02,  3.21it/s, loss=0.0002]

Epoch: 22, Training Loss 0.0002, Training Accuracy 0.4286


Train:  50%|█████     | 7/14 [00:02<00:02,  3.33it/s, loss=0.0002]

Epoch: 23, Training Loss 0.0002, Training Accuracy 0.2857


Train:  50%|█████     | 7/14 [00:02<00:01,  3.51it/s, loss=0.0002]

Epoch: 24, Training Loss 0.0002, Training Accuracy 0.4286


Train:  43%|████▎     | 6/14 [00:01<00:02,  3.48it/s, loss=0.0002]

In [16]:
batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, leave=False, position=0, desc='Validation')
total_loss=0
predictions = []
trues = []
for i,data in enumerate(val_loader):
    x,y,lx,ly = data
    with torch.no_grad():
        output = model(x,lx)
        
    loss = criterion(output, y.reshape(y.shape[0]))
    total_loss+=loss
    pred = np.argmax(output.detach().numpy(),axis=1)
    for p in pred:
        predictions.append(p)
    for t in y.reshape(y.shape[0]):
        trues.append(int(t))

    
    batch_bar.set_postfix(
        loss="{:.04f}".format(float(total_loss / (i + 1))))
    batch_bar.update()
    del x
    del y
    del loss
batch_bar.close()

print("Validation Loss {:.04f}, Validation Accuracy {:.04f}".format(
        float(total_loss / len(val_loader)),
        float(accuracy_score(predictions, trues))))

                                                                        

Validation Loss 2.9693, Validation Accuracy 0.7500




In [18]:
predictions = []
trues = []
batch_bar = tqdm(total=len(test_loader), dynamic_ncols=True, leave=False, position=0, desc='Test')
total_loss=0
for i, data in enumerate(test_loader):
    x,y,lx,ly = data
    with torch.no_grad():
        output = model(x,lx)
    
    pred = np.argmax(output.detach().numpy(), axis=1)
    for p in pred:
        predictions.append(p)
    for t in y.reshape(y.shape[0]):
        trues.append(int(t))  

    batch_bar.set_postfix(
        progress="{:.04f}".format(float(i / len(test_loader))))
    batch_bar.update()
    del x
    del y
print("Testing Accuracy: {:.04f}".format(accuracy_score(predictions, trues)))


Test:  93%|█████████▎| 13/14 [00:00<00:00, 56.67it/s, progress=0.9286]

Testing Accuracy: 0.5714


Test: 100%|██████████| 14/14 [00:13<00:00, 56.67it/s, progress=0.9286]