In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
# from torch.utils.tensorboard import SummaryWriter

import numpy as np
import math
from tqdm import tqdm

from utils.data import *
import torch.nn.functional as F

from utils.models import *

In [2]:
def get_data(path_to_data, speakers, recordings, voices, classes, less_prob = False, closeness=2):
        X = []
        Y = []
        for speaker in speakers:
            for record in recordings.keys():
                for voice in voices:
                    x_path = path_to_data+str(speaker)+"/spchdatadir/recording"+str(record)+"/voice_"+str(voice)+".wav"
                    if less_prob:
                        transcript = get_less_probable(x_path, closeness)
                    else:
                        transcript = get_transcript(x_path)
                    X.append(transcript)
                    Y.append(classes[recordings[record]])
        return X, Y

In [21]:
class GarboSamples(torch.utils.data.Dataset):

    def __init__(self, path_to_data, speakers, voices, recordings, classes, less_prob = False, closeness=2): # You can use partition to specify train or dev
        self.Xs, self.Ys = get_data(path_to_data, speakers, recordings, voices, classes, less_prob, closeness)
        assert(len(self.Xs) == len(self.Ys))
        self.length = len(self.Xs)

    def __len__(self):
        return self.length

    def __getitem__(self, ind):

        X = self.Xs[ind]
        Y = self.Ys[ind]

        Yy = torch.tensor(Y, dtype=torch.long).view(-1)
        Xx = torch.from_numpy(X).long()
        return Xx, Yy

    def collate_fn(self, batch):

        batch_x = [x for x, y in batch]
        batch_y = [y for x, y in batch]

        batch_x_pad = pad_sequence(batch_x, batch_first=True)
        lengths_x = [len(x) for x in batch_x]
        
        batch_y_pad = pad_sequence(batch_y, batch_first=True) 
        lengths_y = [len(y) for y in batch_y] 

        return batch_x_pad, batch_y_pad, torch.tensor(lengths_x).type(torch.int), torch.tensor(lengths_y)

In [22]:
path_to_data = "Data/Speakers/pp"
train_speakers = range(2,3)
val_speakers = [9, 10]
test_speakers = [11, 12]
two_intent_recordings = {29:"Lift",
            1:"Approach"}
four_intent_recordings = {
        29:"Lift",
        1:"Approach",
        33:"Grap", 
        36:"Point"
}
classes = {"Lift":0, 
        "Grap":3, 
        "Point":2, 
        "Approach":1}
voices = range(1, 8)
less_prob = False
closeness = 2

In [23]:
BATCH_SIZE=4
train_data = GarboSamples(path_to_data,train_speakers, voices, two_intent_recordings, classes, less_prob, closeness)
val_data = GarboSamples(path_to_data,val_speakers, voices, two_intent_recordings, classes, less_prob, closeness)
test_data = GarboSamples(path_to_data, test_speakers, voices, two_intent_recordings, classes, less_prob, closeness)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, collate_fn=train_data.collate_fn)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True, collate_fn=val_data.collate_fn)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, collate_fn=test_data.collate_fn)

In [24]:
print("Train dataset samples = {}, batches = {}".format(train_data.__len__(), len(train_loader)))
print("Val dataset samples = {}, batches = {}".format(val_data.__len__(), len(val_loader)))
print("Test dataset samples = {}, batches = {}".format(test_data.__len__(), len(test_loader)))

Train dataset samples = 14, batches = 4
Val dataset samples = 28, batches = 7
Test dataset samples = 28, batches = 7


In [30]:
for data in train_loader:
    x, y, lx, ly = data
    print(x.shape, y.shape, lx.shape, ly.shape)
    break


torch.Size([4, 21]) torch.Size([4, 1]) torch.Size([4]) torch.Size([4])
tensor([1, 0, 1, 0])
tensor([[1],
        [0],
        [1],
        [0]])


In [26]:
epochs = 50
model=ICASSP1CNN(229,label_size=2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))

In [27]:
for i,(data) in enumerate(train_loader):
    x, y, lx, ly = data
    optimizer.zero_grad()
    print(x.shape)
    output = model(x,lx)
    print(output)
    break


torch.Size([4, 21])
tensor([[-0.0148,  0.0204],
        [-0.0046,  0.0226],
        [-0.0115,  0.0186],
        [-0.0223,  0.0131]], grad_fn=<AddmmBackward>)


In [None]:
two_intent = True
for epoch in range(epochs):
    model.train()
    total_loss = 0
    num_corrects = 0
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')
    for i, data in enumerate(train_loader):
        x, y, lx, ly = data

        optimizer.zero_grad()
        output = model(x,lx)

        loss = criterion(output, y.flatten())
        total_loss+=float(loss)

        loss.backward()
        optimizer.step()

        num_corrects += int((torch.argmax(output, axis=1) == y.flatten()).sum())

        scheduler.step()
        
        batch_bar.set_postfix(
        loss="{:.04f}".format(float(total_loss / (i + 1))),
        acc="{:0.4f}".format(float(100 * num_corrects / ((i + 1) * BATCH_SIZE))))
        batch_bar.update()
        
        del x
        del y
        del loss
    print("Epoch: {}/{}, Training Loss {:.04f}, Training Accuracy {:.04f}".format(
        epoch+1,
        epochs,
        float(total_loss / len(train_loader)),
        float(100 * num_corrects / (len(train_loader) * BATCH_SIZE))))
batch_bar.close()
    

In [34]:
batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, leave=False, position=0, desc='Validation')
total_loss=0
num_corrects = 0
for i,data in enumerate(val_loader):
    x,y,lx,ly = data
    with torch.no_grad():
        output = model(x,lx)
        
    loss = criterion(output, y.flatten())
    total_loss+=float(loss)
    num_corrects += int((torch.argmax(output, axis=1) == y.flatten()).sum())

    
    batch_bar.set_postfix(
        loss="{:.04f}".format(float(total_loss / (i + 1))),
        acc="{:0.4f}".format(float(100 * num_corrects / ((i + 1) * BATCH_SIZE))))
    batch_bar.update()
    del x
    del y
    del loss
batch_bar.close()

print("Validation Loss {:.04f}, Validation Accuracy {:.04f}".format(
        float(total_loss / len(train_loader)),
        float(100 * num_corrects / (len(val_loader) * BATCH_SIZE))))

                                                                                   

Validation Loss 1.2171, Validation Accuracy 50.0000




In [33]:
predictions = []
total_loss=0
num_corrects = 0
for i, data in enumerate(test_loader):
    x,y,lx,ly = data
    with torch.no_grad():
        output = model(x,lx)
    
    pred = np.argmax(output.detach().numpy(), axis=1)
    num_corrects += int((torch.argmax(output, axis=1) ==y.flatten()).sum())

    del x
    del y
print("Testing Accuracy: {:.04f}".format(float(100 * num_corrects / (len(test_loader) * BATCH_SIZE))))


Testing Accuracy: 50.0000
