In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
import json
# from torch.utils.tensorboard import SummaryWriter

import numpy as np
from tqdm import tqdm

from utils.data import *
from utils.models import *

In [22]:
# Alias for PHONE. PHONE is not defined in this notebook; presumably it is provided by
# one of the star imports (utils.data / utils.models) -- TODO confirm.
# NOTE(review): `vocab` is not referenced by any of the cells visible here.
vocab = PHONE

In [32]:
def get_data(path_to_data, speakers, recordings, voices, classes, less_prob = False, closeness=2):
    """Collect one transcript and one class label per (speaker, recording, voice).

    Args:
        path_to_data: Path prefix; the speaker id is appended directly to it,
            followed by "/spchdatadir/recording<record>/voice_<voice>.wav".
        speakers: Iterable of speaker ids.
        recordings: Mapping of recording id -> intent name.
        voices: Iterable of voice ids (iterated once per speaker/recording pair).
        classes: Mapping of intent name -> class label.
        less_prob: When true, two randomly drawn positions of every transcript
            (never position 0, possibly the same position twice) are overwritten
            with the entries found at the same positions in the result of
            ``get_less_probable(x_path, closeness)``.
        closeness: Forwarded unchanged to ``get_less_probable``.

    Returns:
        ``(X, Y)``: two lists of equal length holding the transcripts and the
        matching class labels.
    """
    X = []
    Y = []
    for speaker in speakers:
        for record, intent in recordings.items():
            for voice in voices:
                x_path = f"{path_to_data}{speaker}/spchdatadir/recording{record}/voice_{voice}.wav"
                if less_prob:
                    transcript_second = get_less_probable(x_path, closeness)
                    transcript = get_transcript(x_path)
                    # Only positions that exist in BOTH sequences can be swapped.
                    # Drawing from len(transcript) alone raised ValueError for
                    # transcripts with fewer than two entries and could index past
                    # the end of a shorter alternative transcript.
                    limit = min(len(transcript), len(transcript_second))
                    if limit > 1:
                        for ix in np.random.randint(low=1, high=limit, size=2):
                            transcript[ix] = transcript_second[ix]
                else:
                    transcript = get_transcript(x_path)
                X.append(transcript)
                Y.append(classes[intent])
    return X, Y

In [33]:
class GarboSamples(torch.utils.data.Dataset):
    """Map-style dataset pairing each transcript with its intent-class label.

    Samples are produced by ``get_data``. Every item is returned as a pair of
    ``torch.long`` tensors, and ``collate_fn`` pads a list of items into a batch.
    """

    def __init__(self, path_to_data, speakers, voices, recordings, classes, less_prob = False, closeness=2):
        """Load all samples up front; arguments are forwarded to ``get_data``."""
        # get_data expects (recordings, voices) in the opposite order to this constructor.
        samples, labels = get_data(path_to_data, speakers, recordings, voices, classes, less_prob, closeness)
        self.Xs = samples
        self.Ys = labels
        assert len(self.Xs) == len(self.Ys)
        self.length = len(self.Xs)

    def __len__(self):
        """Number of samples loaded by the constructor."""
        return self.length

    def __getitem__(self, ind):
        """Return sample ``ind`` as ``(transcript, label)``; the label is flattened to 1-D."""
        raw_x, raw_y = self.Xs[ind], self.Ys[ind]
        label = torch.tensor(raw_y, dtype=torch.long).view(-1)
        # The stored transcript must be a numpy array for from_numpy to accept it.
        transcript = torch.from_numpy(raw_x).long()
        return transcript, label

    def collate_fn(self, batch):
        """Pad a list of ``(transcript, label)`` items into batch-first tensors.

        Returns ``(padded_x, padded_y, lengths_x, lengths_y)`` where the lengths
        are the unpadded sizes of each item; ``lengths_x`` is cast to int32.
        """
        inputs, targets = [], []
        for sample, target in batch:
            inputs.append(sample)
            targets.append(target)

        padded_inputs = pad_sequence(inputs, batch_first=True)
        padded_targets = pad_sequence(targets, batch_first=True)

        input_lengths = torch.tensor(list(map(len, inputs))).type(torch.int)
        target_lengths = torch.tensor(list(map(len, targets)))

        return padded_inputs, padded_targets, input_lengths, target_lengths

In [34]:
def _evaluate_loader(model, loader, criterion):
    """Score `model` on every batch of `loader` without tracking gradients.

    Returns ``(total_loss, num_corrects, num_samples)`` where ``num_samples`` is
    the number of targets actually seen (partial last batches count correctly).
    """
    # Evaluation must not run in train mode (dropout / batch-norm behave differently).
    model.eval()
    total_loss = 0.0
    num_corrects = 0
    num_samples = 0
    with torch.no_grad():
        for x, y, lx, _ly in loader:
            target = y.flatten()
            output = model(x, lx)
            total_loss += float(criterion(output, target))
            num_corrects += int((torch.argmax(output, dim=1) == target).sum())
            num_samples += target.numel()
    return total_loss, num_corrects, num_samples


def do_it_all(S, K, num_intents):
    """Build the datasets, train an ICASSP1CNN and return its test accuracy.

    Args:
        S: Exclusive upper bound of the training speaker ids; training uses
            ``range(2, S)``. Validation speakers are 9-10 and test speakers 11-12.
        K: Number of voice ids drawn at random from 1..13.
        num_intents: ``2`` selects the two-intent recording set, any other value
            the four-intent set. Also passed to the model as ``label_size``.

    Returns:
        Test accuracy in percent, as a float.
    """
    path_to_data = "Data/Speakers/pp"
    train_speakers = range(2, S)
    val_speakers = [9, 10]
    test_speakers = [11, 12]
    two_intent_recordings = {29: "Lift", 1: "Approach"}
    four_intent_recordings = {
        29: "Lift",
        1: "Approach",
        33: "Grap",
        36: "Point",
    }
    classes = {"Lift": 0, "Grap": 3, "Point": 2, "Approach": 1}
    # NOTE(review): randint samples with replacement, so the K voice ids may
    # contain duplicates -- confirm whether K distinct voices were intended.
    voices = np.random.randint(low=1, high=14, size=K)
    less_prob = True
    closeness = 2
    BATCH_SIZE = 4
    rcrd = two_intent_recordings if num_intents == 2 else four_intent_recordings

    train_data = GarboSamples(path_to_data, train_speakers, voices, rcrd, classes, less_prob, closeness)
    val_data = GarboSamples(path_to_data, val_speakers, voices, rcrd, classes, less_prob, closeness)
    test_data = GarboSamples(path_to_data, test_speakers, voices, rcrd, classes, less_prob, closeness)
    train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, collate_fn=train_data.collate_fn)
    # Evaluation loaders do not need shuffling; the metrics are order-independent.
    val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False, collate_fn=val_data.collate_fn)
    test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, collate_fn=test_data.collate_fn)

    epochs = 20
    # 229 is presumably the input vocabulary size expected by ICASSP1CNN -- TODO confirm.
    model = ICASSP1CNN(229, label_size=num_intents)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    # The scheduler is stepped once per batch, hence T_max counts batches, not epochs.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))

    for _epoch in range(epochs):
        model.train()
        for x, y, lx, _ly in train_loader:
            optimizer.zero_grad()
            output = model(x, lx)
            loss = criterion(output, y.flatten())
            loss.backward()
            optimizer.step()
            scheduler.step()

    # Validation runs once after training, as before. NOTE(review): its metrics
    # are still not reported or used for model selection.
    _evaluate_loader(model, val_loader, criterion)

    _test_loss, num_corrects, num_samples = _evaluate_loader(model, test_loader, criterion)
    # Divide by the samples actually scored: len(test_loader) * BATCH_SIZE
    # over-counts whenever the last batch is smaller than BATCH_SIZE.
    return float(100 * num_corrects / num_samples)
        

In [None]:
# Sweep the speaker bound `s` and the number of voices `k`; each configuration
# is run three times and the mean test accuracy is printed and stored in `log`.
# The label reports s-2 because do_it_all trains on speakers range(2, s).
log = {}
for s in (6, 7, 8, 9):
    for k in range(1, 8):
        scores = [do_it_all(s, k, num_intents=4) for i in range(3)]
        avg_scores = np.mean(scores)
        print("S: {}, k:{}, 3 exp Test Accuracy is {:0.4f}".format(s-2, k, avg_scores))
        log["S: {}, k:{}".format(s-2, k)] = avg_scores
        

In [None]:
# Persist the sweep results as JSON. The context manager closes the file even
# if json.dump raises, which the manual open()/close() pair did not guarantee.
with open("NAME OF EXP.json", "w") as log_file:
    json.dump(log, log_file)