In [63]:
from allosaurus.app import read_recognizer
import torch
import torch.nn as nn
import torch.optim as optim
import allosaurus
import numpy as np
import allosaurus
from utils import *
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn.metrics import accuracy_score
import math

# Dataloaders

In [197]:
class GarboSamples(torch.utils.data.Dataset):

    def __init__(self, path_to_data, speakers, recordings, voices, classes): # You can use partition to specify train or dev
        self.Xs = []
        self.Ys = []
        for speaker in speakers:
            for record in recordings.keys():
                for voice in voices:
                    x_path = path_to_data+str(speaker)+"/spchdatadir/recording"+str(record)+"/voice_"+str(voice)+".wav"
                    transcript = recognize(x_path)
                    x = encode(transcript)
                    x = np.pad(x, ((math.ceil((28-x.shape[0])/2), int((28-x.shape[0])/2)), (0, 0)))
                    self.Xs.append(x)
                    self.Ys.append(classes[recordings[record]])
        
        #torched_X = np.array(self.Xs)
        #print(torched_X.shape)
        #self.X_final = nn.utils.rnn.pad_sequence(torch.Tensor(torched_X), batch_first=True)
        
        assert(len(self.Xs) == len(self.Ys))

        self.length = len(self.Xs)


    def __len__(self):
        return self.length

    def __getitem__(self, ind):

        X = self.Xs[ind]
        Y = self.Ys[ind]

        Yy = torch.tensor(Y, dtype=torch.float32)

        return torch.from_numpy(X).float(), Yy

# Baseline Model

In [191]:
class Baseline(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(Baseline, self).__init__()

        self.CNN = nn.Sequential(
            nn.Conv1d(in_channels=input_channels,out_channels= 256, kernel_size=3),
            nn.ReLU()
        )
        self.lstm = nn.LSTM(input_size=256, hidden_size=256, bidirectional=False)
        self.classify = nn.Sequential(
            nn.Linear(in_features=256*26, out_features=num_classes),
            nn.Softmax()
        )
    def forward(self, x):
        cnn_input = x.permute(0, 2, 1)
        cnn_out = self.CNN(cnn_input)
        lstm_in = cnn_out.permute(0, 2, 1)
        out1, (out2, out3) = self.lstm(lstm_in)
        ## (batch, downsamples_seq, 256)
        linear_input = torch.flatten(out1).reshape(1, -1)
        output = self.classify(linear_input)
        ## (batch, down., 4)
        
        return output


In [207]:
path_to_data = "Data/Speakers/pp"
train_speakers = range(2, 8)
val_speakers = [9, 10]
test_speakers = [11, 12]
recordings = {29:"Lift",
            33:"Grap", 
            36:"Point",
            1:"Approach"}
classes = {"Lift":[1,0,0,0], 
        "Grap":[0,1,0,0], 
        "Point":[0,0,1,0], 
        "Approach":[0,0,0,1]}
voices = [1]

In [198]:
train_data = GarboSamples(path_to_data=path_to_data,speakers=train_speakers, voices=voices, recordings=recordings, classes = classes)
val_data = GarboSamples(path_to_data=path_to_data,speakers=val_speakers, voices=voices, recordings=recordings, classes = classes)
test_data = GarboSamples(path_to_data=path_to_data,speakers=test_speakers, voices=voices, recordings=recordings, classes = classes)
train_loader = DataLoader(train_data, batch_size=1, shuffle=True)
val_loader = DataLoader(val_data, batch_size=1, shuffle=True)
test_loader = DataLoader(test_data, batch_size=1, shuffle=True)


In [49]:
print("Train dataset samples = {}, batches = {}".format(train_data.__len__(), len(train_loader)))
print("Val dataset samples = {}, batches = {}".format(val_data.__len__(), len(val_loader)))
print("Test dataset samples = {}, batches = {}".format(test_data.__len__(), len(test_loader)))

Train dataset samples = 24, batches = 24
Val dataset samples = 8, batches = 8
Test dataset samples = 8, batches = 8


In [241]:
epochs = 50
model=Baseline(229, 4)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))
scaler = torch.cuda.amp.GradScaler()



In [None]:
for epoch in range(epochs):
    model.train()
    total_loss = 0
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train') 
    predictions = []
    trues = []
    for i,(x, y) in enumerate(train_loader):

        optimizer.zero_grad()
        output = model(x)

        loss = criterion(output, y)
        total_loss+=loss
        loss.backward()
        optimizer.step()
        predictions.append(np.array(output.detach().numpy()[0], dtype=np.int32))
        trues.append(y.detach().numpy()[0])

        scheduler.step()
        batch_bar.set_postfix(
                    loss="{:.04f}".format(float(total_loss / (i + 1))),
                    lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
        batch_bar.update()
        del x
        del y
        del loss
    
    print("Epoch {}/{}: Train Loss {:.04f}, Training Accuracy {:.04f}, Learning Rate {:.04f}".format(
        epoch + 1,
        epochs,
        float(total_loss / len(train_loader)),
        float(accuracy_score(predictions, trues)),
        float(optimizer.param_groups[0]['lr'])))
    


In [254]:
batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, leave=False, position=0, desc='Validation')
total_loss=0
for i,(x, y) in enumerate(val_loader):

    with torch.no_grad():
        output = model(x)
        
    loss = criterion(output, y)
    total_loss+=loss
    predictions.append(np.array(output.detach().numpy()[0], dtype=np.int32))
    trues.append(y.detach().numpy()[0])
    
    batch_bar.set_postfix(
        loss="{:.04f}".format(float(total_loss / (i + 1))))
    batch_bar.update()
    del x
    del y
    del loss
print("Validation Loss {:.04f}, Validation Accuracy {:.04f}".format(
        float(total_loss / len(val_loader)),
        accuracy_score(predictions, trues)))


  input = module(input)
Validation:  88%|████████▊ | 7/8 [00:00<00:00, 119.89it/s, loss=0.7857]

Validation Loss 0.7857, Validation Accuracy 0.2500


In [253]:
predictions = []
trues = []
batch_bar = tqdm(total=len(test_loader), dynamic_ncols=True, leave=False, position=0, desc='Test')
total_loss=0
for i,(x,y) in enumerate(test_loader):
    with torch.no_grad():
        output = model(x)
    
    predictions.append(np.array(output.detach().numpy()[0], dtype=np.int32))
    trues.append(y.detach().numpy()[0])
    batch_bar.set_postfix(
        progress="{:.04f}".format(float(i / len(test_loader))))
    batch_bar.update()
    del x
    del y
print("Testing Accuracy: ", accuracy_score(predictions, trues))


  input = module(input)
Test:  88%|████████▊ | 7/8 [00:00<00:00, 130.39it/s, progress=0.8750]

Testing Accuracy:  0.25
