In [63]:
from allosaurus.app import read_recognizer
import torch
import torch.nn as nn
import torch.optim as optim
import allosaurus
import numpy as np
import allosaurus
from utils import *
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Dataloaders

In [133]:
class GarboSamples(torch.utils.data.Dataset):

    def __init__(self, path_to_data, speakers, recordings, voices, classes): # You can use partition to specify train or dev
        self.Xs = []
        self.Ys = []
        for speaker in speakers:
            for record in recordings.keys():
                for voice in voices:
                    x_path = path_to_data+str(speaker)+"/spchdatadir/recording"+str(record)+"/voice_"+str(voice)+".wav"
                    transcript = recognize(x_path)
                    self.Xs.append(encode(transcript))
                    self.Ys.append(classes[recordings[record]])
        self.X_final = nn.utils.rnn.pad_sequence(torch.from_numpy(self.Xs))
        
        assert(len(self.X_final) == len(self.Ys))

        self.length = len(self.X_final)


    def __len__(self):
        return self.length

    def __getitem__(self, ind):

        X = self.Xs[ind]
        Y = self.Ys[ind]

        Yy = torch.tensor(Y, dtype=torch.long)

        return torch.from_numpy(X).float(), Yy
    
    def collate_fn(self, batch):

        batch_x = np.array([x for x, y in batch])
        batch_y = np.array([y for x, y in batch])

        return torch.from_numpy(batch_x), torch.from_numpy(batch_y)

    


# You can either try to combine test data in the previous class or write a new Dataset class for test data
class GarboTestSamples(torch.utils.data.Dataset):

    def __init__(self, path_to_data, speakers, recordings, voices, classes): # You can use partition to specify train or dev
        self.Xs = []
        for speaker in speakers:
            for record in recordings.keys():
                for voice in voices:
                    x_path = path_to_data+str(speaker)+"/spchdatadir/recording"+str(record)+"/voice_"+str(voice)+".wav"
                    transcript = recognize(x_path)
                    self.Xs.append(encode(transcript))
        

        self.length = len(self.Xs)


    def __len__(self):
        return self.length

    def __getitem__(self, ind):

        X = self.Xs[ind]

        return torch.from_numpy(X).float()
    
    def collate_fn(self, batch):

        batch_x = np.array([x for x, y in batch])

        return torch.from_numpy(batch_x)
    

# Baseline Model

In [123]:
class Baseline(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(Baseline, self).__init__()

        self.CNN = nn.Sequential(
            nn.Conv1d(in_channels=input_channels,out_channels= 256, kernel_size=3),
            nn.ReLU()
        )
        self.lstm = nn.LSTM(input_size=256, hidden_size=256, bidirectional=False)
        self.classify = nn.Sequential(
            nn.Linear(in_features=256, out_features=num_classes),
            nn.Softmax()
        )
    def forward(self, x):
        cnn_input = x.permute(0, 2, 1)
        cnn_out = self.CNN(cnn_input)
        lstm_in = cnn_out.permute(0, 2, 1)
        out1, (out2, out3) = self.lstm(lstm_in)
        ## (batch, downsamples_seq, 256)
        output = self.classify(out1)
        ## (batch, down., 4)
        
        return output, cnn_out, out1


In [4]:
path_to_data = "Data/Speakers/pp"
train_speakers = range(2, 8)
val_speakers = [9, 10]
test_speakers = [11, 12]
recordings = {29:"Lift",
            33:"Grap", 
            36:"Point",
            1:"Approach"}
classes = {"Lift":[1,0,0,0], 
        "Grap":[0,1,0,0], 
        "Point":[0,0,1,0], 
        "Approach":[0,0,0,1]}
voices = [1]

In [134]:
train_data = GarboSamples(path_to_data=path_to_data,speakers=train_speakers, voices=voices, recordings=recordings, classes = classes)
val_data = GarboSamples(path_to_data=path_to_data,speakers=val_speakers, voices=voices, recordings=recordings, classes = classes)
test_data = GarboSamples(path_to_data=path_to_data,speakers=test_speakers, voices=voices, recordings=recordings, classes = classes)
train_loader = DataLoader(train_data, batch_size=1, shuffle=True)
val_loader = DataLoader(val_data, batch_size=1, shuffle=True)
test_loader = DataLoader(test_data, batch_size=1, shuffle=True)


TypeError: expected np.ndarray (got list)

In [None]:
for data in 

In [49]:
print("Train dataset samples = {}, batches = {}".format(train_data.__len__(), len(train_loader)))
print("Val dataset samples = {}, batches = {}".format(val_data.__len__(), len(val_loader)))
print("Test dataset samples = {}, batches = {}".format(test_data.__len__(), len(test_loader)))

Train dataset samples = 24, batches = 24
Val dataset samples = 8, batches = 8
Test dataset samples = 8, batches = 8


In [124]:
epochs = 50
model=Baseline(229, 4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=2e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))
scaler = torch.cuda.amp.GradScaler()

In [130]:
for i,(x, y) in enumerate(train_loader):

    classification_output, cnn_output, lstm_output = model(x)
    print(x.shape)
    print(classification_output.shape)
    print(cnn_output.shape)
    print(lstm_output.shape)
    #print(y)
    break

torch.Size([1, 14, 229])
torch.Size([1, 12, 4])
torch.Size([1, 256, 12])
torch.Size([1, 12, 256])


  input = module(input)


In [108]:
for epoch in range(epochs):
    model.train()
    total_loss = 0
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train') 
    for i,(x, y) in enumerate(train_loader):

        output = model(x)
        
        loss = criterion(output, y)
        total_loss+=loss
        loss.backward()
        optimizer.step()

        scheduler.step()
        batch_bar.set_postfix(
                    loss="{:.04f}".format(float(total_loss / (i + 1))),
                    lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
        batch_bar.update()
        del x
        del y
        del loss
    print("Epoch {}/{}: Train Loss {:.04f}, Learning Rate {:.04f}".format(
        epoch + 1,
        epochs,
        float(total_loss / len(train_loader)),
        float(optimizer.param_groups[0]['lr'])))


                                                                    

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]], grad_fn=<SoftmaxBackward>)
tensor([[0, 0, 1, 0]])


  input = module(input)


'loss = criterion(output, y)\n        total_loss+=loss\n        loss.backward()\n        optimizer.step()\n\n        scheduler.step()\n        batch_bar.set_postfix(\n                    loss="{:.04f}".format(float(total_loss / (i + 1))),\n                    lr="{:.04f}".format(float(optimizer.param_groups[0][\'lr\'])))\n        batch_bar.update()\n        del x\n        del y\n        del loss\n    print("Epoch {}/{}: Train Loss {:.04f}, Learning Rate {:.04f}".format(\n        epoch + 1,\n        epochs,\n        float(total_loss / len(train_loader)),\n        float(optimizer.param_groups[0][\'lr\'])))\n'

In [89]:
batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, leave=False, position=0, desc='Validation')
total_loss=0
for i,(x, y) in enumerate(val_loader):

    output = model(x)
        
    loss = criterion(output, y)
    total_loss+=loss
    
    batch_bar.set_postfix(
        loss="{:.04f}".format(float(total_loss / (i + 1))))
    batch_bar.update()
    del x
    del y
    del loss
print("Validation Loss {:.04f}".format(
        float(total_loss / len(val_loader))))


Validation:  88%|████████▊ | 7/8 [00:00<00:00, 118.25it/s, loss=2.1092]

Validation Loss 2.1092


In [105]:
predictions = []
trues = []
batch_bar = tqdm(total=len(test_loader), dynamic_ncols=True, leave=False, position=0, desc='Test')
total_loss=0
for i,(x,y) in enumerate(test_loader):

    output = model(x)
    
    predictions.append(output.detach().numpy())
    trues.append(y.detach().numpy())
    batch_bar.set_postfix(
        progress="{:.04f}".format(float(i / len(test_loader))))
    batch_bar.update()
    del x
    del y



  input = module(input)
Test:  88%|████████▊ | 7/8 [00:00<00:00, 80.07it/s, progress=0.8750]

In [128]:
from sklearn.metrics import accuracy_score 

In [129]:
accuracy_score(predictions, trues)

ValueError: could not broadcast input array from shape (15,4) into shape (1,)

In [64]:
x = torch.tensor(np.ones((1, 3, 12)), dtype=torch.float)

In [72]:
model(x)

tensor([[[1., 1., 1.]]], grad_fn=<SoftmaxBackward>)