In [1]:
import os
import warnings

warnings.filterwarnings("ignore")

import torch
import torch.optim as optim
from torch import nn
from torch.utils.data import DataLoader

from dataset import AudioDataset
from model import LSTMnet_RnnAtten

In [2]:
# Root directory of the extracted MELD.Raw download. Set the MELD_ROOT
# environment variable to run the notebook against a copy stored elsewhere;
# the fallback keeps the location this notebook was originally written for.
MELD_ROOT = os.environ.get('MELD_ROOT', '/Users/zuzia/Downloads/MELD.Raw')

# Annotation CSVs for the training and validation (dev) splits.
train_annotations = os.path.join(MELD_ROOT, 'train', 'train_sent_emo.csv')
val_annotations = os.path.join(MELD_ROOT, 'dev_sent_emo.csv')

# Directories holding the wav files that belong to each split.
train_audio = os.path.join(MELD_ROOT, 'train', 'train_splits', 'wav')
val_audio = os.path.join(MELD_ROOT, 'dev_splits_complete', 'wav')


In [3]:
# Build one dataset per split from its annotation CSV and wav directory.
train = AudioDataset(train_annotations, train_audio)
val = AudioDataset(val_annotations, val_audio)

# Training: reshuffle every epoch and drop the trailing short batch so each
# optimisation step sees exactly 32 examples.
train_dataloader = DataLoader(train, batch_size=32, shuffle = True, drop_last=True)
# Validation: keep the trailing partial batch. Dropping it would silently
# exclude up to 31 examples from any metric computed over this loader.
val_dataloader = DataLoader(val, batch_size=32, shuffle=True, drop_last=False)
# Pull one training batch so later cells can inspect it.
train_features, train_labels = next(iter(train_dataloader))

In [4]:
# Number of examples reported by the training dataset.
len(train)

9988

In [5]:
# Sanity check on a single training batch.
# The run recorded below printed features of shape [32, 1, 100] and labels of
# shape [32] (batch_size = 32); the trailing 100 matches the input_dim=100 the
# model is built with further down.
# NOTE(review): this comment previously expected 40 MFCC rows in the middle
# dimension, but the recorded output has a singleton there — confirm against
# AudioDataset's feature extraction.
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
# Features and labels must describe the same set of examples.
assert len(train_features) == len(train_labels), "feature/label batch sizes differ"
# Show the first example of the batch together with its label.
features = train_features[0]
label = train_labels[0]
features,label


Feature batch shape: torch.Size([32, 1, 100])
Labels batch shape: torch.Size([32])


(tensor([[ 7.1128e+01,  2.9132e+01, -6.6171e+00,  2.6409e+00, -7.9006e+00,
          -9.9457e+00, -7.1557e+00, -4.0811e+00, -3.2150e+00, -3.0161e+00,
          -1.6573e+00, -4.6688e+00,  1.2981e+00,  2.6853e+00, -1.3311e+00,
           2.2540e+00, -2.2698e+00, -2.2175e+00, -2.1314e+00, -3.0604e+00,
          -1.8158e+00, -1.4692e+00, -4.7822e+00, -6.1581e+00, -6.2343e+00,
          -1.2349e+00,  3.7591e+00,  4.5148e+00,  4.1583e+00,  4.6243e+00,
           4.1283e+00,  2.2471e+00, -5.3318e-01, -8.5891e-02, -3.8426e-01,
           3.2498e-01,  1.2366e+00, -7.2077e-01, -5.6705e-01,  2.1665e-01,
          -2.5582e+00, -3.4500e+00, -3.0593e+00, -2.1821e+00, -2.5977e+00,
          -3.2442e+00, -3.0872e+00, -2.3810e+00, -2.2971e+00, -2.1400e+00,
          -2.2100e+00, -2.1986e+00, -6.4927e-01,  3.7317e-01, -9.7138e-02,
           5.6490e-01,  1.6908e+00,  1.0802e+00,  3.6913e-01,  9.4291e-01,
           7.5714e-01, -8.6515e-01,  1.0522e-01,  1.1569e+00, -1.5143e-01,
          -5.6470e-01, -2

In [6]:
# Peek at the first batch only and report the feature / label shapes.
batch = next(iter(train_dataloader), None)
if batch is not None:
    x, y = batch
    print(x.shape, y.shape)

torch.Size([32, 1, 100]) torch.Size([32])


In [7]:
def loss_fnc(predictions, targets):
    """Cross-entropy loss between raw model scores and integer class targets.

    Args:
        predictions: unnormalised class scores, passed as the ``input`` of
            ``nn.CrossEntropyLoss``.
        targets: ground-truth classes, passed as its ``target``.

    Returns:
        The loss tensor produced by ``nn.CrossEntropyLoss`` with its default
        settings.
    """
    criterion = nn.CrossEntropyLoss()
    return criterion(predictions, targets)

In [8]:
def make_train_step(model, loss_fnc, optimizer):
    """Build a closure that runs one optimisation step on a single batch.

    Args:
        model: network invoked as ``model(X)``; its output is treated as
            per-class scores with the classes along dim 1.
        loss_fnc: callable ``loss_fnc(output_logits, Y)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: optimizer that updates ``model``'s parameters.

    Returns:
        ``train_step(X, Y)``, which returns ``(loss, accuracy)`` where ``loss``
        is a Python float and ``accuracy`` is a 0-dim tensor holding the batch
        accuracy as a percentage.
    """
    def train_step(X,Y):
        # set model to train mode
        model.train()
        # Clear gradients *before* the backward pass: anything already stored
        # on the parameters (an earlier backward() outside this closure, an
        # interrupted step) would otherwise be added to this batch's gradient.
        optimizer.zero_grad()
        # forward pass
        output_logits = model(X)
        predictions = torch.argmax(output_logits,dim=1)
        accuracy = torch.sum(Y==predictions)/float(len(Y))
        # compute loss
        loss = loss_fnc(output_logits, Y)
        # compute gradients
        loss.backward()
        # update parameters
        optimizer.step()
        # Clear again so the parameters are left without gradients after the
        # call, exactly as before this change.
        optimizer.zero_grad()
        return loss.item(), accuracy*100
    return train_step

In [9]:
def make_validate_fnc(model,loss_fnc):
    """Build a closure that scores ``model`` on one batch without training it.

    Args:
        model: network invoked as ``model(X)``; its output is treated as
            per-class scores with the classes along dim 1.
        loss_fnc: callable ``loss_fnc(output_logits, Y)`` returning a scalar
            tensor.

    Returns:
        ``validate(X, Y)``, which returns ``(loss, accuracy, predictions)``:
        the loss as a Python float, the batch accuracy as a percentage, and
        the predicted class index for every example.
    """
    def validate(X,Y):
        # Evaluation mode, and no autograd bookkeeping for the forward pass.
        model.eval()
        with torch.no_grad():
            scores = model(X)
            batch_loss = loss_fnc(scores, Y)
            predicted = scores.argmax(dim=1)
            hit_rate = (Y == predicted).sum() / float(len(Y))
        return batch_loss.item(), hit_rate * 100, predicted
    return validate

In [10]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# input_dim=100 matches the last dimension of the feature batches printed above.
# NOTE(review): output_dim=7 is presumably the number of emotion classes in the
# annotations — confirm against AudioDataset's label encoding.
model = LSTMnet_RnnAtten(input_dim = 100, hidden_dim=128, output_dim=7, num_layers=10).to(device)
# Count only parameters that receive gradients, so the figure matches its label
# even if some layers are frozen.
print('Number of trainable params: ',sum(p.numel() for p in model.parameters() if p.requires_grad) )
OPTIMIZER = torch.optim.Adam(model.parameters(),lr=0.0001, weight_decay=1e-3)


Number of trainable params:  1406471


In [11]:
train_step = make_train_step(model, loss_fnc, optimizer=OPTIMIZER)
validate = make_validate_fnc(model,loss_fnc)

# Per-epoch history of the mean training loss and mean validation loss.
losses=[]
val_losses = []
epochs = 30
for epoch in range(epochs):
    # ---- training pass -------------------------------------------------
    # Accumulate sample-weighted sums and divide by the number of samples
    # actually processed. Dividing by len(train) would under-report the
    # metrics whenever the loader drops its trailing partial batch.
    train_loss_sum = 0.0
    train_acc_sum = 0.0
    train_seen = 0
    for idx, (features, labels) in enumerate(train_dataloader):
        # as_tensor moves / casts an existing tensor without the extra
        # copy-construct that torch.tensor(tensor) performs.
        X_tensor = torch.as_tensor(features, dtype=torch.float32, device=device)
        Y_tensor = torch.as_tensor(labels, dtype=torch.long, device=device)
        loss, acc = train_step(X_tensor,Y_tensor)
        batch_len = len(features)
        train_loss_sum += loss * batch_len
        train_acc_sum += float(acc) * batch_len
        train_seen += batch_len
        print(f"\r Epoch {epoch}: batch {idx}",end='')
    epoch_loss = train_loss_sum / train_seen if train_seen else float('nan')
    epoch_acc = train_acc_sum / train_seen if train_seen else float('nan')

    # ---- validation pass -----------------------------------------------
    # Score every batch the validation loader yields, not just the first
    # one, so the reported numbers do not depend on which 32 samples
    # happened to be drawn this epoch.
    val_loss_sum = 0.0
    val_acc_sum = 0.0
    val_seen = 0
    for X_val, Y_val in val_dataloader:
        X_val_tensor = torch.as_tensor(X_val, dtype=torch.float32, device=device)
        Y_val_tensor = torch.as_tensor(Y_val, dtype=torch.long, device=device)
        batch_val_loss, batch_val_acc, _ = validate(X_val_tensor,Y_val_tensor)
        batch_len = len(X_val)
        val_loss_sum += batch_val_loss * batch_len
        val_acc_sum += float(batch_val_acc) * batch_len
        val_seen += batch_len
    # An empty validation loader yields NaN instead of a NameError.
    val_loss = val_loss_sum / val_seen if val_seen else float('nan')
    val_acc = val_acc_sum / val_seen if val_seen else float('nan')

    losses.append(epoch_loss)
    val_losses.append(val_loss)
    print('')
    print(f"Epoch {epoch} --> loss:{epoch_loss:.4f}, acc:{epoch_acc:.2f}%, val_loss:{val_loss:.4f}, val_acc:{val_acc:.2f}%")

 Epoch 0: batch 17