In [1]:
import warnings
warnings.filterwarnings("ignore")
from dataset import AudioDataset
from torch.utils.data import DataLoader
import torch.optim as optim
from torch import nn
import torch
from model import LSTMnet_RnnAtten
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [2]:

print(os.getcwd())

train_annotations = '.\\data\\MELD\\train_sent_emo.csv'
val_annotations = 'data\MELD\dev_sent_emo.csv'

train_audio = '.\\data\\MELD\\audio\\wav\\'
val_audio = '.\\data\\MELD\\audio\\wav_dev\\'


c:\Users\kjart\OneDrive\Dokumenter\KU\2. semester\CogSci2\research\multimodal


In [3]:
train = AudioDataset(train_annotations, train_audio)
#val = AudioDataset(val_annotations, val_audio)

train, val = train_test_split(train, test_size=0.2)

train_dataloader = DataLoader(train, batch_size=64, shuffle = True, drop_last=False)
val_dataloader = DataLoader(val, batch_size=64, shuffle=True, drop_last=False)
train_features, train_labels = next(iter(train_dataloader))

In [11]:
# Sanity check: Size should be [batch_size, num_features, feature_vector_len], [batch_size]
# Where batch size is 32, num_features (num mfccs) is 40 
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
features = train_features[0]
label = train_labels[0]

features.shape,label


Feature batch shape: torch.Size([64, 1611])
Labels batch shape: torch.Size([64])


(torch.Size([1611]), tensor(4))

In [12]:
for batch in train_dataloader:
    x, y = batch
    print(x.shape, y.shape)
    break

torch.Size([64, 1611]) torch.Size([64])


In [13]:
def loss_fnc(predictions, targets):
    return nn.CrossEntropyLoss()(input=predictions,target=targets)

In [14]:
def make_train_step(model, loss_fnc, optimizer):
    def train_step(X,Y):
        # set model to train mode
        model.train()
        # forward pass
        output_logits = model(X)
        predictions = torch.argmax(output_logits,dim=1)
        accuracy = torch.sum(Y==predictions)/float(len(Y))
        f1 = f1_score(Y, predictions, average='weighted')
        # compute loss
        loss = loss_fnc(output_logits, Y)
        # compute gradients
        loss.backward()
        # update parameters and zero gradients
        optimizer.step()
        optimizer.zero_grad()
        return loss.item(), accuracy*100, f1*100
    return train_step

In [15]:
def make_validate_fnc(model,loss_fnc):
    def validate(X,Y):
        with torch.no_grad():
            model.eval()
            output_logits = model(X)
            predictions = torch.argmax(output_logits,dim=1)
            accuracy = torch.sum(Y==predictions)/float(len(Y))
            f1 = f1_score(Y, predictions, average='weighted')
            loss = loss_fnc(output_logits,Y)
        return loss.item(), accuracy*100, f1*100, predictions
    return validate

In [26]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = LSTMnet_RnnAtten(input_dim = 1611, hidden_dim=256, output_dim=7, num_layers=5).to(device)
print('Number of trainable params: ',sum(p.numel() for p in model.parameters()) )
OPTIMIZER = torch.optim.Adam(model.parameters(),lr=0.0001, weight_decay=1e-3)


Number of trainable params:  4415495


In [None]:
train_step = make_train_step(model, loss_fnc, optimizer=OPTIMIZER)
validate = make_validate_fnc(model,loss_fnc)

losses=[]
val_losses = []
f1s = []
val_f1s= []
epochs = 150
for epoch in range(epochs):
    epoch_acc = 0
    epoch_loss = 0
    epoch_f1 = 0
    for idx, (features, labels) in enumerate(train_dataloader):
        X, Y = features, labels
        X_tensor = torch.tensor(X,device=device).float()
        Y_tensor = torch.tensor(Y, dtype=torch.long,device=device)
        loss, acc, f1 = train_step(X_tensor,Y_tensor)
        epoch_acc += acc*len(features)/len(train)
        epoch_loss += loss*len(features)/len(train)
        epoch_f1 += f1*len(features)/len(train)
    for batch in val_dataloader:
        X_val, Y_val = batch
        break
    X_val_tensor = torch.tensor(X_val,device=device).float()
    Y_val_tensor = torch.tensor(Y_val,dtype=torch.long,device=device)
    val_loss, val_acc, val_f1, _ = validate(X_val_tensor,Y_val_tensor)
    losses.append(epoch_loss)
    val_losses.append(val_loss)
    f1s.append(epoch_f1)
    val_f1s.append(val_f1)
    print('')
    print(f"Epoch {epoch} --> loss:{epoch_loss:.4f}, acc:{epoch_acc:.2f}%, f1:{f1:.2f}%, val_loss:{val_loss:.4f}, val_acc:{val_acc:.2f}%, val_f1:{val_f1:.2f}%")


Epoch 0 --> loss:1.7092, acc:37.64%, f1:28.41%, val_loss:1.8161, val_acc:42.19%, val_f1:25.03%

Epoch 1 --> loss:1.5649, acc:47.50%, f1:24.66%, val_loss:1.3892, val_acc:56.25%, val_f1:40.50%

Epoch 2 --> loss:1.5511, acc:47.53%, f1:30.35%, val_loss:1.6300, val_acc:40.62%, val_f1:23.47%

Epoch 3 --> loss:1.5476, acc:47.53%, f1:24.66%, val_loss:1.5413, val_acc:48.44%, val_f1:31.61%
