In [1]:
import warnings
warnings.filterwarnings("ignore")
from dataset import AudioDataset
from torch.utils.data import DataLoader
from model2 import EmotionRecognizer, ConvNet
import torch.optim as optim
from torch import nn
import torch
from model import LSTMnet_RnnAtten

In [2]:
train_annotations = '/Users/zuzia/Downloads/MELD.Raw/train/train_sent_emo.csv'
val_annotations = '/Users/zuzia/Downloads/MELD.Raw/dev_sent_emo.csv'

train_audio = '/Users/zuzia/Downloads/MELD.Raw/train/train_splits/wav'
val_audio = '/Users/zuzia/Downloads/MELD.Raw/dev_splits_complete/wav'


In [3]:
train = AudioDataset(train_annotations, train_audio, transform = None)
val = AudioDataset(val_annotations, val_audio, transform = None)

train_dataloader = DataLoader(train, batch_size=32, shuffle = True, drop_last=True)
val_dataloader = DataLoader(val, batch_size=32, shuffle=True, drop_last=True)
train_features, train_labels = next(iter(train_dataloader))

In [4]:
len(train)

9988

In [4]:
# Sanity check: Size should be [batch_size, num_features, feature_vector_len], [batch_size]
# Where batch size is 32, num_features (num mfccs) is 40 
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
features = train_features[0]
label = train_labels[0]
features,label


Feature batch shape: torch.Size([32, 1, 100])
Labels batch shape: torch.Size([32])


(tensor([[ 88.2079,  60.0846,   6.6171,  14.8374,  -3.0069,  -2.0702, -15.3161,
          -13.9392,  -5.0488,  -4.0143,  -7.3686,  -3.9030,  -2.7670,   4.9882,
           -3.1765,   3.0647,  -0.3962,  -4.9282,  -2.7082,  -4.1611,  -2.7944,
           -0.6329,  -3.3335,  -4.6907,  -5.8244,  -3.3087,  -7.7267,  -5.5704,
           -4.2631,  -1.4815,   2.0238,   7.8157,   9.3493,   7.5352,   3.4834,
           -2.0878,  -0.4129,   0.1249,   1.1677,   1.0979,  -2.4662,  -0.7578,
           -0.9688,  -0.4189,  -1.5832,  -2.3393,  -1.5784,  -0.5006,  -1.3474,
           -1.2672,  -0.9041,  -0.6816,  -3.1787,  -1.4073,  -1.0606,  -1.9261,
           -2.8761,  -2.0089,  -1.2797,  -1.2950,   0.2506,   1.5064,   0.2917,
           -0.6703,   0.3881,   1.8584,   1.5484,   0.6946,   0.7729,  -1.2744,
           -1.5535,   0.5202,   0.8158,  -0.6739,  -0.4841,  -0.6081,  -0.7773,
           -0.2676,  -0.4270,  -1.0843,  -1.2439,   0.2346,   0.1006,  -0.2424,
           -0.0979,  -0.3875,  -0.3077, 

In [None]:
model = EmotionRecognizer(in_feat=train.num_features, num_classes=train.num_classes, p_dropout=0.0, lr=1e-4)
print(model)
print('num params:', model.count_parameters())

In [None]:
for batch in train_dataloader:
    x, y = batch
    print(x.shape, y.shape)
    break

In [8]:
h = model(x) 

In [None]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger

logger = TensorBoardLogger(save_dir='logs/',name='cnn_logs')
trainer = Trainer(max_epochs=150, logger=logger)

In [None]:
trainer.fit(model, train_dataloader, val_dataloader)

In [None]:
net = ConvNet()

#### 0.03 -> 1e-3
optimizer = optim.Adam(net.parameters(), lr=2e-5)

#### BCELoss -> CrossEntropyLoss
loss_function = nn.CrossEntropyLoss()

In [None]:
epochs = 30
steps = 0
train_losses, test_losses = [], []
for e in range(epochs):
    running_loss = 0
    #### put net in train mode
    net.train()
    for idx, (features, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        log_ps = net(features)
        loss = loss_function(log_ps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    else:
        test_loss = 0
        accuracy = 0

        #### put net in eval mode
        net.eval()
        with torch.no_grad():
            for features, labels in val_dataloader:
                log_ps = net(features)
                test_loss += loss_function(log_ps, labels)
                #### removed torch.exp() since exponential is monotone, taking it doesn't change the order of outputs. Similarly with torch.softmax()
                top_p, top_class = log_ps.topk(1, dim=1)
                #### convert to float/long using proper methods. what you have won't work for cuda tensors.
                equals = top_class.long() == labels.long().view(*top_class.shape)
                accuracy += torch.mean(equals.float())
        train_losses.append(running_loss/len(train_dataloader))
        test_losses.append(test_loss/len(val_dataloader))
        print("[Epoch: {}/{}] ".format(e+1, epochs),
              "[Training Loss: {:.3f}] ".format(running_loss/len(val_dataloader)),
              "[Test Loss: {:.3f}] ".format(test_loss/len(val_dataloader)),
              "[Test Accuracy: {:.3f}]".format(accuracy/len(val_dataloader)))


In [5]:
def loss_fnc(predictions, targets):
    return nn.CrossEntropyLoss()(input=predictions,target=targets)

In [6]:
def make_train_step(model, loss_fnc, optimizer):
    def train_step(X,Y):
        # set model to train mode
        model.train()
        # forward pass
        output_logits = model(X)
        predictions = torch.argmax(output_logits,dim=1)
        accuracy = torch.sum(Y==predictions)/float(len(Y))
        # compute loss
        loss = loss_fnc(output_logits, Y)
        # compute gradients
        loss.backward()
        # update parameters and zero gradients
        optimizer.step()
        optimizer.zero_grad()
        return loss.item(), accuracy*100
    return train_step

In [7]:
def make_validate_fnc(model,loss_fnc):
    def validate(X,Y):
        with torch.no_grad():
            model.eval()
            output_logits = model(X)
            predictions = torch.argmax(output_logits,dim=1)
            accuracy = torch.sum(Y==predictions)/float(len(Y))
            loss = loss_fnc(output_logits,Y)
        return loss.item(), accuracy*100, predictions
    return validate

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = LSTMnet_RnnAtten(input_dim = 100, hidden_dim=128, output_dim=7, num_layers=10).to(device)
print('Number of trainable params: ',sum(p.numel() for p in model.parameters()) )
OPTIMIZER = torch.optim.Adam(model.parameters(),lr=0.0001, weight_decay=1e-3)


Number of trainable params:  1406471


In [9]:
train_step = make_train_step(model, loss_fnc, optimizer=OPTIMIZER)
validate = make_validate_fnc(model,loss_fnc)

losses=[]
val_losses = []
epochs = 30
for epoch in range(epochs):
    epoch_acc = 0
    epoch_loss = 0
    for idx, (features, labels) in enumerate(train_dataloader):
        X, Y = features, labels
        X_tensor = torch.tensor(X,device=device).float()
        Y_tensor = torch.tensor(Y, dtype=torch.long,device=device)
        loss, acc = train_step(X_tensor,Y_tensor)
        epoch_acc += acc*len(features)/len(train)
        epoch_loss += loss*len(features)/len(train)
        print(f"\r Epoch {epoch}: batch {idx}",end='')
    for batch in val_dataloader:
        X_val, Y_val = batch
        break
    X_val_tensor = torch.tensor(X_val,device=device).float()
    Y_val_tensor = torch.tensor(Y_val,dtype=torch.long,device=device)
    val_loss, val_acc, _ = validate(X_val_tensor,Y_val_tensor)
    losses.append(epoch_loss)
    val_losses.append(val_loss)
    print('')
    print(f"Epoch {epoch} --> loss:{epoch_loss:.4f}, acc:{epoch_acc:.2f}%, val_loss:{val_loss:.4f}, val_acc:{val_acc:.2f}%")

 Epoch 0: batch 311
Epoch 0 --> loss:1.7783, acc:36.39%, val_loss:1.8624, val_acc:46.88%
 Epoch 1: batch 122