In [1]:
# Load IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import gc

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import umap
from tqdm import tqdm

In [3]:
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [4]:
import os
from pathlib import Path

# Allocator tuning for PyTorch's CUDA caching allocator: blocks larger than
# 512 MB are not split. NOTE(review): this only takes effect if it is set
# before the allocator is first used -- confirm no CUDA work happens earlier.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

# The relative paths used below (config/..., ../data/...) are resolved from
# the directory two levels above the one the kernel started in.
# Remember the launch directory the first time this cell runs so that
# re-running the cell does not climb two more levels each time.
NOTEBOOK_DIR = globals().get("NOTEBOOK_DIR") or Path.cwd()
PROJECT_ROOT = (NOTEBOOK_DIR / "../../").resolve()
os.chdir(PROJECT_ROOT)
os.getcwd()

'/home/bunny/projects/IConNet'

In [5]:
# --- Experiment identity and I/O locations ---
dataset_name = 'iemocap'
experiment_prefix = "scb14"
# Keep the trailing slash: output file names are appended to log_dir by
# plain string concatenation when the training logs are saved.
log_dir = f'../{experiment_prefix}_models/{dataset_name}/'
data_dir = "../data/data_preprocessed/"

# --- Audio input and batching ---
sr = 16_000          # passed to the model as sample_rate
batch_size = 2
in_channels = 1

# --- Model hyper-parameters ---
# kernel_size and max_num_tokens are not referenced by the cells visible in
# this notebook; they are kept so the namespace is unchanged.
kernel_size = embedding_dim = 511
stride = 125
num_embeddings = 384
cls_dim = 512
max_num_tokens = 2048
num_classes = 4

# --- Optimisation ---
learning_rate = 1e-4
commitment_cost = 0.1

In [6]:
from IConNet.acov.audio_vqvae import VqVaeClsLoss
from IConNet.trainer.train_torch import get_dataloader
from IConNet.trainer.train_torch import Trainer_SCB10 as Trainer
from IConNet.acov.model import SCB14 as SCB
from omegaconf import OmegaConf as ocf

In [7]:
# Front-end / classifier architecture description, read from the project's
# YAML config (path is relative to the current working directory).
iconnet_config_path = 'config/model/m19win.yaml'
iconnet_config = ocf.load(iconnet_config_path)
print(iconnet_config)

{'name': 'M19', 'description': 'FirConv with learnable windows', 'fe': {'n_block': 1, 'n_channel': [256], 'kernel_size': [511], 'stride': [2], 'window_k': [5], 'pooling': 'mean', 'filter_type': 'sinc', 'learnable_bands': False, 'learnable_windows': True, 'shared_window': False, 'window_func': 'hamming', 'mel_resolution': 3, 'conv_mode': 'conv', 'norm_type': 'LocalResponseNorm'}, 'cls': {'n_block': 2, 'n_hidden_dim': [512, 512], 'norm_type': 'LayerNorm'}}


In [8]:
# Dataset description for the 4-class label set of the chosen dataset.
dataset_config_path = f'config/dataset/{dataset_name}4.yaml'
dataset_config = ocf.load(dataset_config_path)
print(dataset_config)

# get_dataloader hands back a batch size alongside the loaders; rebind
# batch_size to that returned value so later cells use what the loaders use.
loaders_and_batch_size = get_dataloader(
    dataset_config,
    data_dir,
    batch_size=batch_size,
)
train_loader, test_loader, batch_size = loaders_and_batch_size
print(batch_size)

{'name': 'iemocap', 'dataset_class': 'WaveformDataset', 'root': 'iemocap/', 'audio_dir': 'full_release/', 'feature_dir': 'preprocessing/', 'label_name': 'label4', 'feature_name': 'audio16k', 'num_classes': 4, 'label_values': ['neu', 'hap', 'sad', 'ang'], 'classnames': ['neu', 'hap', 'sad', 'ang'], 'target_labels': ['ang', 'neu', 'sad', 'hap']}
2


In [9]:
# Per-term loss weights. Only loss_cls is non-zero, which matches the training
# log where every other term is reported as 0.000.
loss_ratio = VqVaeClsLoss(
    perplexity=0,
    loss_vq=0,
    loss_recon=0,
    loss_cls=1,
)

# accumulate_grad_batches=8 presumably accumulates gradients over 8 batches
# per optimiser step -- confirm in Trainer_SCB10.
trainer = Trainer(
    batch_size=batch_size,
    log_dir=log_dir,
    experiment_prefix=experiment_prefix,
    device=device,
    accumulate_grad_batches=8,
)
trainer.prepare(
    train_loader=train_loader,
    test_loader=test_loader,
    batch_size=batch_size,
    loss_ratio=loss_ratio,
)

In [10]:
# Codebook checkpoint from an earlier experiment (scb11 on RAVDESS); it is
# handed to the model together with freeze_codebook=True.
codebook_pretrained_path = '../scb11_models/ravdess/epoch=220.codebook.pt'

# Every size below comes from the configuration cell, so changing a value
# there is enough to change the model (cls_dim used to be repeated here as a
# literal 512).
model = SCB(
    in_channels=in_channels,
    num_embeddings=num_embeddings,
    stride=stride,
    embedding_dim=embedding_dim,
    num_classes=num_classes,
    cls_dim=cls_dim,
    sample_rate=sr,
    commitment_cost=commitment_cost,
    distance_type='euclidean',
    codebook_pretrained_path=codebook_pretrained_path,
    freeze_codebook=True,
    loss_type='minami',
    iconnet_config=iconnet_config,
)

In [11]:
# Hand the model to the trainer together with the configured learning rate.
trainer.setup(
    model=model,
    lr=learning_rate,
)

In [None]:
# Long-running cell: the progress bar shows roughly 13 minutes per epoch.
# test_n_epoch=10 appears to schedule a test-set evaluation every 10 epochs
# (the log shows test metrics at epochs 1 and 11) -- confirm in Trainer_SCB10.
trainer.fit(
    n_epoch=100,
    self_supervised=False,
    train_task='embedding',
    test_n_epoch=10,
)

  0%|▏                                                  | 0.476833976833989/100 [06:45<715:43:12, 25889.38s/it]

Epoch: 1	Loss: 1.116 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=1.116]	Val_acc: 84/197 (42.64%)

Saved new best val model: ../scb14_models/iemocap/model.epoch=1.step=864.loss=1.116.val_acc=0.426.pt


  1%|▊                                                    | 1.4295366795367364/100 [17:42<16:57:50, 619.56s/it]

Correct: 388/988 (0.3927)
Saved new best test model: ../scb14_models/iemocap/model.epoch=1.step=1727.test_acc=0.3927.pt
{'acc_unweighted': tensor(0.4425, device='cuda:0'),
 'acc_weighted': tensor(0.3927, device='cuda:0'),
 'f1s_unweighted': tensor(0.2767, device='cuda:0'),
 'f1s_weighted': tensor(0.2458, device='cuda:0'),
 'rocauc': tensor(0.7574, device='cuda:0'),
 'uar': tensor(0.4425, device='cuda:0'),
 'wap': tensor(0.5246, device='cuda:0')}
{'acc_detail': tensor([0.8869, 0.0029, 0.8802, 0.0000], device='cuda:0'),
 'f1s_detail': tensor([0.4906, 0.0058, 0.6102, 0.0000], device='cuda:0'),
 'precision_detail': tensor([0.3391, 1.0000, 0.4670, 0.0000], device='cuda:0'),
 'recall_detail': tensor([0.8869, 0.0029, 0.8802, 0.0000], device='cuda:0'),
 'rocauc_detail': tensor([0.8096, 0.6920, 0.8755, 0.6525], device='cuda:0')}
tensor([[196,   0,  25,   0],
        [174,   1, 167,   0],
        [ 26,   0, 191,   0],
        [182,   0,  26,   0]], device='cuda:0')


  2%|▉                                                 | 1.9063706563707945/100 [24:30<717:11:28, 26320.65s/it]

Epoch: 2	Loss: 4.435 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=4.435]	Val_acc: 93/197 (47.21%)

Saved new best val model: ../scb14_models/iemocap/model.epoch=2.step=2591.loss=4.435.val_acc=0.472.pt


  3%|█▍                                                | 2.8590733590736597/100 [37:24<714:07:34, 26465.21s/it]

Epoch: 3	Loss: 0.646 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=0.646]	Val_acc: 100/197 (50.76%)

Saved new best val model: ../scb14_models/iemocap/model.epoch=3.step=4318.loss=0.646.val_acc=0.508.pt


  4%|█▉                                                 | 3.811776061776525/100 [50:19<707:12:32, 26468.44s/it]

Epoch: 4	Loss: 2.019 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=2.019]	Val_acc: 96/197 (48.73%)



  5%|██▍                                               | 4.76447876447939/100 [1:03:16<705:59:31, 26687.22s/it]

Epoch: 5	Loss: 0.491 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=0.491]	Val_acc: 111/197 (56.35%)

Saved new best val model: ../scb14_models/iemocap/model.epoch=5.step=7772.loss=0.491.val_acc=0.563.pt


  6%|██▊                                              | 5.717181467182255/100 [1:16:22<698:15:56, 26661.87s/it]

Epoch: 6	Loss: 1.136 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=1.136]	Val_acc: 91/197 (46.19%)



  7%|███▎                                              | 6.66988416988512/100 [1:29:23<695:20:54, 26821.51s/it]

Epoch: 7	Loss: 0.732 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=0.732]	Val_acc: 106/197 (53.81%)



  8%|███▋                                             | 7.622586872587985/100 [1:42:29<690:58:31, 26927.70s/it]

Epoch: 8	Loss: 0.613 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=0.613]	Val_acc: 108/197 (54.82%)



  9%|████▎                                             | 8.57528957529085/100 [1:55:38<683:30:11, 26914.08s/it]

Epoch: 9	Loss: 0.720 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=0.720]	Val_acc: 113/197 (57.36%)

Saved new best val model: ../scb14_models/iemocap/model.epoch=9.step=14680.loss=0.720.val_acc=0.574.pt


 10%|████▋                                            | 9.527992277993715/100 [2:08:50<676:09:14, 26905.06s/it]

Epoch: 10	Loss: 3.071 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=3.071]	Val_acc: 110/197 (55.84%)



 10%|█████▏                                           | 10.48069498069658/100 [2:22:02<670:32:49, 26965.91s/it]

Epoch: 11	Loss: 0.474 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=0.474]	Val_acc: 116/197 (58.88%)

Saved new best val model: ../scb14_models/iemocap/model.epoch=11.step=18134.loss=0.474.val_acc=0.589.pt


 11%|█████▊                                             | 11.433397683399445/100 [2:33:28<15:56:53, 648.25s/it]

Correct: 549/988 (0.5557)
Saved new best test model: ../scb14_models/iemocap/model.epoch=11.step=18997.test_acc=0.5557.pt
{'acc_unweighted': tensor(0.5417, device='cuda:0'),
 'acc_weighted': tensor(0.5557, device='cuda:0'),
 'f1s_unweighted': tensor(0.5455, device='cuda:0'),
 'f1s_weighted': tensor(0.5511, device='cuda:0'),
 'rocauc': tensor(0.8229, device='cuda:0'),
 'uar': tensor(0.5417, device='cuda:0'),
 'wap': tensor(0.5714, device='cuda:0')}
{'acc_detail': tensor([0.4118, 0.6462, 0.7051, 0.4038], device='cuda:0'),
 'f1s_detail': tensor([0.5260, 0.5808, 0.6623, 0.4128], device='cuda:0'),
 'precision_detail': tensor([0.7280, 0.5274, 0.6245, 0.4221], device='cuda:0'),
 'recall_detail': tensor([0.4118, 0.6462, 0.7051, 0.4038], device='cuda:0'),
 'rocauc_detail': tensor([0.8650, 0.7671, 0.8968, 0.7628], device='cuda:0')}
tensor([[ 91,  49,   6,  75],
        [  5, 221,  80,  36],
        [  0,  60, 153,   4],
        [ 29,  89,   6,  84]], device='cuda:0')


 12%|█████▋                                          | 11.910231660233503/100 [2:40:29<656:05:39, 26812.87s/it]

Epoch: 12	Loss: 0.859 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=0.859]	Val_acc: 107/197 (54.31%)



 13%|██████▏                                         | 12.862934362936368/100 [2:53:45<656:07:01, 27106.97s/it]

Epoch: 13	Loss: 0.656 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=0.656]	Val_acc: 112/197 (56.85%)



 14%|██████▋                                         | 13.815637065639233/100 [3:07:08<646:27:27, 27003.13s/it]

Epoch: 14	Loss: 0.774 [perplexity=0.000, loss_vq=0.000, loss_recon=0.000, loss_cls=0.774]	Val_acc: 118/197 (59.90%)

Saved new best val model: ../scb14_models/iemocap/model.epoch=14.step=23315.loss=0.774.val_acc=0.599.pt


 14%|███████                                            | 13.857625482627657/100 [3:07:42<19:56:40, 833.51s/it]

In [None]:
# Restore a saved checkpoint before evaluating. val_model=True presumably
# selects the best-validation checkpoint rather than the best-test one --
# TODO confirm in Trainer_SCB10.load_best_model.
trainer.load_best_model(val_model=True)

In [None]:
from pprint import pprint

In [None]:
# Run the test pass, then materialise each accumulator with .compute():
#   me  -> aggregate metrics, med -> per-class metrics, cm -> confusion matrix
metrics, metrics_details, confusion_matrix = trainer.test_step()
me, med, cm = (
    accumulator.compute()
    for accumulator in (metrics, metrics_details, confusion_matrix)
)

In [None]:
# Pretty-print aggregate metrics, per-class metrics and the confusion matrix,
# in that order.
for result in (me, med, cm):
    pprint(result)

In [None]:
# Confusion matrix normalised per predicted class: every column sums to 1,
# so the diagonal reads as per-class precision.
# NOTE(review): assumes the class index order of `cm` matches
# dataset_config.target_labels -- confirm against the dataloader.
labels = list(dataset_config.target_labels)

# Work on a float CPU copy. `cm` itself is left untouched so that this cell
# can be re-run and so that the raw counts are what gets saved later; an
# integer (and possibly CUDA) tensor can neither be divided in place nor be
# handed to pandas directly.
cm_counts = torch.as_tensor(cm).detach().cpu().to(torch.float64).numpy()

# Column totals (sum over the "actual" axis); columns with no predictions are
# shown as 0 instead of producing NaN from a division by zero.
col_totals = cm_counts.sum(axis=0, keepdims=True)
cm_by_predicted = np.divide(
    cm_counts, col_totals,
    out=np.zeros_like(cm_counts), where=col_totals != 0)

df_cm = pd.DataFrame(cm_by_predicted, columns=labels, index=labels)
df_cm.index.name = 'Actual'
df_cm.columns.name = 'Predicted'
plt.figure(figsize=(10, 7))
sns.set(font_scale=1.4)  # for label size
sns.heatmap(
    df_cm, cmap="Blues",
    annot=True,
    annot_kws={"size": 16})  # font size

In [None]:
# Confusion matrix normalised twice: first per actual class (rows sum to 1),
# then per predicted class (columns of the row-normalised matrix sum to 1).
# Computed from a fresh float CPU copy of the raw counts so the result does
# not depend on what any earlier cell did and `cm` is never modified.
cm_counts = torch.as_tensor(cm).detach().cpu().to(torch.float64).numpy()

# Step 1: divide each row by its total; empty rows stay 0 instead of NaN.
row_totals = cm_counts.sum(axis=1, keepdims=True)
cm_by_actual = np.divide(
    cm_counts, row_totals,
    out=np.zeros_like(cm_counts), where=row_totals != 0)

# Step 2: divide each column of the row-normalised matrix by its total.
col_totals = cm_by_actual.sum(axis=0, keepdims=True)
cm_row_then_col = np.divide(
    cm_by_actual, col_totals,
    out=np.zeros_like(cm_by_actual), where=col_totals != 0)

df_cm = pd.DataFrame(cm_row_then_col, columns=labels, index=labels)
df_cm.index.name = 'Actual'
df_cm.columns.name = 'Predicted'
plt.figure(figsize=(10, 7))
sns.set(font_scale=1.4)  # for label size
sns.heatmap(
    df_cm, cmap="Blues",
    annot=True,
    annot_kws={"size": 16})  # font size

In [None]:
import pandas as pd
import IPython.display as ipd
import seaborn as sns

from IConNet.acov.visualize import (
    visualize_speech_codebook, get_embedding_color, 
    visualize_embedding_umap, visualize_training_curves,
    get_embedding_color_v2, get_zcs_color_v2
)

In [None]:
# One record per logged training step: the total loss under 'loss', followed
# by every named component of the matching loss-detail tuple.
loss_records = [
    {'loss': trainer.train_losses[step], **dict(zip(detail._fields, detail))}
    for step, detail in enumerate(trainer.train_losses_detail)
]
loss_df = pd.DataFrame(loss_records)
loss_df

In [None]:
from scipy.signal import savgol_filter

# Savitzky-Golay smoothing parameters for the per-step loss curves.
SMOOTH_WINDOW = 301     # window length in logged steps (kept odd)
SMOOTH_POLYORDER = 7    # polynomial order; must be smaller than the window

# One panel per column of loss_df rather than a fixed count of five, so the
# cell keeps working if the loss tuple gains or loses a term. squeeze=False
# keeps `axes` two-dimensional even when there is a single column.
fig, axes = plt.subplots(1, len(loss_df.columns), figsize=(14, 3), squeeze=False)
for ax, column in zip(axes[0], loss_df.columns):
    curve = loss_df[column]
    # The filter needs the window to fit inside the data, so shrink it (to an
    # odd length) for short runs.
    window = min(SMOOTH_WINDOW, len(curve))
    if window % 2 == 0:
        window -= 1
    # Too few points to fit the polynomial: plot the raw curve instead.
    if window > SMOOTH_POLYORDER:
        curve = savgol_filter(curve, window, SMOOTH_POLYORDER)
    ax.plot(curve)
    ax.set_title(column)

In [None]:
from einops import reduce, repeat, rearrange

In [None]:
# Average the per-step training loss into `trained_epoch` equally sized
# chunks. Trailing steps that do not fill a whole chunk are dropped.
# NOTE(review): chunks are equal slices of the step log; they line up with
# real epochs only if every epoch logged the same number of steps -- confirm.
trained_epoch = trainer.current_epoch
n = len(trainer.train_losses)
steps_per_chunk = n // trained_epoch
trained_steps = steps_per_chunk * trained_epoch
step_losses = np.array(trainer.train_losses[:trained_steps])
train_losses = reduce(step_losses, '(n k) -> n', 'mean', n=trained_epoch)

In [None]:
# Three side-by-side panels: (series, title, line style) for each one.
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
panels = [
    (train_losses, "Average train loss per epoch", {"color": "b"}),
    (trainer.val_accuracy, "Val accuracy per epoch",
     {"color": "r", "linestyle": "dashed"}),
    (trainer.test_accuracy, "Test accuracy per 10 epochs", {"color": "r"}),
]
for ax, (series, title, line_style) in zip(axes, panels):
    ax.plot(series, **line_style)
    ax.set_title(title)

In [None]:
# Bundle the training curves and the final test results into one file, and
# write the per-step loss table next to it. log_dir already ends with '/',
# so the file names are appended directly.
train_log = dict(
    train_loss=torch.tensor(train_losses),
    val_acc=torch.tensor(trainer.val_accuracy),
    test_acc=torch.tensor(trainer.test_accuracy),
    test_confusion_matrix=cm,
    test_metrics=me,
    test_metrics_detail=med,
)
metrics_path = log_dir + "train_metrics.pt"
losses_path = log_dir + "train_losses.csv"
torch.save(train_log, metrics_path)
loss_df.to_csv(losses_path)