In [2]:
%pip install -e 'src/hear21passt'

Obtaining file:///root/data/exploratory_notebooks/da/src/hear21passt
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: hear21passt
  Attempting uninstall: hear21passt
    Found existing installation: hear21passt 0.0.23
    Uninstalling hear21passt-0.0.23:
      Successfully uninstalled hear21passt-0.0.23
  Running setup.py develop for hear21passt
Successfully installed hear21passt-0.0.23
[0mNote: you may need to restart the kernel to use updated packages.


In [3]:
%pip install tensorboard

[0mNote: you may need to restart the kernel to use updated packages.


In [None]:
import os
os._exit(00)

In [2]:
import sys
import os
import datetime
import re

import numpy as np
import pandas as pd
import torch
import torchaudio
import tqdm

from operator import itemgetter
from hashlib import sha1

from torch.optim import AdamW, lr_scheduler
from torch.utils.data import DataLoader, default_collate
from torch.nn import functional as F
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import ChainDataset

from sklearn.metrics import f1_score

from IPython.display import Audio

from optim.lr_scheduler import get_scheduler_lambda
from utils.model import get_pretrained_passt_model
from utils.gdsc22_dataset import BuzzIterableDataset, BuzzAugmentedIterableDataset
from utils.dataset_augmentations import BuzzIterableTransformedDataset, MixUp1Level, MixUp2Level, RandomGain, Rolling, ColoredNoise

  warn(


## 1. Initial configuration

In [3]:
torch.manual_seed(42)
np.random.seed(42)

In [4]:
project_dir = os.path.abspath(os.path.join(os.getcwd(), '../../'))
data_processed_dir = os.path.join(project_dir, 'gdsc_data/data_processed/')
data_source_dir = os.path.join(project_dir, 'gdsc_data/data_source/')

In [5]:
ts = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

MODEL_NAME = f"gdsc22_passt_kd_NOEXP_lr_mismatch_size_t40_f4_epochs40_gain_roll_mixup_noise@{ts}.pt"
TORCH_SAVE_PATH = os.path.join(project_dir, f'checkpoints/{MODEL_NAME}')

TENSORBOARD_RUNS_PATH = os.path.join(project_dir, "torch_runlogs/")
TENSORBOARD_EXP_FOLDER = f"PaSST_kd_NOEXP_lr_mismatch_size_t40_f4_epochs40_gain_roll_mixup_noise@{ts}"
TENSORBOARD_EXP_COMMENT = "PaSST_s_kd_p16_128_ap486_NOEXP_lr_gain_roll_mixup_noise"

TRAIN_BATCH_SIZE = 32
VAL_BATCH_SIZE = 32

# fine tuning and ohe of real labels
CLASSIFIER_N_CLASSES = 66

DATA_TAG = "data_8c86715"

DEBUG_TRANSFORMATIONS = False

In [6]:
#final_dataset_path = find_final_dataset_path(data_processed_dir, DATA_TAG)
final_dataset_path_augmented = os.path.join(data_processed_dir, DATA_TAG, '01_apply_ir_function')
final_dataset_val_as_train_path_augmented = os.path.join(data_processed_dir, DATA_TAG, '03_apply_ir_function_on_validation')

final_dataset_path = data_source_dir
final_dataset_val_as_train_path = os.path.join(data_processed_dir, DATA_TAG, '02_resample_validation/')

In [7]:
train_dir_augmented = os.path.join(final_dataset_path_augmented, 'train/')
val_as_train_dir_augmented = os.path.join(final_dataset_val_as_train_path_augmented, 'train/')

train_dir = os.path.join(final_dataset_path, 'train/')
val_as_train_dir = os.path.join(final_dataset_val_as_train_path, 'train/')

val_dir = os.path.join(final_dataset_val_as_train_path, 'val/')

## 2. Dataloaders and batching

### 2.1 Debugging collate_fn

Used to check the random seed and batchability of the individual points 0:)

In [7]:
def debug_collate(batch):
    get_1st = itemgetter(0)
    files = ", ".join(map(get_1st, batch)).encode('utf-8')
    worker_info = torch.utils.data.get_worker_info()   
    print(f"Worker_id '{worker_info.id}' batch hash is '{sha1(files).hexdigest()}'") 
    
    batch = default_collate(batch)
    return batch

In [14]:
train_dataset = BuzzIterableDataset(
    train_dir,
    metadata_csv=os.path.join(train_dir, 'metadata.csv'), 
    shuffle=True,
    subsample=1.0,
    total_classes=CLASSIFIER_N_CLASSES
)
val_dataset = BuzzIterableDataset(
    val_as_train_dir,
    metadata_csv=os.path.join(val_as_train_dir, 'metadata.csv'), 
    subsample=1.0,
    total_classes=CLASSIFIER_N_CLASSES
)

train_dataset_transformed = BuzzIterableTransformedDataset(
    transformers=[RandomGain(debug=DEBUG_TRANSFORMATIONS), Rolling(debug=DEBUG_TRANSFORMATIONS), MixUp1Level(debug=DEBUG_TRANSFORMATIONS), ColoredNoise(debug=DEBUG_TRANSFORMATIONS)],
    buzz_iterable=train_dataset
)

# I had too much hope on the IR augmented dataset and took a wrong path...
# train_dataset_augmented = BuzzAugmentedIterableDataset(
#     train_dir_augmented, 
#     metadata_csv=os.path.join(final_dataset_path_augmented, 'metadata_grouped_train.csv'), 
#     shuffle=True,
#     subsample=0.2,
#     total_classes=CLASSIFIER_N_CLASSES
# )
# val_as_train_dataset_augmented = BuzzAugmentedIterableDataset(
#     val_as_train_dir_augmented, 
#     metadata_csv=os.path.join(final_dataset_val_as_train_path_augmented, 'metadata_grouped_train.csv'), 
#     shuffle=True,
#     subsample=0.2,
#     total_classes=CLASSIFIER_N_CLASSES
# )
    
train_dataloader = DataLoader(
    train_dataset_transformed, 
    batch_size=TRAIN_BATCH_SIZE, 
    pin_memory=True,
    num_workers=4,
    #collate_fn=debug_collate
)


In [9]:
# val_dataset = BuzzIterableDataset(
#     val_dir, 
#     metadata_csv=os.path.join(val_dir, 'metadata.csv'),
#     shuffle=False,
#     total_classes=CLASSIFIER_N_CLASSES,
# )
val_dataloader = DataLoader(val_dataset, batch_size=VAL_BATCH_SIZE)

## 3. Init pytorch objects

In [10]:
writer = SummaryWriter(os.path.join(TENSORBOARD_RUNS_PATH, TENSORBOARD_EXP_FOLDER), comment=TENSORBOARD_EXP_COMMENT)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [11]:
%%sh
nvidia-smi

Wed Jul 12 22:47:46 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:1E.0 Off |                    0 |
| N/A   43C    P0    25W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [12]:
# Default stride for t and f is 10
# NOTE: FMAX is used in frequency augmentation. Every sample has a random subset of fmin and fmax applied to it...
model = get_pretrained_passt_model(mode="all", n_classes=66, s_patchout_t=40, s_patchout_f=4)

#model = get_pretrained_passt_model(arch='passt_s_swa_p16_128_ap476', mode="all", n_classes=66, s_patchout_t=10, s_patchout_f=3)
model.to(device)



 Loading PaSST pre-trained on AudioSet (with KD) Patch 16 stride 10 structured patchout mAP=486 


PaSST(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
   

PaSSTGDSCIface(
  (mel): AugmentMelSTFT(
    winsize=800, hopsize=320
    (freqm): FrequencyMasking()
    (timem): TimeMasking()
  )
  (net): PaSST(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=307

In [15]:
adamw = AdamW(model.net.parameters(), lr=0.00001, weight_decay=0.0001)
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(adamw, get_scheduler_lambda(warm_up_len=0))

scaler = torch.cuda.amp.GradScaler()

print(f"Initial Learning rate:{lr_scheduler.get_last_lr()[0]}")
for epoch in range(0, 40):
    y_tr_pred_f1 = []
    y_tr_true_f1 = []
    train_loss = []

    model.train()
    for _, _, X_batch, y in (progress := tqdm.tqdm(train_dataloader)):
        adamw.zero_grad()        
        
        X_batch = X_batch.to(device)
        y = y.to(device)
        X_batch_spectr = model.mel(X_batch).unsqueeze(1)
        
        with  torch.cuda.amp.autocast(): 
            y_hat, emb = model.net(X_batch_spectr)     
            
            loss = F.cross_entropy(y_hat, y, reduction="none")
            loss = loss.mean()
            
            train_loss.append(loss.detach().cpu())
            progress.set_description(f"TrainL {float(loss):.2f}")
            
        # Scales loss.  Calls backward() on scaled loss to create scaled gradients.
        # Backward passes under autocast are not recommended.
        # Backward ops run in the same dtype autocast chose for corresponding forward ops.
        scaler.scale(loss).backward()
        # scaler.step() first unscales the gradients of the optimizer's assigned params.
        # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
        # otherwise, optimizer.step() is skipped.
        scaler.step(adamw)
        # Updates the scale for next iteration.
        scaler.update()
        
        prob = F.softmax(y_hat.detach(), dim=1)
        y_pred = torch.argmax(prob.detach(), dim=1)
        y_tr_pred_f1.append(y_pred.cpu())
        y_tr_true_f1.append(y.cpu().argmax(dim=1))

    f_score_train = f1_score(torch.cat(y_tr_true_f1).numpy(), torch.cat(y_tr_pred_f1).numpy(), average='macro')
    total_train_loss = torch.stack(train_loss, dim=0).mean()
    print(f"=" * 80)
    print("\r\n")
    print(f"Train Loss(epoch={epoch}): {total_train_loss}")
    print(f"F1 score(train) {f_score_train}")
    print("\r\n")
    print(f"=" * 80)
    print("\r\n")

    val_loss = []
    y_val_pred_f1 = []
    y_val_true_f1 = []
    model.eval()
    
    with torch.no_grad():
        for _, _, X_batch, y_val in (progress := tqdm.tqdm(val_dataloader)):
            X_batch = X_batch.to(device)
            y_val = y_val.to(device)
            X_batch_spectr = model.mel(X_batch).unsqueeze(1)

            with torch.cuda.amp.autocast():
                y_hat, emb = model.net(X_batch_spectr)
            
                _loss = F.cross_entropy(y_hat, y_val, reduction="none")
                _loss = _loss.mean()
                val_loss.append(_loss.detach().cpu())
            
            progress.set_description(f"ValL {float(_loss):.2f}")            

            prob = F.softmax(y_hat.detach(), dim=1)
            y_pred = torch.argmax(prob.detach(), dim=1)

            y_val_pred_f1.append(y_pred.cpu())
            y_val_true_f1.append(y_val.cpu().argmax(dim=1))
            
    f1_score_val = f1_score(torch.cat(y_val_true_f1).numpy(), torch.cat(y_val_pred_f1).numpy(), average='macro')
    total_val_loss = torch.stack(val_loss, dim=0).mean()
    print(f"=" * 80)
    print("\r\n")
    print(f"Val Loss: {total_val_loss}")
    print(f"F1 score(val) {f1_score_val}")
    print("\r\n")    
    print(f"=" * 80)
    print("\r\n")
       
    writer.add_scalar('Loss/train', total_train_loss, epoch)
    writer.add_scalar('Loss/val', total_val_loss, epoch)
    writer.add_scalar('F1/train', f_score_train, epoch)
    writer.add_scalar('F1/val', f1_score_val, epoch)
    writer.add_scalar('LR', lr_scheduler.get_last_lr()[0], epoch)

    lr_scheduler.step()
    print(f"New lr: {lr_scheduler.get_last_lr()[0]}")
       
    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': adamw.state_dict(),
        }, TORCH_SAVE_PATH)

Initial Learning rate:1e-05


  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore[attr-defined]


x torch.Size([32, 1, 128, 1000])
self.norm(x) torch.Size([32, 768, 12, 99])
 patch_embed :  torch.Size([32, 768, 12, 99])
 self.time_new_pos_embed.shape torch.Size([1, 768, 1, 99])
 self.freq_new_pos_embed.shape torch.Size([1, 768, 12, 1])
X Before time Patchout of 40  torch.Size([32, 768, 12, 99])
X after time Patchout torch.Size([32, 768, 12, 59])
X Before Freq Patchout of 4  torch.Size([32, 768, 12, 59])
 
 X after freq Patchout:  torch.Size([32, 768, 8, 59])
X flattened torch.Size([32, 472, 768])
 self.new_pos_embed.shape torch.Size([1, 2, 768])
 self.cls_tokens.shape torch.Size([32, 1, 768])
 self.dist_token.shape torch.Size([32, 1, 768])
 final sequence x torch.Size([32, 474, 768])


TrainL 4.03: : 0it [00:05, ?it/s]

 after 12 atten blocks x torch.Size([32, 474, 768])
forward_features torch.Size([32, 768])
head torch.Size([32, 66])


TrainL 4.06: : 356it [05:19,  1.12it/s]




Train Loss(epoch=0): 4.0191497802734375
F1 score(train) 0.03503912994163198






ValL 3.92: : 94it [02:02,  1.30s/it]




Val Loss: 3.5901899337768555
F1 score(val) 0.06199552468974685




New lr: 1e-05


TrainL 1.04: : 356it [05:21,  1.11it/s]




Train Loss(epoch=1): 3.102017402648926
F1 score(train) 0.12871242078816938






ValL 2.55: : 94it [02:05,  1.34s/it]




Val Loss: 2.474287509918213
F1 score(val) 0.20882271640849076




New lr: 1e-05


TrainL 3.30: : 356it [05:21,  1.11it/s]




Train Loss(epoch=2): 2.3887648582458496
F1 score(train) 0.2531373864639941






ValL 0.89: : 94it [02:01,  1.29s/it]




Val Loss: 1.9398194551467896
F1 score(val) 0.34329923464133477




New lr: 1e-05


TrainL 2.71: : 356it [05:26,  1.09it/s]




Train Loss(epoch=3): 2.068187713623047
F1 score(train) 0.3696614701496375






ValL 1.69: : 94it [01:54,  1.21s/it]




Val Loss: 1.6670383214950562
F1 score(val) 0.40784726339613886




New lr: 1e-05


TrainL 1.43: : 356it [05:11,  1.14it/s]




Train Loss(epoch=4): 1.8168284893035889
F1 score(train) 0.44343901311956246






ValL 1.95: : 94it [02:01,  1.29s/it]




Val Loss: 1.4778447151184082
F1 score(val) 0.4663362251372076




New lr: 1e-05


TrainL 0.55: : 356it [05:09,  1.15it/s]




Train Loss(epoch=5): 1.6471351385116577
F1 score(train) 0.4797286664827826






ValL 0.81: : 94it [01:55,  1.23s/it]




Val Loss: 1.2829912900924683
F1 score(val) 0.5044269338478728




New lr: 1e-05


TrainL 1.79: : 356it [05:12,  1.14it/s]




Train Loss(epoch=6): 1.5304661989212036
F1 score(train) 0.5186742609715238






ValL 1.56: : 94it [01:56,  1.24s/it]




Val Loss: 1.1991761922836304
F1 score(val) 0.50286814093297




New lr: 1e-05


TrainL 2.05: : 356it [05:04,  1.17it/s]




Train Loss(epoch=7): 1.413121223449707
F1 score(train) 0.5686447390876169






ValL 2.26: : 94it [01:54,  1.22s/it]




Val Loss: 1.1090487241744995
F1 score(val) 0.5350794587063928




New lr: 1e-05


TrainL 3.10: : 356it [05:11,  1.14it/s]




Train Loss(epoch=8): 1.3115798234939575
F1 score(train) 0.5988239390427088






ValL 1.00: : 94it [02:02,  1.30s/it]




Val Loss: 1.0119478702545166
F1 score(val) 0.5540289628768987




New lr: 1e-05


TrainL 1.05: : 356it [05:20,  1.11it/s]




Train Loss(epoch=9): 1.25967538356781
F1 score(train) 0.6160160267503003






ValL 1.28: : 94it [01:58,  1.26s/it]




Val Loss: 0.9460635185241699
F1 score(val) 0.5870757679833669




New lr: 1e-05


TrainL 0.82: : 356it [05:05,  1.16it/s]




Train Loss(epoch=10): 1.1927146911621094
F1 score(train) 0.6504034307247551






ValL 1.07: : 94it [02:02,  1.30s/it]




Val Loss: 0.8865995407104492
F1 score(val) 0.6211074085215762




New lr: 1e-05


TrainL 0.77: : 356it [05:18,  1.12it/s]




Train Loss(epoch=11): 1.1515253782272339
F1 score(train) 0.6730980300608043






ValL 2.03: : 94it [02:01,  1.30s/it]




Val Loss: 0.8569907546043396
F1 score(val) 0.6499668838988828




New lr: 1e-05


TrainL 1.64: : 356it [05:17,  1.12it/s]




Train Loss(epoch=12): 1.1279886960983276
F1 score(train) 0.6881459951203396






ValL 0.66: : 94it [02:01,  1.30s/it]




Val Loss: 0.808559000492096
F1 score(val) 0.6542796216467698




New lr: 1e-05


TrainL 1.35: : 356it [05:16,  1.12it/s]




Train Loss(epoch=13): 1.0587238073349
F1 score(train) 0.709708434906591






ValL 0.80: : 94it [02:01,  1.30s/it]




Val Loss: 0.7760686874389648
F1 score(val) 0.6931583502968176




New lr: 1e-05


TrainL 0.85: : 356it [05:18,  1.12it/s]




Train Loss(epoch=14): 1.0298449993133545
F1 score(train) 0.7134580219986885






ValL 0.12: : 94it [02:01,  1.30s/it]




Val Loss: 0.7386535406112671
F1 score(val) 0.7025180647211339




New lr: 1e-05


TrainL 0.79: : 356it [05:12,  1.14it/s]




Train Loss(epoch=15): 0.997533917427063
F1 score(train) 0.7307301588227






ValL 0.43: : 94it [01:54,  1.22s/it]




Val Loss: 0.7440552115440369
F1 score(val) 0.6951081937701663




New lr: 1e-05


TrainL 1.42: : 356it [05:07,  1.16it/s]




Train Loss(epoch=16): 0.9912154674530029
F1 score(train) 0.7319849137472513






ValL 2.17: : 94it [01:55,  1.22s/it]




Val Loss: 0.7122585773468018
F1 score(val) 0.7106874968551333




New lr: 1e-05


TrainL 1.29: : 356it [05:03,  1.17it/s]




Train Loss(epoch=17): 0.9823962450027466
F1 score(train) 0.7477386957456257






ValL 0.86: : 94it [01:59,  1.27s/it]




Val Loss: 0.6836481094360352
F1 score(val) 0.720241515387606




New lr: 1e-05


TrainL 0.90: : 356it [05:16,  1.13it/s]




Train Loss(epoch=18): 0.9584404826164246
F1 score(train) 0.7463531660147069






ValL 0.19: : 94it [01:57,  1.25s/it]




Val Loss: 0.6436178088188171
F1 score(val) 0.7224419052996994




New lr: 1e-05


TrainL 1.11: : 356it [05:06,  1.16it/s]




Train Loss(epoch=19): 0.876545250415802
F1 score(train) 0.7690239798700641






ValL 0.81: : 94it [02:02,  1.31s/it]




Val Loss: 0.630730390548706
F1 score(val) 0.7341721516434756




New lr: 1e-05


TrainL 1.15: : 356it [05:15,  1.13it/s]




Train Loss(epoch=20): 0.8601989150047302
F1 score(train) 0.7743863535598857






ValL 0.07: : 94it [02:02,  1.30s/it]




Val Loss: 0.6211017370223999
F1 score(val) 0.7316179850547538




New lr: 9.505e-06


TrainL 0.35: : 356it [05:18,  1.12it/s]




Train Loss(epoch=21): 0.90526282787323
F1 score(train) 0.7707402939445266






ValL 0.90: : 94it [01:56,  1.24s/it]




Val Loss: 0.604657769203186
F1 score(val) 0.7298826260022072




New lr: 9.01e-06


TrainL 0.89: : 356it [05:03,  1.17it/s]




Train Loss(epoch=22): 0.8738616704940796
F1 score(train) 0.7902502821493805






ValL 2.77: : 94it [01:54,  1.22s/it]




Val Loss: 0.6151306629180908
F1 score(val) 0.7507237882777155




New lr: 8.515e-06


TrainL 1.22: : 356it [05:15,  1.13it/s]




Train Loss(epoch=23): 0.8430383205413818
F1 score(train) 0.794946473985425






ValL 0.18: : 94it [02:02,  1.30s/it]




Val Loss: 0.6026797890663147
F1 score(val) 0.7365676317334676




New lr: 8.020000000000001e-06


TrainL 0.39: : 356it [05:15,  1.13it/s]




Train Loss(epoch=24): 0.8164146542549133
F1 score(train) 0.7915879982302634






ValL 0.68: : 94it [02:01,  1.29s/it]




Val Loss: 0.5929185152053833
F1 score(val) 0.7556254366328571




New lr: 7.525e-06


TrainL 0.39: : 356it [05:02,  1.18it/s]




Train Loss(epoch=25): 0.8286821246147156
F1 score(train) 0.800428294079025






ValL 0.61: : 94it [01:54,  1.22s/it]




Val Loss: 0.5819605588912964
F1 score(val) 0.746308041987954




New lr: 7.0300000000000005e-06


TrainL 0.48: : 356it [05:03,  1.17it/s]




Train Loss(epoch=26): 0.7986461520195007
F1 score(train) 0.8111201627725755






ValL 0.30: : 94it [01:59,  1.27s/it]




Val Loss: 0.5827895998954773
F1 score(val) 0.7560683931419209




New lr: 6.535e-06


TrainL 0.87: : 356it [05:15,  1.13it/s]




Train Loss(epoch=27): 0.798811137676239
F1 score(train) 0.8069447254984242






ValL 0.30: : 94it [02:02,  1.30s/it]




Val Loss: 0.5574008226394653
F1 score(val) 0.7659765517680422




New lr: 6.040000000000001e-06


TrainL 0.72: : 356it [05:07,  1.16it/s]




Train Loss(epoch=28): 0.8233628273010254
F1 score(train) 0.8145470235067173






ValL 0.18: : 94it [01:54,  1.22s/it]




Val Loss: 0.5479687452316284
F1 score(val) 0.7649221260341371




New lr: 5.545e-06


TrainL 0.79: : 356it [05:10,  1.15it/s]




Train Loss(epoch=29): 0.8038310408592224
F1 score(train) 0.8124190019788907






ValL 1.19: : 94it [02:02,  1.30s/it]




Val Loss: 0.5590080618858337
F1 score(val) 0.7733298690283582




New lr: 5.050000000000001e-06


TrainL 2.02: : 356it [05:16,  1.13it/s]




Train Loss(epoch=30): 0.7771840691566467
F1 score(train) 0.8240537180771756






ValL 0.16: : 94it [02:02,  1.30s/it]




Val Loss: 0.5508750081062317
F1 score(val) 0.7748819211401844




New lr: 4.5550000000000004e-06


TrainL 0.48: : 356it [05:15,  1.13it/s]




Train Loss(epoch=31): 0.8205325603485107
F1 score(train) 0.8128924942827513






ValL 0.15: : 94it [02:01,  1.29s/it]




Val Loss: 0.5467661619186401
F1 score(val) 0.7732685262485854




New lr: 4.060000000000001e-06


TrainL 0.47: : 356it [05:15,  1.13it/s]




Train Loss(epoch=32): 0.7676957249641418
F1 score(train) 0.8136925386805486






ValL 0.15: : 94it [02:02,  1.30s/it]




Val Loss: 0.54839688539505
F1 score(val) 0.773720606074539




New lr: 3.565e-06


TrainL 0.56: : 356it [05:16,  1.12it/s]




Train Loss(epoch=33): 0.7815594673156738
F1 score(train) 0.8205093871221589






ValL 0.14: : 94it [02:02,  1.30s/it]




Val Loss: 0.539027988910675
F1 score(val) 0.77463001435081




New lr: 3.0700000000000003e-06


TrainL 1.00: : 356it [05:09,  1.15it/s]




Train Loss(epoch=34): 0.7604636549949646
F1 score(train) 0.8246950305423943






ValL 0.39: : 94it [01:56,  1.24s/it]




Val Loss: 0.5473848581314087
F1 score(val) 0.7716488460151824




New lr: 2.5750000000000003e-06


TrainL 0.54: : 356it [05:06,  1.16it/s]




Train Loss(epoch=35): 0.7496727108955383
F1 score(train) 0.8407109123887969






ValL 1.16: : 94it [02:01,  1.29s/it]




Val Loss: 0.5381060242652893
F1 score(val) 0.7740959365543839




New lr: 2.0800000000000004e-06


TrainL 0.84: : 356it [05:03,  1.17it/s]




Train Loss(epoch=36): 0.7662928700447083
F1 score(train) 0.8347666557353247






ValL 0.20: : 94it [01:54,  1.22s/it]




Val Loss: 0.5349243879318237
F1 score(val) 0.773113583105099




New lr: 1.585e-06


TrainL 0.54: : 356it [05:05,  1.17it/s]




Train Loss(epoch=37): 0.7630372047424316
F1 score(train) 0.8371275621184643






ValL 0.67: : 94it [01:54,  1.22s/it]




Val Loss: 0.5360065698623657
F1 score(val) 0.775971954544219




New lr: 1.0900000000000002e-06


TrainL 0.81: : 356it [05:07,  1.16it/s]




Train Loss(epoch=38): 0.7713930010795593
F1 score(train) 0.81629155079209






ValL 0.12: : 94it [01:54,  1.22s/it]




Val Loss: 0.5338691473007202
F1 score(val) 0.7772203203664688




New lr: 5.950000000000001e-07


TrainL 0.96: : 356it [05:08,  1.15it/s]




Train Loss(epoch=39): 0.7524335980415344
F1 score(train) 0.8378362897276539






ValL 0.06: : 94it [02:01,  1.29s/it]




Val Loss: 0.5331915616989136
F1 score(val) 0.7774776150207878




New lr: 1.0000000000000001e-07


In [None]:
model_eval = get_pretrained_passt_model(mode="all", n_classes=66, s_patchout_t=0, s_patchout_f=0)
state = torch.load(TORCH_SAVE_PATH)
model_eval.load_state_dict(state['model_state_dict'])

In [7]:
model_eval = model_eval.cuda()

In [8]:
    val_loss = 0.0
    ypred_f1 = []
    ytrue_f1 = []
    model_eval.eval()
    
    with torch.no_grad():
        for batch, y_val in tqdm.tqdm(val_dataloader):
            batch = batch.cuda()
            y_val = y_val.cuda()

            y_hat, emb = model_eval(batch)
            _loss = F.cross_entropy(y_hat, y_val, reduction="none")

            prob = F.softmax(y_hat, dim=1)
            y_pred = torch.argmax(prob, dim=1)

            ypred_f1.append(y_pred.cpu())
            ytrue_f1.append(y_val.cpu())

            val_loss += _loss.sum()

    f1_score_val = f1_score(torch.cat(ytrue_f1).numpy(), torch.cat(ypred_f1).numpy(), average='macro')

    print(f"Val Loss: {val_loss}")
    print(f"=" * 80)
    print(f"F1 score(val) {f1_score_val}")
    print(f"=" * 80)

  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore[attr-defined]


x torch.Size([12, 1, 128, 1000])
self.norm(x) torch.Size([12, 768, 12, 99])
 patch_embed :  torch.Size([12, 768, 12, 99])
 self.time_new_pos_embed.shape torch.Size([1, 768, 1, 99])
 self.freq_new_pos_embed.shape torch.Size([1, 768, 12, 1])
X flattened torch.Size([12, 1188, 768])
 self.new_pos_embed.shape torch.Size([1, 2, 768])
 self.cls_tokens.shape torch.Size([12, 1, 768])
 self.dist_token.shape torch.Size([12, 1, 768])
 final sequence x torch.Size([12, 1190, 768])
 after 12 atten blocks x torch.Size([12, 1190, 768])
forward_features torch.Size([12, 768])
head torch.Size([12, 66])


100%|██████████| 169/169 [03:09<00:00,  1.12s/it]

Val Loss: 1069.6846923828125
F1 score(val) 0.8392231749961314





In [None]:
%%sh
nvidia-smi


## Miscellaneous

### Glossary

SWA - Stochastic weight averaging

### Model stored weights
```
Downloading: "https://github.com/kkoutini/PaSST/releases/download/v0.0.1-audioset/passt-s-f128-p16-s10-ap.476-swa.pt" to /root/.cache/torch/hub/checkpoints/passt-s-f128-p16-s10-ap.476-swa.pt
```