In [1]:
import audioread
import logging
import os
import random
import time
import warnings

import librosa
import librosa.display as display
import numpy as np
import pandas as pd
import soundfile as sf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from contextlib import contextmanager
from IPython.display import Audio
from pathlib import Path
from typing import Optional, List

from catalyst.dl import Callback
#from catalyst.runners import Runner
from fastprogress import progress_bar
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, average_precision_score

import pandas as pd
import yaml
import torchaudio
from tqdm import tqdm
#from functools import partial
from sklearn import metrics
from datasets import PANNsDataset
from panns_models import *
from evaluate import Evaluator, StatisticsContainer
from losses import PANNsLoss

2023-07-18 22:59:04.242061: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-18 22:59:05.909684: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-18 22:59:05.914059: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def set_seed(seed: int = 42):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and CUDA),
    exports ``PYTHONHASHSEED`` and configures cuDNN for reproducible runs.

    Args:
        seed: Value handed to every generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # The cuDNN autotuner can select different kernels from run to run, which
    # defeats the deterministic flag above, so it has to be switched off.
    torch.backends.cudnn.benchmark = False  # type: ignore
    
    
def get_logger(out_file=None):
    """Configure the root logger for INFO output and return it.

    Any handler already attached to the root logger is detached and closed
    first, so calling this function again (e.g. when re-running the cell)
    neither duplicates log lines nor leaks the previous log file handle.

    Args:
        out_file: Optional path of a log file. When given, records are written
            to that file in addition to the stream handler.

    Returns:
        The configured root ``logging.Logger``.
    """
    logger = logging.getLogger()

    # Iterate over a copy: removeHandler mutates logger.handlers.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    logger.setLevel(logging.INFO)
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

    new_handlers = [logging.StreamHandler()]
    if out_file is not None:
        new_handlers.append(logging.FileHandler(out_file))

    for handler in new_handlers:
        handler.setFormatter(formatter)
        handler.setLevel(logging.INFO)
        logger.addHandler(handler)

    logger.info("logger set up")
    return logger
    
    
@contextmanager
def timer(name: str, logger: Optional[logging.Logger] = None):
    """Context manager that reports the wall-clock duration of its body.

    A "start" message is emitted on entry and a "done in N s" message after
    the body finishes normally. Messages go to ``logger.info`` when a logger
    is supplied and to ``print`` otherwise.

    Args:
        name: Label placed in square brackets at the front of both messages.
        logger: Optional logger receiving the messages.
    """
    emit = print if logger is None else logger.info

    started_at = time.time()
    emit(f"[{name}] start")
    yield
    emit(f"[{name}] done in {time.time() - started_at:.2f} s")
    
    
# Seed all random number generators once, before any dataset or model is built.
set_seed(1213)

In [3]:
import yaml
# Experiment configuration; later cells read its "data", "feats" and "opt" sections.
with open("./confs/default.yaml", "r") as config_file:
    configs = yaml.safe_load(config_file)

In [4]:
# Earlier directory layout, disabled; paths now come from the YAML config.
#ROOT = Path.cwd().parent
#INPUT_ROOT = ROOT / "input"
#RAW_DATA = INPUT_ROOT / "birdsong-recognition"
#TRAIN_AUDIO_DIR = RAW_DATA / "train_audio"

# Training split: annotation table (TSV) and the folder holding its audio.
TRAIN_DATA = configs["data"]["synth_tsv"]
TRAIN_RESAMPLED_AUDIO_DIRS = configs["data"]["synth_folder"]
TEST_AUDIO_DIR = configs["data"]["test_folder"]
train_df = pd.read_csv(TRAIN_DATA, sep="\t")

# Validation split: annotation table (TSV) and the folder holding its audio.
VAL_DATA = configs["data"]["synth_val_tsv"]
VAL_AUDIO_DIR = configs["data"]["synth_val_folder"]

# Read the validation annotations from VAL_DATA; reading TRAIN_DATA here
# would make val_df a silent copy of train_df.
val_df = pd.read_csv(VAL_DATA, sep="\t")

In [5]:
# --- Audio / feature settings, all taken from the YAML config -------------
SAMPLE_RATE = configs["data"]["fs"]
# N_FFT and WIN_LENGTH both read the same "n_window" entry.
N_FFT = configs["feats"]["n_window"]
WIN_LENGTH = configs["feats"]["n_window"]
HOP_LENGTH = configs["feats"]["hop_length"]
F_MIN = configs["feats"]["f_min"]
F_MAX = configs["feats"]["f_max"]
N_MELS = configs["feats"]["n_mels"]
WINDOW_FN = torch.hamming_window
WKWARGS = {"periodic": False}
POWER = 1
# Number of samples per clip; equal to SAMPLE_RATE, i.e. one second of audio.
NUM_SAMPLES = SAMPLE_RATE

# --- Training settings -----------------------------------------------------
# NOTE(review): in the visible cells the optimizer is built with lr=0.001, the
# loaders with batch_size=2 and the training loop with range(2), so
# LEARNING_RATE, epochs and BATCH_SIZE are currently not read anywhere.
LEARNING_RATE = configs["opt"]["lr"]
epochs = 5
BATCH_SIZE = 8

    # Duration of one hop (frame step) in seconds.
frame_length_sec = HOP_LENGTH / SAMPLE_RATE

# The string literal below is disabled code, not a docstring. It is the last
# expression statement of the cell, so the notebook echoes its repr as output.
"""model_config = {
    "sample_rate": SAMPLE_RATE,
    "window_size": WIN_LENGTH,
    "hop_size": HOP_LENGTH,
    "mel_bins": N_MELS,
    "fmin": F_MIN,
    "fmax": F_MAX,
    "classes_num": 10
}"""

# Disabled: the model is constructed in a later cell.
#model = PANNsCNN14Att(**model_config)

'model_config = {\n    "sample_rate": SAMPLE_RATE,\n    "window_size": WIN_LENGTH,\n    "hop_size": HOP_LENGTH,\n    "mel_bins": N_MELS,\n    "fmin": F_MIN,\n    "fmax": F_MAX,\n    "classes_num": 10\n}'

In [6]:
device = "cuda" if torch.cuda.is_available() else "cpu"


def _build_dataset(annotations_file, audio_dir):
    """Create a PANNsDataset that uses the notebook-wide audio settings.

    Args:
        annotations_file: Path of the TSV annotation table.
        audio_dir: Folder containing the audio files listed in the table.

    Returns:
        A ``PANNsDataset`` with no transformation, targeting SAMPLE_RATE and
        NUM_SAMPLES samples per clip on ``device``.
    """
    return PANNsDataset(
        annotations_file=annotations_file,
        audio_dir=audio_dir,
        transformation=None,
        target_sample_rate=SAMPLE_RATE,
        num_samples=NUM_SAMPLES,
        device=device,
    )


train_dataset = _build_dataset(
    configs["data"]["synth_tsv"],
    configs["data"]["synth_folder"],
)
val_dataset = _build_dataset(
    configs["data"]["synth_val_tsv"],
    configs["data"]["synth_val_folder"],
)

# loaders
# NOTE(review): batch_size stays at the original literal 2; the BATCH_SIZE
# constant defined earlier is not used here - confirm which one is intended.
loaders = {
    # Training batches are reshuffled every epoch so the optimizer does not
    # see the annotation file's row order; the global seed keeps it repeatable.
    "train": DataLoader(train_dataset, batch_size=2, shuffle=True),
    # Validation order is kept fixed so per-batch statistics are comparable.
    "valid": DataLoader(val_dataset, batch_size=2, shuffle=False),
}

In [7]:
# Sanity check: shape of the "waveform" entry of the first training item.
train_dataset[0]["waveform"].shape

torch.Size([16000])

In [8]:
# Keyword arguments passed to the PANNs model constructor.
# NOTE(review): the values are literals rather than the config-derived constants
# (SAMPLE_RATE, N_MELS, ...), and fmax (14000) is above sample_rate / 2 (8000)
# - confirm both are intended.
model_config = dict(
    sample_rate=16000,
    window_size=1024,
    hop_size=320,
    mel_bins=64,
    fmin=50,
    fmax=14000,
    classes_num=10,
)

In [9]:
# Log directory name. NOTE(review): only the commented-out CheckpointCallback
# refers to it; the training cell defines its own save_path with the same value.
logdir = "./log_dir"

In [10]:
# model
# The network is first built with a 527-class head so that the pretrained
# checkpoint's state dict fits (presumably the checkpoint's original label
# count - confirm), then the attention head is swapped for a task-sized one.
# A copy of model_config is used so the shared dict keeps its own classes_num
# and this cell can be re-run without changing what it builds.
pretrained_config = {**model_config, "classes_num": 527}
model = PANNsCNN14Att(**pretrained_config)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
weights = torch.load("Cnn14_DecisionLevelAtt_mAP0.425.pth", map_location="cpu")
# Fixed in V3
model.load_state_dict(weights["model"])
# Fresh, untrained head sized for this task's classes.
model.att_block = AttBlock(2048, model_config["classes_num"], activation='sigmoid')
#model.att_block.init_weights()
model.to(device)

# Optimizer
# Created after the head swap so the new att_block parameters are optimised.
# NOTE(review): lr is the literal 0.001, not LEARNING_RATE from the config.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Scheduler
# NOTE(review): the training cell sets scheduler = None before its loop, so
# this scheduler is never stepped in the visible code.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

# Loss
criterion = PANNsLoss().to(device)
#F1Callback(input_key="targets", output_key="logits", prefix="f1")
# callbacks
#callbacks = [
#    mAPCallback(input_key="targets", output_key="logits", prefix="mAP"),
#    CheckpointCallback(save_best =0, logdir = logdir)
#]

  self.melW = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels,


In [11]:
# Evaluator bound to the model; valid_epoch reads this global and calls
# evaluator.evaluate(...) on every validation batch.
evaluator = Evaluator(model=model)

In [12]:
# Peek at a single training batch to confirm the batched waveform shape.
batch = next(iter(loaders["train"]), None)
if batch is not None:
    print(batch["waveform"].shape)

torch.Size([2, 16000])


In [13]:
def _lwlrap_sklearn(truth, scores):
    """Label-weighted label-ranking average precision computed with scikit-learn.

    Reference implementation from https://colab.research.google.com/drive/1AgPdhSp7ttY18O3fEoHOQKlt_3HJDLi8

    Rows of ``truth`` without any positive label are dropped; each remaining
    row is weighted by its number of positive labels.

    Args:
        truth: 2-D array of labels; entries greater than 0 count as positive.
        scores: 2-D array of predicted scores, indexed like ``truth``.

    Returns:
        The value returned by ``label_ranking_average_precision_score``.
    """
    positives_per_row = np.sum(truth > 0, axis=1)
    labelled_rows = np.flatnonzero(positives_per_row > 0)
    return metrics.label_ranking_average_precision_score(
        truth[labelled_rows, :] > 0,
        scores[labelled_rows, :],
        sample_weight=positives_per_row[labelled_rows],
    )

class AverageMeter(object):
    """Tracks the latest value and the running weighted mean of a quantity.

    Attributes are ``val`` (last value seen), ``sum`` (weighted total),
    ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every tracked statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean.

        Args:
            val: Newly observed value.
            n: Weight of the observation (e.g. the batch size).
        """
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

class MetricMeter(object):
    """Accumulates targets and predictions and scores them with LWLRAP."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Drop every stored target and prediction."""
        self.y_true, self.y_pred = [], []

    def update(self, y_true, y_pred):
        """Append one batch of targets and predictions.

        Both tensors are moved to the CPU, detached and stored row by row as
        plain Python lists.
        """
        for store, batch in ((self.y_true, y_true), (self.y_pred, y_pred)):
            store.extend(batch.cpu().detach().numpy().tolist())

    @property
    def avg(self):
        """Score everything stored so far.

        The score is also kept on ``self.score``.

        Returns:
            A dict with the single key ``"lwlrap"``.
        """
        truth = np.array(self.y_true)
        predictions = np.array(self.y_pred)
        self.score = _lwlrap_sklearn(truth, predictions)
        return {"lwlrap": self.score}

def seed_everything(seed):
    """Seed Python, NumPy and PyTorch and request deterministic cuDNN kernels.

    Covers the same generators as ``set_seed`` defined earlier in this
    notebook, but leaves ``torch.backends.cudnn.benchmark`` untouched.

    Args:
        seed: Value handed to every generator and exported as PYTHONHASHSEED.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators and cuDNN behaviour.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

In [14]:
# Early-stopping bookkeeping: best validation LWLRAP so far and the number of
# consecutive epochs without improvement. The training cell assigns both
# names again before its loop.
best_lwlrap = -np.inf
early_stop_count = 0

In [15]:
# Path handed to StatisticsContainer; the next cell creates it as a directory.
statistics_path = configs["data"]["statistics"]

In [16]:
# Create the statistics folder, including missing parent folders; a no-op
# when it already exists, so the cell can be re-run safely.
os.makedirs(statistics_path, exist_ok=True)

In [17]:
# Global container; valid_epoch appends each batch's evaluation statistics to it.
statistics_container = StatisticsContainer(statistics_path)

In [16]:
def train_epoch(device, model, loader, criterion, optimizer, scheduler, epoch):
    """Run one optimisation pass over ``loader``.

    Args:
        device: Device the batch tensors are moved to.
        model: Network being trained; its output is indexed with
            ``'framewise_output'``.
        loader: Iterable of batches providing ``'waveform'`` and ``'targets'``.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Accepted for interface compatibility; not used here.
        epoch: Epoch index, shown in the progress-bar description.

    Returns:
        ``(score, loss)``: the ``MetricMeter.avg`` dict (key ``"lwlrap"``) and
        the sample-weighted mean loss over the epoch.
    """
    loss_meter = AverageMeter()
    score_meter = MetricMeter()

    model.train()
    progress = tqdm(loader)
    for batch in progress:
        waveforms = batch['waveform'].to(device)
        targets = batch['targets'].to(device)

        optimizer.zero_grad()
        outputs = model(waveforms)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Reduce the framewise output with a max over dim 1 (assumed to be
        # the frame axis - TODO confirm) before scoring.
        pooled, _ = torch.max(outputs['framewise_output'], dim=1)
        score_meter.update(targets, torch.sigmoid(pooled))
        loss_meter.update(loss.item(), waveforms.size(0))

        progress.set_description(f"Train E:{epoch} - Loss{loss_meter.avg:0.4f}")
    progress.close()
    return score_meter.avg, loss_meter.avg
        
def valid_epoch(device, model, loader, criterion, epoch):
    """Evaluate ``model`` on ``loader`` without updating its weights.

    For each batch the module-level ``evaluator`` computes evaluation
    statistics, which are appended to the module-level
    ``statistics_container`` and reported through ``logging`` and ``print``.
    Loss and LWLRAP are accumulated over the whole loader.

    Args:
        device: Device the batch tensors are moved to.
        model: Network to evaluate; its output is indexed with
            ``'framewise_output'``.
        loader: Iterable of batches providing ``'waveform'`` and ``'targets'``.
        criterion: Loss called as ``criterion(output, target)``.
        epoch: Epoch index, shown in the progress-bar description.

    Returns:
        ``(score, loss)``: the ``MetricMeter.avg`` dict (key ``"lwlrap"``) and
        the sample-weighted mean loss.
    """
    # The statistics are only logged inside the loop: logging them before the
    # loop would read eval_statistics before it exists and raise
    # UnboundLocalError on every call.
    losses = AverageMeter()
    scores = MetricMeter()
    model.eval()
    with torch.no_grad():
        t = tqdm(loader)
        for i, sample in enumerate(t):
            eval_statistics = evaluator.evaluate(sample)
            statistics_container.append(i, eval_statistics, data_type='eval')
            message = 'Validate bal mAP: {:.3f}'.format(
                np.mean(eval_statistics['average_precision']))
            logging.info(message)
            print(message)

            waveform = sample['waveform'].to(device)
            target = sample['targets'].to(device)
            output = model(waveform)
            loss = criterion(output, target)

            bs = waveform.size(0)
            # Max over dim 1 of the framewise output (assumed to be the frame
            # axis - TODO confirm) gives the clip-level scores.
            scores.update(target, torch.sigmoid(torch.max(output['framewise_output'], dim=1)[0]))
            losses.update(loss.item(), bs)
            t.set_description(f"Valid E:{epoch} - Loss:{losses.avg:0.4f}")
    t.close()
    return scores.avg, losses.avg

def test_epoch(device, model, loader):
    """Predict for every batch of ``loader`` and collect the results.

    Each batch's ``"image"`` tensor is unpacked as three dimensions, the first
    two are flattened together for the forward pass, and the per-row scores
    are summed back over the second dimension.

    Args:
        device: Device the input tensors are moved to.
        model: Network whose output is indexed with ``'framewise_output'``.
        loader: Iterable of batches providing ``"image"`` and ``"id"``.

    Returns:
        ``(pred_list, id_list)``: predictions as nested Python lists and the
        ids gathered from every batch.
    """
    model.eval()
    predictions, record_ids = [], []
    with torch.no_grad():
        for batch in tqdm(loader):
            clips = batch["image"].to(device)
            n_items, n_segments, width = clips.shape
            flat_clips = clips.reshape(n_items * n_segments, width)
            batch_ids = batch["id"]

            outputs = model(flat_clips)
            pooled = torch.max(outputs['framewise_output'], dim=1)[0]
            per_segment = torch.sigmoid(pooled).reshape(n_items, n_segments, -1)
            # Segment scores are summed; a max over dim 1 was the alternative
            # considered in the original code.
            per_item = torch.sum(per_segment, dim=1)

            predictions.extend(per_item.cpu().detach().numpy().tolist())
            record_ids.extend(batch_ids)

    return predictions, record_ids

In [None]:
#criterion = PANNsLoss()
best_lwlrap = -np.inf
early_stop_count = 0
# NOTE(review): this discards the CosineAnnealingLR scheduler built in the
# model cell; the learning rate therefore stays constant.
scheduler = None
save_path = "./log_dir"
exp_name = "logging"
early_stop = 15
# Best-model checkpoint; same location as before (save_path + '.bin').
checkpoint_path = save_path + '.bin'

# The per-epoch text log is written inside save_path, so the folder must
# exist before the first append.
os.makedirs(save_path, exist_ok=True)

# NOTE(review): the loop runs 2 epochs, so early_stop (15) can never trigger
# and the `epochs` constant defined earlier is not used.
for epoch in range(2):
    train_avg, train_loss = train_epoch(device, model, loaders["train"], criterion, optimizer, scheduler, epoch)
    valid_avg, valid_loss = valid_epoch(device, model, loaders["valid"], criterion, epoch)

    #if args.epoch_scheduler:
       # scheduler.step()

    content = f"""
                {time.ctime()} \n
                Epoch:{epoch}, lr:{optimizer.param_groups[0]['lr']:.7}\n
                Train Loss:{train_loss:0.4f} - LWLRAP:{train_avg['lwlrap']:0.4f}\n
                Valid Loss:{valid_loss:0.4f} - LWLRAP:{valid_avg['lwlrap']:0.4f}\n
        """
    with open(f'{save_path}/log_{exp_name}.txt', 'a') as appender:
        appender.write(content+'\n')

    # Keep only the checkpoint with the best validation LWLRAP.
    if valid_avg['lwlrap'] > best_lwlrap:
        print(f"########## >>>>>>>> Model Improved From {best_lwlrap} ----> {valid_avg['lwlrap']}")
        torch.save(model.state_dict(), checkpoint_path)
        best_lwlrap = valid_avg['lwlrap']
        early_stop_count = 0
    else:
        early_stop_count += 1
    #torch.save(model.state_dict(), os.path.join(args.save_path, f'fold-{args.fold}_last.bin'))

    if early_stop == early_stop_count:
        print("\n $$$ ---? Ohoo.... we reached early stoping count :", early_stop_count)
        break

# Restore the best weights. map_location is an argument of torch.load;
# load_state_dict does not accept it and would raise TypeError.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model = model.to(device)



In [None]:
"""target_cols = sub_df.columns[1:].values.tolist()
test_pred, ids = test_epoch(device, model, test_loader)
print(np.array(test_pred).shape)

test_pred_df = pd.DataFrame({
        "recording_id" : sub_df.recording_id.values
    })
test_pred_df[target_cols] = test_pred
test_pred_df.to_csv(save_path+'.bin'+"-submission.csv", index=False)
print(os.path.join(save_path, f"-submission.csv"))
        
        #print(content)"""

In [None]:
# NOTE(review): sub_df and test_loader are not defined in any visible cell of
# this notebook; they must be created before this cell can run.
target_cols = sub_df.columns[1:].values.tolist()
# ids is returned by test_epoch but not used; row ids come from sub_df.
test_pred, ids = test_epoch(device, model, test_loader)
print(np.array(test_pred).shape)

test_pred_df = pd.DataFrame({"recording_id": sub_df.recording_id.values})
test_pred_df[target_cols] = test_pred

# Build the output path once, so the path that is reported is the path the
# file is actually written to.
submission_path = save_path + '.bin' + "-submission.csv"
test_pred_df.to_csv(submission_path, index=False)
print(submission_path)

In [None]:
"""warnings.simplefilter("ignore")

runner = SupervisedRunner(
    input_key="waveform",
    target_key="targets")

runner.train(
    model=model,
    criterion=criterion,
    loaders=loaders,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=10,
    verbose=True,
    logdir=f"fold0",
    callbacks=callbacks)"""