In [None]:
!nvidia-smi

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from google.colab import auth
auth.authenticate_user()

In [None]:
# Install the third-party packages imported by the cells below.
# pip's stdout is redirected to /dev/null to keep the cell output short.
!pip install timm torchaudio evaluations wandb audiomentations acoustics google-cloud-secret-manager torchlibrosa > /dev/null

In [None]:
from google.cloud import secretmanager

def access_secret(project_id, secret_name, version='latest'):
    """Read one secret version from Google Cloud Secret Manager.

    Args:
        project_id: GCP project that owns the secret.
        secret_name: Name of the secret inside that project.
        version: Secret version to read; defaults to ``'latest'``.

    Returns:
        The secret payload decoded as a UTF-8 string.
    """
    sm_client = secretmanager.SecretManagerServiceClient()
    version_path = sm_client.secret_version_path(project_id, secret_name, version)
    secret_version = sm_client.access_secret_version(request={"name": version_path})
    return secret_version.payload.data.decode("UTF-8")

PROJECT_ID = "cyberagent-312"
SECRET_NAME  = "wandb"
wandb_key = access_secret(PROJECT_ID, SECRET_NAME)

# Authenticate through the Python API instead of `!wandb login {wandb_key}`:
# interpolating the key into a shell command exposes it on the process
# command line and sends it through shell parsing.
import wandb
wandb.login(key=wandb_key)

In [None]:
# Create the local working directory for the competition data.
!mkdir -p birdclef-2021

# Copy the soundscape recordings, the training metadata and the soundscape
# labels from the mounted Google Drive into the local directory.
!cp -r ./drive/MyDrive/Study/BirdCLEF/input/birdclef-2021/train_soundscapes birdclef-2021
!cp -r ./drive/MyDrive/Study/BirdCLEF/input/birdclef-2021/train_metadata.csv birdclef-2021
!cp -r ./drive/MyDrive/Study/BirdCLEF/input/birdclef-2021/train_soundscape_labels.csv birdclef-2021

In [None]:
# Install the Kaggle CLI and place the API token where the CLI looks for it
# (/root/.kaggle/kaggle.json), readable by the owner only.
# NOTE(review): this cell reads from "./drive/My Drive/..." while the other
# cells use "./drive/MyDrive/..." — confirm both paths resolve on the mount.
!pip install -q kaggle
!mkdir -p .kaggle
!cp "./drive/My Drive/Study/config/kaggle.json" .kaggle/
!chmod 600 .kaggle/kaggle.json
!mv .kaggle /root

In [None]:
%%time
# Takes about 3 minutes.
# Download one pre-split audio dataset from Kaggle, unzip it into
# birdclef-2021 and delete the archive. Only the "300-250" dataset is active;
# the other two are kept commented out as alternatives.

# 500-400
#!kaggle datasets download takamichitoda/birdclef-split-audio-frequency-500400
#!unzip birdclef-split-audio-frequency-500400.zip -d birdclef-2021 > /dev/null
#!rm birdclef-split-audio-frequency-500400.zip 


# 400-300
#!kaggle datasets download takamichitoda/birdclef-split-audio-by-label-frequency-400300
#!unzip birdclef-split-audio-by-label-frequency-400300.zip -d birdclef-2021 > /dev/null
#!rm birdclef-split-audio-by-label-frequency-400300.zip

# 300-250
!kaggle datasets download takamichitoda/birdclef-split-audio-frequency-300250
!unzip birdclef-split-audio-frequency-300250.zip -d birdclef-2021 > /dev/null
!rm birdclef-split-audio-frequency-300250.zip

In [None]:
import os
import librosa
import psutil
import torch.nn as nn
import random

import numpy as np
import pandas as pd
import soundfile as sf

import matplotlib.pyplot as plt

import albumentations as A
from torchvision import transforms

from sklearn.model_selection import StratifiedKFold

import torch
from torch.nn import functional as F
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR

import wandb
import timm
from tqdm.notebook import tqdm as tqdm_notebook

import torchaudio
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB
from IPython.display import Audio
from audiomentations import Compose, AddGaussianNoise, AddBackgroundNoise, AddGaussianSNR, AddShortNoises, Gain
import acoustics

from sklearn.metrics import precision_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import f1_score
from sklearn.metrics import average_precision_score
from evaluations.kaggle_2020 import row_wise_micro_averaged_f1_score
from torchlibrosa.augmentation import DropStripes
from pathlib import Path

device = torch.device("cuda")

In [None]:
"""def load_mel_spec(data):
    mel_spec = librosa.feature.melspectrogram(y=data, 
                                              sr=32000, 
                                              n_fft=2048, 
                                              n_mels=128, 
                                              fmin=20, 
                                              fmax=16000)
    mel_spec = librosa.power_to_db(mel_spec, ref=np.max) 
    return mel_spec

wave, sr = sf.read(f"birdclef-2021/daejun/XC269060.ogg")
spec_x = load_mel_spec(wave)

idx = 0
plt.imshow(spec_x[:, 313*idx:313*(idx+1)])
Audio(wave, rate=32000)"""

In [None]:
class config:
    # Experiment-wide settings. Every non-dunder attribute of this class is
    # copied into the wandb run config by the training cell.
    EXP_NUM = "0024"  # used in the output directory name and the wandb run name
    EXP_NAME = "frame_max_loss"
    # data setting
    INPUT_ROOT = "/content/birdclef-2021"  # audio folders (one per label) and CSV files
    WORK_ROOT = "/content"
    OUTPUT_ROOT = "/content/drive/MyDrive/Study/BirdCLEF/output"  # checkpoints are saved below this
    NOISE_ROOT = "/content/noise/"  # background-noise files used for augmentation
    LABEL_FREQ = "300-250"  # only used as part of the wandb run name
    # audio setting
    SAMPLE_RATE = 32000
    FMIN = 20
    FMAX = 16000
    N_FFT = 2048
    SPEC_HEIGHT = 128  # passed as n_mels to the mel-spectrogram extractor
    PERIOD = 5  # clip length in seconds (PERIOD * SAMPLE_RATE samples per training clip)
    HOP_LENGTH = 512
    # Audio augmentation: SNR range (dB) passed to AddBackgroundNoise
    MAX_SNR_IN_DB = 100
    MIN_SNR_IN_DB = 3
    # ML setting
    SEED = 416
    BATCH_SIZE = 64
    MODEL_NAME = "resnet18"  # must be a key of MODEL_HEADER_INFO
    LEARNING_RATE = 1e-3
    T_MAX = 5  # multiplied by batches-per-epoch to get the cosine scheduler's T_max
    NUM_EPOCHS = 5
    N_ACCUMULATE = 1  # gradient-accumulation interval read by train_loop
    LABEL_SMOOTHING = 0.1  # applied to training targets only
    # infer setting
    THRESHOLD = 0.5  # probability cut-off for turning scores into labels
print("exp number:", config.EXP_NUM)
print("detail:", config.EXP_NAME)

In [None]:
def set_seed(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode with
    benchmarking disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

In [None]:
# https://github.com/karolpiczak/ESC-50
if not os.path.exists(config.NOISE_ROOT):

    os.makedirs(f"{config.NOISE_ROOT}", exist_ok=True)

    os.makedirs(f"{config.NOISE_ROOT}/simple", exist_ok=True)

    brown_noise = acoustics.generator.brown(5*config.SAMPLE_RATE)
    pink_noise = acoustics.generator.pink(5*config.SAMPLE_RATE, np.random.RandomState(config.SEED))

    sf.write(f"{config.NOISE_ROOT}/simple/brown_noise.wav", brown_noise, samplerate=config.SAMPLE_RATE)
    sf.write(f"{config.NOISE_ROOT}/simple/pink_noise.wav", pink_noise, samplerate=config.SAMPLE_RATE)

    !git clone https://github.com/karolpiczak/ESC-50.git

    esc50_meta_df = pd.read_csv("ESC-50/meta/esc50.csv")
    airplane_fnames = esc50_meta_df.query("category=='airplane'")["filename"]
    rain_fnames = esc50_meta_df.query("category=='rain'")["filename"]
    wind_fnames = esc50_meta_df.query("category=='wind'")["filename"]
    insects_fnames = esc50_meta_df.query("category=='insects'")["filename"]
    engine_fnames = esc50_meta_df.query("category=='engine'")["filename"]
    crickets_fnames = esc50_meta_df.query("category=='crickets'")["filename"]
    water_drops_fnames = esc50_meta_df.query("category=='water_drops'")["filename"]
    crackling_fire_fnames = esc50_meta_df.query("category=='crackling_fire'")["filename"]
    frog_fnames = esc50_meta_df.query("category=='frog'")["filename"]

    os.makedirs(f"{config.NOISE_ROOT}/airplane", exist_ok=True)
    os.makedirs(f"{config.NOISE_ROOT}/rain", exist_ok=True)
    os.makedirs(f"{config.NOISE_ROOT}/wind", exist_ok=True)
    os.makedirs(f"{config.NOISE_ROOT}/insects", exist_ok=True)
    os.makedirs(f"{config.NOISE_ROOT}/engine", exist_ok=True)
    os.makedirs(f"{config.NOISE_ROOT}/crickets", exist_ok=True)
    os.makedirs(f"{config.NOISE_ROOT}/water_drops", exist_ok=True)
    os.makedirs(f"{config.NOISE_ROOT}/crackling_fire", exist_ok=True)
    os.makedirs(f"{config.NOISE_ROOT}/frog", exist_ok=True)

    for fname in airplane_fnames:
        wave, sr = sf.read(f"ESC-50/audio/{fname}")
        sf.write(f"{config.NOISE_ROOT}/airplane/{fname}.wav", wave, samplerate=config.SAMPLE_RATE)
    for fname in rain_fnames:
        wave, sr = sf.read(f"ESC-50/audio/{fname}")
        sf.write(f"{config.NOISE_ROOT}/rain/{fname}.wav", wave, samplerate=config.SAMPLE_RATE)
    for fname in wind_fnames:
        wave, sr = sf.read(f"ESC-50/audio/{fname}")
        sf.write(f"{config.NOISE_ROOT}/wind/{fname}.wav", wave, samplerate=config.SAMPLE_RATE)
    for fname in insects_fnames:
        wave, sr = sf.read(f"ESC-50/audio/{fname}")
        sf.write(f"{config.NOISE_ROOT}/insects/{fname}.wav", wave, samplerate=config.SAMPLE_RATE)
    for fname in engine_fnames:
        wave, sr = sf.read(f"ESC-50/audio/{fname}")
        sf.write(f"{config.NOISE_ROOT}/engine/{fname}.wav", wave, samplerate=config.SAMPLE_RATE)
    for fname in crickets_fnames:
        wave, sr = sf.read(f"ESC-50/audio/{fname}")
        sf.write(f"{config.NOISE_ROOT}/crickets/{fname}.wav", wave, samplerate=config.SAMPLE_RATE)
    for fname in water_drops_fnames:
        wave, sr = sf.read(f"ESC-50/audio/{fname}")
        sf.write(f"{config.NOISE_ROOT}/water_drops/{fname}.wav", wave, samplerate=config.SAMPLE_RATE)
    for fname in crackling_fire_fnames:
        wave, sr = sf.read(f"ESC-50/audio/{fname}")
        sf.write(f"{config.NOISE_ROOT}/crackling_fire/{fname}.wav", wave, samplerate=config.SAMPLE_RATE)
    for fname in frog_fnames:
        wave, sr = sf.read(f"ESC-50/audio/{fname}")
        sf.write(f"{config.NOISE_ROOT}/frog/{fname}.wav", wave, samplerate=config.SAMPLE_RATE)                          
    !rm -rf ESC-50

!ls {config.NOISE_ROOT}/*

In [None]:
def arrange_wave_length(waveform, effective_length=None):
    """Crop or zero-pad a waveform to a fixed number of samples.

    Args:
        waveform: 2-D tensor indexed as (channel, sample).
        effective_length: Target number of samples. When omitted it defaults
            to ``config.PERIOD * config.SAMPLE_RATE``.

    Returns:
        A tensor with ``effective_length`` samples per channel. Longer inputs
        keep their first ``effective_length`` samples, shorter inputs are
        padded with trailing zeros, and inputs of the right length are
        returned unchanged.
    """
    if effective_length is None:
        effective_length = config.PERIOD * config.SAMPLE_RATE

    input_length = waveform.shape[1]
    if input_length > effective_length:
        return waveform[:, :effective_length]
    if input_length < effective_length:
        # Build the padding from the input so it has the same channel count,
        # dtype and device; a fixed single-channel pad fails for
        # multi-channel audio.
        pad = waveform.new_zeros((waveform.shape[0], effective_length - input_length))
        return torch.cat([waveform, pad], dim=1)
    return waveform

class BirdCLEFTrainDataset(torch.utils.data.Dataset):
    """Dataset of fixed-length clips with (optionally smoothed) one-hot labels.

    Each item is loaded from ``{config.INPUT_ROOT}/{label}/{fname}``. In
    ``"train"`` mode the waveform is passed through the noise augmentations
    and the target is label-smoothed; in any other mode both are left as is.
    """

    # Noise sub-directories that are mixed in as background noise.
    # Other directories prepared under NOISE_ROOT (wind, insects, engine,
    # crickets, frog, crackling_fire, water_drops) are currently not used,
    # and neither are the Gain / AddShortNoises transforms.
    ACTIVE_NOISE_DIRS = ("simple", "airplane", "rain")

    def __init__(self, fnames, labels, mode):
        self.fnames = fnames
        self.labels = labels
        self.mode = mode

        # https://github.com/iver56/audiomentations
        background_noises = [
            AddBackgroundNoise(
                sounds_path=f"{config.NOISE_ROOT}/{noise_dir}",
                min_snr_in_db=config.MIN_SNR_IN_DB,
                max_snr_in_db=config.MAX_SNR_IN_DB,
                p=0.5,
            )
            for noise_dir in self.ACTIVE_NOISE_DIRS
        ]
        self.augment = Compose(
            [
                AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
                AddGaussianSNR(min_SNR=0.001, max_SNR=1.0, p=0.5),
            ]
            + background_noises
        )

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        fname, label = self.fnames[idx], self.labels[idx]
        is_train = self.mode == "train"

        # Load the clip, force it to the fixed length and keep channel 0 as a
        # 1-D NumPy array.
        waveform, sample_rate = torchaudio.load(f"{config.INPUT_ROOT}/{label}/{fname}")
        waveform = arrange_wave_length(waveform).numpy()[0]
        if is_train:
            waveform = self.augment(waveform, sample_rate=config.SAMPLE_RATE)

        # One-hot target for the primary label.
        label_ohe = torch.zeros(n_labels)
        label_ohe[label_dic[label]] = 1.0
        if is_train:
            label_ohe = label_ohe * (1 - config.LABEL_SMOOTHING) + (config.LABEL_SMOOTHING/n_labels)

        return waveform, label_ohe


class TestDataset(torch.utils.data.Dataset):
    """Dataset that cuts each soundscape recording into consecutive windows.

    Each item is one recording, returned as ``(clips, row_ids)``: the windows
    concatenated along dim 0 and one ``"{audio_id}_{site}_{end_second}"`` id
    per window. File names are expected to have exactly three
    ``_``-separated parts, the first two being the audio id and the site.
    """

    SEGMENT_SECONDS = 5    # window length; row ids use the window's end second
    TOTAL_SECONDS = 600    # span of each recording that is scored

    def __init__(self, all_audios):
        self.all_audios = all_audios

    def __len__(self):
        return len(self.all_audios)

    def __getitem__(self, idx):
        audio_path = self.all_audios[idx]
        audio_id, site, _ = audio_path.name.split("_")
        clip, sample_rate = torchaudio.load(audio_path)

        segment_samples = self.SEGMENT_SECONDS * config.SAMPLE_RATE
        n_segments = self.TOTAL_SECONDS // self.SEGMENT_SECONDS

        # Zero-pad recordings shorter than TOTAL_SECONDS so every window has
        # the same length; ragged windows would make torch.cat fail.
        missing = n_segments * segment_samples - clip.shape[1]
        if missing > 0:
            clip = torch.nn.functional.pad(clip, (0, missing))

        clips, row_ids = [], []
        for seg_i in range(n_segments):
            head = seg_i * segment_samples
            clips.append(clip[:, head:head + segment_samples])
            row_ids.append(f"{audio_id}_{site}_{(seg_i + 1) * self.SEGMENT_SECONDS}")

        clips = torch.cat(clips, dim=0)
        return clips, row_ids

In [None]:
def birdclef_criterion(outputs, targets):
    """Sum of clip-level and frame-max binary cross-entropy losses.

    Args:
        outputs: Dict with ``"clipwise_output"`` logits and
            ``"segmentwise_output"`` logits; the latter is reduced with a
            max over dim 2 before the loss is taken.
        targets: Target tensor matching the shape of the clip-level logits.

    Returns:
        Scalar tensor: mean BCE-with-logits of the clip logits plus mean
        BCE-with-logits of the per-class maximum segment logits.
    """
    clip_logits = outputs["clipwise_output"]
    frame_max_logits = outputs["segmentwise_output"].max(dim=2).values

    clip_loss = F.binary_cross_entropy_with_logits(clip_logits, targets, reduction="mean")
    frame_loss = F.binary_cross_entropy_with_logits(frame_max_logits, targets, reduction="mean")
    return clip_loss + frame_loss

In [None]:
# Per-backbone settings read by BirdCLEFNet: model name ->
# (slice end applied to the backbone's children to drop its tail layers,
#  number of input channels for the two Conv1d heads).
MODEL_HEADER_INFO = {
    "resnet18": (-2, 512)
}

def interpolate_and_padding(x, frames_num):  # x: (batch, class_num, time)
    """Stretch per-step outputs along time to exactly ``frames_num`` steps.

    The time axis is first upsampled by repeating every step
    ``frames_num // time`` times, then bilinearly resized (with
    ``align_corners=True``) to ``frames_num`` steps.

    Args:
        x: Tensor of shape (batch, class_num, time).
        frames_num: Number of time steps in the result.

    Returns:
        Tensor of shape (batch, class_num, frames_num).
    """
    repeat_factor = frames_num // x.shape[2]

    # (batch, time * repeat_factor, class_num): each step duplicated in place.
    stretched = x.transpose(1, 2).repeat_interleave(repeat_factor, dim=1)

    # Resize only the time axis; the class axis keeps its size.
    resized = F.interpolate(
        stretched.unsqueeze(1),
        size=(frames_num, stretched.size(2)),
        mode="bilinear",
        align_corners=True,
    )

    return resized.squeeze(1).transpose(1, 2)  # (batch, class_num, time)

class BirdCLEFNet(nn.Module):
    """Waveform-to-logits model: mel front end, timm backbone, two 1x1 heads.

    ``forward`` returns clip-level logits (attention-weighted sum over time)
    and segment-level logits stretched back to the spectrogram frame count.
    """

    def __init__(self, model_name):
        super().__init__()
        self.model_name = model_name
        self.n_label = n_labels

        # Front end: waveform -> mel spectrogram -> decibel scale.
        self.mel_spectrogram_extractor = MelSpectrogram(
            sample_rate=config.SAMPLE_RATE,
            n_fft=config.N_FFT,
            f_min=config.FMIN,
            f_max=config.FMAX,
            n_mels=config.SPEC_HEIGHT,
            hop_length=config.HOP_LENGTH,
        )
        self.amplitude_to_db = AmplitudeToDB()
        # Constructed but not applied in forward(); spectrogram augmentation
        # is currently disabled.
        self.spec_augment_freq = DropStripes(dim=2, drop_width=30, stripes_num=1)

        # Backbone with its tail layers removed (see MODEL_HEADER_INFO).
        backbone = timm.create_model(model_name, pretrained=True, in_chans=1)
        head_end, feature_channels = MODEL_HEADER_INFO[model_name]
        self.model_head = nn.Sequential(*list(backbone.children())[:head_end])

        # fc_a produces per-step class logits, fc_b the attention scores.
        self.fc_a = nn.Conv1d(feature_channels, self.n_label, 1)
        self.fc_b = nn.Conv1d(feature_channels, self.n_label, 1)

    def forward(self, x):
        # x: raw waveforms as batched by the datasets in this notebook,
        # presumably (batch, samples) — a channel axis is inserted at dim 1.
        spec = self.mel_spectrogram_extractor(x.unsqueeze(1))  # (batch, channel, mel, frames)
        spec = self.amplitude_to_db(spec)
        frames_num = spec.shape[3]

        feature_map = F.relu(self.model_head(spec))  # (batch, unit, mel', frames')
        pooled = feature_map.mean(dim=2)  # average out dim 2 -> (batch, unit, frames')

        step_logits = self.fc_a(pooled)  # (batch, n_class, frames')
        attention = self.fc_b(pooled).softmax(dim=2)  # normalised over the time axis

        return {
            "clipwise_output": (step_logits * attention).sum(dim=2),
            "segmentwise_output": interpolate_and_padding(step_logits, frames_num),
        }

In [None]:
# Load the competition CSVs, build the soundscape evaluation set and derive
# the label <-> index maps from the recordings that are available locally.
input_root = Path(config.INPUT_ROOT)

train_metadata_df = pd.read_csv(input_root / "train_metadata.csv")
train_soundscape_labels_df = pd.read_csv(input_root / "train_soundscape_labels.csv")

test_audios = list((input_root / "train_soundscapes").glob("*.ogg"))
test_dset = TestDataset(test_audios)

# Keep only metadata rows whose primary label has an entry in INPUT_ROOT.
exist_labels = os.listdir(input_root)
print("original data:", len(train_metadata_df))
has_local_audio = train_metadata_df["primary_label"].isin(exist_labels)
train_metadata_df = train_metadata_df[has_local_audio].reset_index(drop=True)
print("use data:", len(train_metadata_df))

filenames = train_metadata_df["filename"]
primary_labels = train_metadata_df["primary_label"]

# Indices follow the order of first appearance of each label.
label_dic_inv = dict(enumerate(primary_labels.unique()))
label_dic = {name: index for index, name in label_dic_inv.items()}
n_labels = len(label_dic)

print("### labels ###")
print(label_dic)
print(label_dic_inv)

In [None]:
def train_loop(train_data_loader, model, optimizer, scheduler):
    """Train the model for one pass over ``train_data_loader``.

    Gradients are accumulated over ``config.N_ACCUMULATE`` batches before
    each optimizer step. The scheduler, when given, is stepped once per batch.

    Args:
        train_data_loader: Iterable of ``(X, y)`` batches with a length.
        model: Model whose output dict is accepted by ``birdclef_criterion``.
        optimizer: Optimizer updating ``model``.
        scheduler: Learning-rate scheduler or ``None``.

    Returns:
        ``(losses, lrs)``: the unscaled loss of every batch and the mean
        learning rate over the optimizer's parameter groups after each batch.
    """
    n_accumulate = max(1, int(config.N_ACCUMULATE))
    n_batches = len(train_data_loader)

    losses, lrs = [], []
    model.train()
    optimizer.zero_grad()
    for n_iter, (X, y) in tqdm_notebook(enumerate(train_data_loader), total=n_batches):
        X, y = X.to(device), y.to(device)
        outputs = model(X)
        loss = birdclef_criterion(outputs, y)
        # Scale so the accumulated gradient is an average over the group
        # rather than a sum; a no-op when n_accumulate == 1.
        (loss / n_accumulate).backward()

        # Step after every n_accumulate-th batch (counting from 1), and also
        # on the last batch so trailing gradients are not dropped. Testing
        # `n_iter % n == 0` would step right after the first batch.
        is_last_batch = (n_iter + 1) == n_batches
        if (n_iter + 1) % n_accumulate == 0 or is_last_batch:
            optimizer.step()
            optimizer.zero_grad()

        if scheduler is not None:
            scheduler.step()

        lrs.append(np.array([param_group["lr"] for param_group in optimizer.param_groups]).mean())
        losses.append(loss.item())

    return losses, lrs

In [None]:
def valid_loop(valid_data_loader, model):
    """Evaluate the model on ``valid_data_loader`` without gradient tracking.

    Args:
        valid_data_loader: Iterable of ``(X, y)`` batches with a length.
        model: Model whose output dict is accepted by ``birdclef_criterion``.

    Returns:
        ``(losses, valid_predicts)``: the loss of every batch and the
        clip-level logits of all batches concatenated along dim 0.
    """
    model.eval()
    batch_losses, batch_logits = [], []

    progress = tqdm_notebook(enumerate(valid_data_loader), total=len(valid_data_loader))
    for _, (X, y) in progress:
        X = X.to(device)
        y = y.to(device)
        with torch.no_grad():
            outputs = model(X)
        batch_losses.append(birdclef_criterion(outputs, y).item())
        # Clip-level logits are used as predictions (not the segment-level max).
        batch_logits.append(outputs["clipwise_output"])

    return batch_losses, torch.cat(batch_logits, dim=0)

In [None]:
def test_loop(test_dset, model):
    """Score the model on the labelled soundscapes.

    Every recording in ``test_dset`` is predicted window by window from the
    clip-level sigmoid scores, thresholded at ``config.THRESHOLD``, and
    compared with ``train_soundscape_labels_df``.

    Args:
        test_dset: Iterable of ``(clips, row_ids)`` items, one per recording.
        model: Model returning a dict with ``"clipwise_output"`` logits.

    Returns:
        The row-wise micro-averaged F1 score over all labelled rows that have
        a prediction.
    """
    res_dfs = []
    model.eval()
    for wave, row_ids in tqdm_notebook(test_dset):
        X = wave.to(device)
        with torch.no_grad():
            outputs = model(X)
        _pred = outputs["clipwise_output"].sigmoid().cpu()
        predict_labels = output_to_label(_pred, config.THRESHOLD)
        res_dfs.append(pd.DataFrame({"row_id": row_ids, "birds": predict_labels}))
    submission_df = pd.concat(res_dfs, axis=0)

    # Align predictions with the label file in a single merge instead of one
    # DataFrame.query per row_id. An inner merge keeps the label file's row
    # order and drops rows without a prediction from both sides, so y_true
    # and y_pred always have the same length.
    aligned_df = train_soundscape_labels_df[["row_id", "birds"]].merge(
        submission_df,
        on="row_id",
        how="inner",
        suffixes=("_true", "_pred"),
    )

    y_true = aligned_df["birds_true"].tolist()
    y_pred = aligned_df["birds_pred"].tolist()
    local_score = row_wise_micro_averaged_f1_score(y_true, y_pred)

    return local_score

In [None]:
def output_to_label(clipwise_output, thr):
    """Turn per-class scores into space-separated label strings.

    Args:
        clipwise_output: Iterable of score rows, one score per class index.
        thr: Scores strictly greater than this value count as present.

    Returns:
        One string per row: the names (via ``label_dic_inv``) of all classes
        above the threshold joined by spaces, or ``"nocall"`` if there are none.
    """
    def _row_to_string(scores):
        names = [label_dic_inv[i] for i, score in enumerate(scores) if score > thr]
        return " ".join(names) if names else "nocall"

    return [_row_to_string(pred) for pred in clipwise_output]

In [None]:
def calc_mAP(valid_primary_labels, valid_predicts):
    """Mean over classes of the precision at ``config.THRESHOLD``.

    Note that, despite the name, this is the unweighted mean of per-class
    ``precision_score`` values on thresholded predictions, not an average
    precision computed over the ranking of scores.

    Args:
        valid_primary_labels: Iterable of label names (keys of ``label_dic``).
        valid_predicts: Tensor of scores, rows aligned with the labels and
            one column per class index.

    Returns:
        The mean of the per-class precisions.
    """
    target_ids = np.array([label_dic[name] for name in valid_primary_labels])
    hard_preds = (valid_predicts > config.THRESHOLD).numpy().astype(int)

    per_class_precision = [
        precision_score((target_ids == class_id).astype(int), hard_preds[:, class_id])
        for class_id in range(n_labels)
    ]
    return np.array(per_class_precision).mean()

In [None]:
#%debug
# Stratified 5-fold training driver: for each selected fold it builds the
# data loaders, trains for config.NUM_EPOCHS epochs, logs metrics to wandb
# and writes checkpoints to the experiment's output directory.
skf = StratifiedKFold(n_splits=5,  shuffle=True, random_state=config.SEED)
for fold, (train_index, valid_index) in enumerate(skf.split(filenames, primary_labels)):
    # Folds 0-2 are skipped in this run; only folds 3 and 4 are trained.
    if fold in [0, 1, 2]:
        continue
    print(f"### Fold-{fold} ###")
    # Re-seed per fold so every fold starts from the same RNG state.
    set_seed(config.SEED)
    outdir = f"{config.OUTPUT_ROOT}/exp{config.EXP_NUM}_{config.EXP_NAME}"
    os.makedirs(outdir, exist_ok=True)

    # Prepare the datasets
    train_primary_labels = primary_labels.loc[train_index].values
    valid_primary_labels = primary_labels.loc[valid_index].values
    train_filenames = filenames.loc[train_index].values
    valid_filenames = filenames.loc[valid_index].values
    train_dset = BirdCLEFTrainDataset(train_filenames, train_primary_labels, "train")
    train_data_loader = torch.utils.data.DataLoader(train_dset, batch_size=config.BATCH_SIZE, shuffle=True)
    valid_dset = BirdCLEFTrainDataset(valid_filenames, valid_primary_labels, "valid")
    valid_data_loader = torch.utils.data.DataLoader(valid_dset, batch_size=config.BATCH_SIZE, shuffle=False)

    # Model, optimizer and scheduler
    model = BirdCLEFNet(config.MODEL_NAME)
    model.to(device)
    optimizer = Adam(model.parameters(), lr=config.LEARNING_RATE)
    # T_max is expressed in batches because train_loop steps the scheduler per batch.
    scheduler = CosineAnnealingLR(optimizer, T_max=len(train_data_loader)*config.T_MAX, eta_min=0.0)

    # Set up training-log tracking (wandb)
    uniqe_exp_name = f"exp{config.EXP_NUM}_freq{config.LABEL_FREQ}_f{fold}_{config.EXP_NAME}"
    wandb.init(project='toda_exp', entity='birdclef', name=uniqe_exp_name)
    wandb_config = wandb.config
    wandb_config.fold = fold
    # Copy every non-dunder attribute of `config` into the wandb run config.
    for k, v in dict(vars(config)).items():
        if k[:2] == "__":
            continue
        wandb_config[k] = v
    #wandb.watch(model)

    best_f1, best_mAP = 0, 0
    for epoch in range(config.NUM_EPOCHS):
        print(f"[{epoch} epoch]")
        train_losses, lrs = train_loop(train_data_loader, model, optimizer, scheduler)
        valid_losses, valid_predicts = valid_loop(valid_data_loader, model)
        # F1 on the labelled soundscapes in test_dset.
        f1_train_soundscape = test_loop(test_dset, model)

        # valid_loop returns logits; convert them to probabilities on the CPU.
        valid_predicts = valid_predicts.sigmoid().cpu()


        predict_labels = output_to_label(valid_predicts, config.THRESHOLD)
        epoch_f1 = row_wise_micro_averaged_f1_score(valid_primary_labels, predict_labels)
        epoch_mAP = calc_mAP(valid_primary_labels, valid_predicts)

        # Keep a separate checkpoint for the best epoch under each metric.
        if best_f1 < epoch_f1:
            best_f1 = epoch_f1
            torch.save(model.state_dict(), f"{outdir}/birdclefnet_f{fold}_f1_best_model.bin")
        if best_mAP < epoch_mAP:
            best_mAP = epoch_mAP
            torch.save(model.state_dict(), f"{outdir}/birdclefnet_f{fold}_mAP_best_model.bin")

        # Per-epoch summary sent to wandb.
        res_d = dict()
        res_d["t_loss"] = np.array(train_losses).mean()
        res_d["v_loss"] = np.array(valid_losses).mean()
        res_d["lr_avg"] = np.array(lrs).mean()
        res_d["epoch_f1"] = epoch_f1
        res_d["best_f1"] = best_f1
        res_d["epoch_mAP"] = epoch_mAP
        res_d["best_mAP"] = best_mAP
        res_d["f1_train_soundscape"] = f1_train_soundscape

        wandb.log(res_d)
        # Always overwrite the "last" checkpoint, whatever the metrics were.
        torch.save(model.state_dict(), f"{outdir}/birdclefnet_f{fold}_last_model.bin")

    wandb.finish()
    # break  # only Fold-0

In [None]:
!ls

In [None]:
# NOTE(review): this cell reuses train_data_loader, model, optimizer and
# scheduler left in the kernel by the fold loop above and runs one extra
# training pass on them — looks like a leftover debugging cell; confirm
# whether it should be kept.
train_losses, lrs = train_loop(train_data_loader, model, optimizer, scheduler)