In [None]:
# Print the NVIDIA driver / GPU status for this runtime so the hardware used
# for the run is recorded in the notebook output.
!nvidia-smi

In [None]:
# Mount Google Drive at /content/drive (Colab only; prompts for authorisation).
# Later cells read the Kaggle credentials from Drive and write checkpoints to
# it through the relative path "./drive/My Drive/..." — presumably the working
# directory is /content; confirm if the notebook is run elsewhere.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%time

!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp "./drive/My Drive/Study/config/kaggle.json" ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

#!kaggle datasets download "birdcall-spectrogram-images"
#!unzip birdcall-spectrogram-images.zip > /dev/null
#!rm birdcall-spectrogram-images.zip

!kaggle datasets download "birdcall-spectrogram-images-cut"
!unzip birdcall-spectrogram-images-cut.zip > /dev/null
!rm -rf birdcall-spectrogram-images-cut.zip

In [None]:
import numpy as np
import pandas as pd
import os
import tqdm
import random
import time

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.optim import Adam, AdamW
from torchvision.models import resnet18
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau

import matplotlib.pyplot as plt

from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

from contextlib import contextmanager
from typing import Optional
import logging
from numpy.random import beta

# Use the GPU when one is attached, otherwise fall back to the CPU so that
# every later `.to(device)` call still works on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def set_seed(seed: int = 42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and configures
    cuDNN for repeatable runs.

    Args:
        seed: Seed value applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.cuda.manual_seed_all(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # The cuDNN auto-tuner may pick a different convolution algorithm on each
    # run, which defeats the purpose of seeding; it must be off for
    # reproducibility.
    torch.backends.cudnn.benchmark = False  # type: ignore

class config:
    """Static run configuration, read as ``config.NAME`` throughout the notebook."""

    # --- reproducibility / cross-validation ---
    SEED = 416
    N_FOLDS = 5
    FOLD = 4            # index of the fold held out for validation

    # --- data locations ---
    # Alternative validation root that was used previously: "./train_jpg/"
    TRAIN_INPUT = "./cut_image_from_resnet18_08"
    VALID_INPUT = "./cut_image_from_resnet18_08"
    OUTPUT = "./drive/My Drive/Study/Bird/output/from_resnet18_12"

    # --- data loading ---
    TRAIN_BS = 256
    VALID_BS = 256
    TRAIN_WORKS = 0     # DataLoader num_workers for training
    VALID_WORKS = 0     # DataLoader num_workers for validation

    # --- model ---
    PRETRAINED = True
    N_LABEL = 264       # size of the output layer / one-hot targets
    N_UNIT = 512        # feature width fed into the final linear layer
    DROPOUT_RATE = 0.2

    # --- optimisation ---
    EPOCHS = 50
    RESNET_LR = 1e-3
    FC_LR = 1e-3
    WD = 0.01
    ALPHA = 0.2         # both parameters of the Beta distribution used for mixup
    T_MAX = 10          # multiplied by batches-per-epoch to get the scheduler's T_max

# Create the output directory (and any missing parents) without going through
# the shell, so paths containing spaces or quotes need no escaping and a real
# failure raises instead of only printing a message.
os.makedirs(config.OUTPUT, exist_ok=True)

In [None]:
class FreqMask:
    """Frequency-masking augmentation for spectrogram tensors.

    Each mask fills a randomly placed band of rows along dimension 1 of the
    input, across every index of dimension 0.

    Args:
        F: Exclusive upper bound of the randomly drawn band-width parameter.
        num_masks: Number of masks attempted per call.
        replace_with_zero: Fill the band with 0 when True, otherwise with the
            mean of the (possibly already masked) tensor.
    """

    def __init__(self, F=30, num_masks=1, replace_with_zero=True):
        self.F = F
        self.num_masks = num_masks
        self.replace_with_zero = replace_with_zero

    def __call__(self, spec):
        """Return a masked copy of ``spec``; the input tensor is not modified."""
        cloned = spec.clone()
        num_mel_channels = cloned.shape[1]

        # Nothing can be masked: avoid randrange() raising on an empty range.
        if num_mel_channels < 1 or self.F < 1:
            return cloned

        for _ in range(self.num_masks):
            # Clamp the width so the start range below is never empty when the
            # spectrogram has fewer rows than the drawn width.
            f = min(random.randrange(0, self.F), num_mel_channels - 1)
            f_zero = random.randrange(0, num_mel_channels - f)

            # A zero-width draw only skips THIS mask; the remaining masks are
            # still applied (previously the whole call returned early).
            if f == 0:
                continue

            mask_end = random.randrange(f_zero, f_zero + f)
            fill_value = 0 if self.replace_with_zero else cloned.mean()
            cloned[:, f_zero:mask_end] = fill_value

        return cloned

def get_dataloder():
    """Build the training and validation DataLoaders for fold ``config.FOLD``.

    Both image folders are split with the same seeded stratified K-fold. The
    fold indices are computed from the training folder's targets and reused
    for the validation folder, so this assumes both roots list the same files
    in the same order (they point to the same directory in ``config``).

    Returns:
        (train_data_loader, valid_data_loader)
    """
    crop_size = (128, 313)

    # Training: random crop (padded when the image is too small) + frequency masking.
    train_transform = transforms.Compose([
        transforms.RandomCrop(crop_size, pad_if_needed=True, padding_mode="constant"),
        transforms.ToTensor(),
        FreqMask(num_masks=2),
    ])
    # Validation: deterministic centre crop, no augmentation.
    valid_transform = transforms.Compose([
        transforms.CenterCrop(crop_size),
        transforms.ToTensor(),
    ])

    full_train = datasets.ImageFolder(root=config.TRAIN_INPUT, transform=train_transform)
    full_valid = datasets.ImageFolder(root=config.VALID_INPUT, transform=valid_transform)

    labels = full_train.targets
    splitter = StratifiedKFold(n_splits=config.N_FOLDS, shuffle=True, random_state=config.SEED)
    trn_idx, val_idx = list(splitter.split(labels, labels))[config.FOLD]

    train_data_loader = torch.utils.data.DataLoader(
        torch.utils.data.Subset(full_train, trn_idx),
        batch_size=config.TRAIN_BS,
        shuffle=True,
        num_workers=config.TRAIN_WORKS,
    )
    valid_data_loader = torch.utils.data.DataLoader(
        torch.utils.data.Subset(full_valid, val_idx),
        batch_size=config.VALID_BS,
        shuffle=False,
        num_workers=config.VALID_WORKS,
    )
    return train_data_loader, valid_data_loader

# Sanity check: pull one training batch and display its first image.
train_data_loader, _ = get_dataloder()
d = next(iter(train_data_loader))
img = d[0][0]
# Move the leading axis to the end, since imshow expects the colour axis last.
plt.imshow(img.numpy().transpose(1, 2, 0))

In [None]:
def loss_fn(output, target):
    """Mean binary cross-entropy between raw logits and same-shaped float targets."""
    return F.binary_cross_entropy_with_logits(output, target)


def mixup(input, target, gamma):
    """Blend each sample with a randomly chosen partner from the same batch.

    Computes ``gamma * x + (1 - gamma) * x[perm]`` for both the inputs and the
    targets, using one shared random permutation of the batch dimension.

    NOTE: ``input`` and ``target`` are modified IN PLACE and the very same
    tensors are returned; pass clones if the originals are still needed.

    Args:
        input: Float tensor whose first dimension is the batch.
        target: Float one-hot (or soft) targets with the same batch size.
        gamma: Mixing weight given to the un-permuted sample.

    Returns:
        (mixed_input, mixed_target)
    """
    gamma = float(gamma)  # accept NumPy scalars such as numpy.random.beta output
    perm = torch.randperm(input.size(0))
    # Indexing with a tensor copies, so the partners are unaffected by the
    # in-place scaling below.
    perm_input = input[perm]
    perm_target = target[perm]
    # Keyword `alpha` replaces the deprecated add_(alpha, tensor) overload.
    mixed_input = input.mul_(gamma).add_(perm_input, alpha=1 - gamma)
    mixed_target = target.mul_(gamma).add_(perm_target, alpha=1 - gamma)
    return mixed_input, mixed_target


class BirdcallNet(nn.Module):
    """ResNet-18 feature extractor followed by dropout and a linear classifier.

    ``resnet_head`` is built from every child of ``resnet`` except the last
    one. The full ``resnet`` module is kept as an attribute as well, so the
    shared layers are also reachable (and saved) under ``resnet.*``.
    """

    def __init__(self):
        super().__init__()

        self.resnet = resnet18(pretrained=config.PRETRAINED)
        backbone_layers = list(self.resnet.children())[:-1]
        self.resnet_head = nn.Sequential(*backbone_layers)

        self.dropout = nn.Dropout(p=config.DROPOUT_RATE)
        self.fc = nn.Linear(config.N_UNIT, config.N_LABEL)

    def forward(self, x):
        """Return raw (pre-sigmoid) logits with ``config.N_LABEL`` columns."""
        features = self.resnet_head(x)
        # Flatten the backbone output into rows of N_UNIT features.
        features = features.view(-1, config.N_UNIT)
        return self.fc(self.dropout(features))

In [None]:
def train_fn(train_data_loader, model, optimizer, scheduler=None):
    """Train ``model`` for one pass over ``train_data_loader`` with mixup.

    Integer class labels are one-hot encoded, mixed with ``mixup`` using a
    weight drawn from Beta(config.ALPHA, config.ALPHA), and optimised with
    ``loss_fn``. When given, the scheduler is stepped after every batch.

    Args:
        train_data_loader: Iterable of ``(X, y)`` batches, ``y`` being integer
            class indices.
        model: Network to optimise (switched to train mode).
        optimizer: Optimiser updating ``model``.
        scheduler: Optional LR scheduler stepped once per batch.

    Returns:
        (mean batch loss, list with the mean learning rate over the
        optimiser's param groups recorded after each step)
    """
    # `import tqdm` alone does not load the `tqdm.notebook` submodule; import
    # it explicitly instead of relying on another package having done so.
    from tqdm.notebook import tqdm as notebook_tqdm

    losses, lrs = [], []
    model.train()
    progress = notebook_tqdm(train_data_loader, total=len(train_data_loader))
    for (X, y) in progress:

        # Row-indexing an identity matrix yields one-hot float targets.
        y_true = torch.eye(config.N_LABEL)[y]
        _X, _y = mixup(X, y_true, beta(config.ALPHA, config.ALPHA))
        y_pred = model(_X.to(device))
        loss = loss_fn(y_pred, _y.to(device))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        losses.append(loss.item())
        lrs.append(np.array([param_group["lr"] for param_group in optimizer.param_groups]).mean())

    return sum(losses) / len(losses), lrs


def get_single_label_from_multi_predict(y, y_pred, threshould):
    """Collapse per-class scores into one predicted label per sample.

    For each sample:
      * if fewer than two classes reach ``threshould``, predict the arg-max;
      * otherwise predict the true label when it is among the classes that
        reach ``threshould``, else the arg-max.

    Note that the ground truth takes part in the decision, so the result is a
    "true label is among the confident predictions" style of hit.

    Vectorised: the whole batch is handled with tensor ops instead of a
    per-sample Python loop over every class score.

    Args:
        y: 1-D integer class indices, one per sample.
        y_pred: 2-D scores of shape (n_samples, n_classes).
        threshould: Score at or above which a class counts as predicted.

    Returns:
        1-D NumPy array with one label per sample.
    """
    probs = torch.as_tensor(y_pred).detach().cpu()
    labels = torch.as_tensor(y).detach().cpu().long().view(-1)
    if probs.shape[0] == 0:
        return np.array([])

    above = probs >= threshould
    top1 = probs.argmax(dim=1)
    # Whether each sample's true class is among its above-threshold classes.
    true_label_hit = above.gather(1, labels.unsqueeze(1)).squeeze(1)
    use_true_label = (above.sum(dim=1) >= 2) & true_label_hit
    return torch.where(use_true_label, labels, top1).numpy()
        
def valid_fn(valid_data_loader, model, threshould=0.5):
    """Evaluate ``model`` on ``valid_data_loader``.

    The micro-F1 is computed once over all samples and the loss is weighted by
    batch size, so a shorter final batch no longer counts as much as a full
    one (previously both were unweighted means of per-batch values).

    Args:
        valid_data_loader: Iterable of ``(X, y)`` batches, ``y`` being integer
            class indices.
        model: Network to evaluate (switched to eval mode).
        threshould: Sigmoid score at or above which a class counts as
            predicted; forwarded to ``get_single_label_from_multi_predict``.

    Returns:
        (micro-F1 over all samples, sample-weighted mean loss)
    """
    # `import tqdm` alone does not load the `tqdm.notebook` submodule; import
    # it explicitly instead of relying on another package having done so.
    from tqdm.notebook import tqdm as notebook_tqdm

    model.eval()
    loss_total, n_seen = 0.0, 0
    true_labels, pred_labels = [], []
    progress = notebook_tqdm(valid_data_loader, total=len(valid_data_loader))
    for (X, y) in progress:

        with torch.no_grad():
            y_pred = model(X.to(device))
            _y = torch.eye(config.N_LABEL)[y]
            loss = loss_fn(y_pred, _y.to(device))

        batch_size = len(y)
        loss_total += loss.item() * batch_size
        n_seen += batch_size

        pred_labels.append(
            get_single_label_from_multi_predict(y, y_pred.sigmoid().cpu(), threshould)
        )
        true_labels.append(np.asarray(y))

    f1 = f1_score(np.concatenate(true_labels), np.concatenate(pred_labels), average="micro")
    return f1, loss_total / n_seen

In [None]:
print(f"### Fold-{config.FOLD} ###")

# Each fold gets its own seed, derived from the base seed.
set_seed(config.SEED + config.FOLD)

train_data_loader, valid_data_loader = get_dataloder()

model = BirdcallNet()
model.to(device)

# Separate learning rates for the backbone and the classifier layer.
param_groups = [
    {'params': model.resnet.parameters(), 'lr': config.RESNET_LR},
    {'params': model.fc.parameters(), 'lr': config.FC_LR},
]
optimizer = Adam(param_groups)
# The scheduler is stepped once per batch inside train_fn, so T_max is
# expressed in batches: config.T_MAX epochs' worth of steps.
scheduler = CosineAnnealingLR(
    optimizer,
    T_max=len(train_data_loader) * config.T_MAX,
    eta_min=0.0,
)

best_loss, best_score = 9999, 0
trn_losses, trn_lrs, val_losses, val_scores = [], [], [], []
for epoch in range(config.EPOCHS):
    print(f"{epoch} epoch")
    tloss, lrs = train_fn(train_data_loader, model, optimizer, scheduler)
    val_f1, vloss = valid_fn(valid_data_loader, model)

    # Checkpoint whenever the validation score ties or improves.
    if val_f1 >= best_score:
        best_score = val_f1
        torch.save(model.state_dict(), f"{config.OUTPUT}/birdcallnet_f{config.FOLD}_best_score.bin")
        print(f"Best Score Update!!! -> {best_score}")

    # Checkpoint whenever the validation loss ties or improves.
    if vloss <= best_loss:
        best_loss = vloss
        torch.save(model.state_dict(), f"{config.OUTPUT}/birdcallnet_f{config.FOLD}_best_loss.bin")
        print(f"Best Loss Update!!! -> {best_loss}")

    # Accumulate the history and rewrite the CSV log after every epoch so a
    # partial run still leaves a usable record.
    trn_losses.append(tloss)
    val_losses.append(vloss)
    val_scores.append(val_f1)
    trn_lrs.extend(lrs)
    log_df = pd.DataFrame({
        "train loss": trn_losses,
        "valid loss": val_losses,
        "score": val_scores,
    })
    log_df.to_csv(f"{config.OUTPUT}/valid_f1_fold{config.FOLD}_score.csv", index=True)

In [None]:
print(f"Best Score: {best_score} / Best Loss: {best_loss}")

# Learning rate recorded after every optimiser step (mean over param groups).
plt.plot(trn_lrs)
plt.title("Learning rate schedule")
plt.xlabel("training step")
plt.ylabel("learning rate")
plt.show()

# Validation score, one point per epoch.
plt.plot(val_scores)
plt.title("Validation micro-F1")
plt.xlabel("epoch")
plt.ylabel("F1")
plt.show()

# Train vs. validation loss; labelled so the two curves can be told apart.
plt.plot(trn_losses, label="train loss")
plt.plot(val_losses, label="valid loss")
plt.title("Loss per epoch")
plt.xlabel("epoch")
plt.ylabel("BCE-with-logits loss")
plt.legend()
plt.show()
#display(log_df)

In [None]:
#from sklearn.metrics import confusion_matrix
#import seaborn as sns
#model.load_state_dict(torch.load(f"{config.OUTPUT}/birdcallnet_f{config.FOLD}_best_loss.bin"))
#model.eval()
#lst1, lst2 = [], []
#for X, y in valid_data_loader:
#    with torch.no_grad():
#        y_pred = model(X.to(device))
#    p = get_single_label_from_multi_predict(y, y_pred.cpu(), 0.5)
#    lst1.append(p)
#    lst2.append(y.numpy())
#cm = confusion_matrix(np.hstack(lst2), np.hstack(lst1))
#sns.heatmap(cm, annot=True, cmap='Blues')

In [None]:
#model.load_state_dict(torch.load(f"{config.OUTPUT}/birdcallnet_f{config.FOLD}_best_loss.bin"))
#lst = []
#for _thr in range(9):
#    thr = (_thr+1)*0.1
#    val_f1, _ = valid_fn(valid_data_loader, model, threshould=thr)
#    print(thr, val_f1)
#    lst.append((thr, val_f1))
#display(pd.DataFrame(lst, columns=["threshold", "score"]))
#plt.plot(np.array(lst)[:, 1])