In [None]:
# Print the GPU (if any) attached to this runtime before starting any work.
!nvidia-smi

In [None]:
# Mount Google Drive. Later cells reach it through the relative path
# "./drive/My Drive/...", i.e. they assume the working directory is /content.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install -q kaggle
!mkdir -p .kaggle
!cp "./drive/My Drive/Study/config/kaggle.json" .kaggle/
!chmod 600 .kaggle/kaggle.json
!mv .kaggle /root

#!kaggle datasets download "birdcall-spectrogram-images"
#!unzip birdcall-spectrogram-images.zip > /dev/null
#!rm birdcall-spectrogram-images.zip

#!kaggle datasets download "birdcall-spectrogram-images-cut"
#!unzip birdcall-spectrogram-images-cut.zip > /dev/null
#!rm -rf birdcall-spectrogram-images-cut.zip

In [None]:
# Download the spectrogram image archive, unpack it quietly and delete the zip.
# The loop below expects the archive to provide a "train_img_2" directory.
# NOTE(review): Kaggle dataset identifiers are normally "<owner>/<dataset>" —
# confirm that this bare slug resolves.
!kaggle datasets download "birdcall-spectrogram-images-cut-multi"
!unzip birdcall-spectrogram-images-cut-multi.zip > /dev/null
!rm -rf birdcall-spectrogram-images-cut-multi.zip

from PIL import Image
from pathlib import Path

# Delete every image that Pillow cannot read so that it cannot break training later.
for directory in Path("train_img_2").iterdir():
    # Only class folders are scanned; stray files such as ".DS_Store" are ignored.
    if not directory.is_dir():
        continue
    for path in directory.iterdir():
        if path.name == ".DS_Store" or not path.is_file():
            continue
        try:
            with open(path, 'rb') as f:
                # Image.open() only parses the header; verify() additionally runs
                # Pillow's integrity check and has to be called while the file is open.
                Image.open(f).verify()
        except Exception:
            # `Exception` (not a bare except) so that interrupting the cell with
            # Ctrl-C does not delete the file that happened to be under inspection.
            print(path)
            path.unlink()

In [None]:
import numpy as np
import pandas as pd
import os
import tqdm
import random
import time

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.optim import Adam, AdamW
from torchvision.models import resnet18
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import CosineAnnealingLR

import matplotlib.pyplot as plt

from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

from contextlib import contextmanager
from typing import Optional
import logging
from numpy.random import beta

# Use the GPU when the runtime provides one; otherwise fall back to the CPU so the
# notebook still runs (slowly) instead of failing at the first `.to(device)` call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def set_seed(seed: int = 42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and configures cuDNN for deterministic execution.

    Args:
        seed: Value used to seed all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    # Seeds every visible GPU; it is a silent no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # Benchmark mode lets cuDNN time several convolution algorithms and keep the
    # fastest one, which may differ between runs; it has to be off for the
    # deterministic setting above to give reproducible results.
    torch.backends.cudnn.benchmark = False  # type: ignore

class config:
    """Paths and hyper-parameters of a single training run (one fold)."""

    # --- reproducibility / cross-validation ---------------------------------
    SEED = 416
    N_FOLDS = 5
    FOLD = 0  # index of the stratified split used as validation set

    # --- model ---------------------------------------------------------------
    PRETRAINED = True    # forwarded to torchvision's resnet18(pretrained=...)
    N_UNIT = 512         # width of the pooled backbone features and hidden layer
    N_LABEL = 264        # number of output classes
    DROPOUT_RATE = 0.2

    # --- data ----------------------------------------------------------------
    # Earlier runs read from "./train_jpg/" instead.
    TRAIN_INPUT = "./train_img_2"
    VALID_INPUT = "./train_img_2"
    OUTPUT = "./drive/My Drive/Study/Bird/output/from_resnet18_28"

    # --- data loading --------------------------------------------------------
    TRAIN_BS, VALID_BS = 256, 256
    TRAIN_WORKS, VALID_WORKS = 0, 0  # DataLoader num_workers

    # --- optimisation --------------------------------------------------------
    EPOCHS = 55
    LR = 1e-3
    ALPHA = 0.2  # both parameters of the Beta distribution that draws the mixup ratio
    T_MAX = 10   # cosine annealing T_max, in epochs (scaled to steps at scheduler creation)

# Create the output directory on Drive (-p: no error if it already exists).
# The interpolated path is quoted because it contains a space.
!mkdir -p "{config.OUTPUT}"

In [None]:
# Disabled experiment, kept inside a string literal so that none of it executes:
# it looks up the "rating" column of train.csv for every image returned by
# ImageFolder. The resulting `rating_lst` is only referenced by commented-out
# code in get_dataloder().
"""train_df = pd.read_csv("./drive/My Drive/Study/Bird/birdsong-recognition/train.csv")
train_df = train_df[["filename", "rating"]]

_transform = transforms.Compose([])
_datasets = datasets.ImageFolder(root=config.VALID_INPUT, transform=_transform)
sound_files = [s[0].split("/")[-1].split(".")[0] for s in _datasets.samples]
rating_lst = [train_df.query(f"filename=='{s}.mp3'")["rating"].iloc[0] for s in tqdm.notebook.tqdm(sound_files)]"""

In [None]:
class FreqMask:
    """Frequency-masking augmentation for spectrogram tensors.

    Overwrites a random band of consecutive rows along dimension 1 of the input,
    which is the height axis of the ``(C, H, W)`` tensors that
    ``transforms.ToTensor()`` produces (assumed to be the frequency axis).

    Args:
        F: Exclusive upper bound for the sampled band width. Values larger than
            the number of rows are clamped; values <= 0 disable masking.
        num_masks: Number of bands drawn per call.
        replace_with_zero: Fill the band with 0 if True, otherwise with the mean
            of the tensor as it is at the time the band is written.
    """

    def __init__(self, F=30, num_masks=1, replace_with_zero=True):
        self.F = F
        self.num_masks = num_masks
        self.replace_with_zero = replace_with_zero

    def __call__(self, spec):
        """Return a masked copy of ``spec``; the input tensor is not modified."""
        cloned = spec.clone()
        num_mel_channels = cloned.shape[1]

        # Clamp the width bound so that random.randrange() never receives an
        # empty range when F is 0 or not smaller than the number of rows.
        max_width = min(self.F, num_mel_channels)
        if max_width <= 0:
            return cloned

        for _ in range(self.num_masks):
            f = random.randrange(0, max_width)
            f_zero = random.randrange(0, num_mel_channels - f)

            # A zero-width band has nothing to mask. Move on to the next band
            # instead of returning, so the remaining masks are still applied.
            if f == 0:
                continue

            mask_end = random.randrange(f_zero, f_zero + f)
            if self.replace_with_zero:
                cloned[:, f_zero:mask_end] = 0
            else:
                cloned[:, f_zero:mask_end] = cloned.mean()

        return cloned

def get_dataloder():
    """Create the training and validation data loaders for ``config.FOLD``.

    Both loaders read images with ``ImageFolder``; the training pipeline adds a
    random crop, a random gamma adjustment and random frequency masking, while
    the validation pipeline only centre-crops. A stratified K-fold split computed
    on the training targets selects the samples of each loader.

    Returns:
        Tuple ``(train_data_loader, valid_data_loader)``.
    """
    crop_size = (128, 313)

    def _apply_gamma(img):
        return transforms.functional.adjust_gamma(img, gamma=2, gain=1)

    train_transform = transforms.Compose([
        transforms.RandomCrop(crop_size, pad_if_needed=True, padding_mode="constant"),
        transforms.RandomApply([transforms.Lambda(_apply_gamma)], p=0.5),
        transforms.ToTensor(),
        transforms.RandomApply(
            [FreqMask(F=30, num_masks=1, replace_with_zero=False)],
            p=0.5,
        ),
    ])
    valid_transform = transforms.Compose([
        transforms.CenterCrop(crop_size),
        transforms.ToTensor(),
    ])

    train_set = datasets.ImageFolder(root=config.TRAIN_INPUT, transform=train_transform)
    valid_set = datasets.ImageFolder(root=config.VALID_INPUT, transform=valid_transform)

    # The split is computed on the training targets and its validation indices are
    # applied to the validation dataset, which assumes that both roots list the
    # same files in the same order (they are the same directory in `config`).
    labels = train_set.targets
    splitter = StratifiedKFold(
        n_splits=config.N_FOLDS, shuffle=True, random_state=config.SEED
    )
    folds = list(splitter.split(labels, labels))
    trn_idx, val_idx = folds[config.FOLD]

    train_data_loader = torch.utils.data.DataLoader(
        torch.utils.data.Subset(train_set, trn_idx),
        batch_size=config.TRAIN_BS,
        shuffle=True,
        num_workers=config.TRAIN_WORKS,
    )
    valid_data_loader = torch.utils.data.DataLoader(
        torch.utils.data.Subset(valid_set, val_idx),
        batch_size=config.VALID_BS,
        shuffle=False,
        num_workers=config.VALID_WORKS,
    )
    return train_data_loader, valid_data_loader

# Sanity check: fetch one training batch and display its first image.
# (Unpack as `_, data_loader = get_dataloder()` to preview validation data instead.)
data_loader, _ = get_dataloder()
for d in data_loader:
    break  # only the first batch is needed
img = d[0][0]
# Move the channel axis last, (C, H, W) -> (H, W, C), as imshow expects.
plt.imshow(np.transpose(img.numpy(), (1, 2, 0)))

In [None]:
def loss_fn(output, target):
    """Binary cross-entropy on raw logits, averaged over all elements.

    Args:
        output: Logits produced by the model.
        target: Targets of the same shape as ``output`` (one-hot or soft labels).

    Returns:
        Scalar loss tensor.
    """
    return F.binary_cross_entropy_with_logits(output, target)

def mixup(input, target, gamma):
    """Blend every sample of a batch with a randomly chosen partner (mixup).

    The same random permutation pairs both inputs and targets, and each pair is
    combined as ``gamma * sample + (1 - gamma) * partner``.

    Args:
        input: Batch of inputs; the first dimension is the batch dimension.
        target: Batch of targets in one-hot (or soft label) format.
        gamma: Mixing ratio given to the original sample.

    Returns:
        Tuple ``(mixed_input, mixed_target)``. New tensors are returned; the
        arguments are left untouched.
    """
    perm = torch.randperm(input.size(0))
    # Written out of place with the tensor on the left-hand side: this avoids the
    # deprecated `add_(Number, Tensor)` overload and does not overwrite the
    # caller's batch.
    mixed_input = input * gamma + input[perm] * (1 - gamma)
    mixed_target = target * gamma + target[perm] * (1 - gamma)
    return mixed_input, mixed_target

def rand_bbox(size, lam):
    """Return the fixed box ``(bbx1, bby1, bbx2, bby2)`` used by ``cutmix``.

    The box is constant: 0..128 for the ``bbx`` pair and 0..313 // 2 for the
    ``bby`` pair. ``size`` and ``lam`` are accepted but not used.
    """
    x_start, x_stop = 0, 128
    y_start, y_stop = 0, 313 // 2
    return x_start, y_start, x_stop, y_stop

def cutmix(input, target, gamma):
    """Paste a fixed region of a randomly chosen partner sample into every sample.

    The region comes from ``rand_bbox`` and indexes the last two dimensions of
    ``input``. Targets are merged as the union of both samples' labels, not as an
    area-weighted blend.

    Args:
        input: 4-D batch of images; the first dimension is the batch dimension.
        target: Batch of targets in one-hot (multi-hot) format.
        gamma: Forwarded to ``rand_bbox``.

    Returns:
        Tuple ``(mixed_input, mixed_target)``. New tensors are returned; the
        arguments are left untouched.
    """
    perm = torch.randperm(input.size(0))
    bbx1, bby1, bbx2, bby2 = rand_bbox(input.size(), gamma)

    mixed_input = input.clone()
    mixed_input[:, :, bbx1:bbx2, bby1:bby2] = input[perm, :, bbx1:bbx2, bby1:bby2]

    # A sample paired with itself, or with another sample of the same class, would
    # otherwise get a label value of 2, which is not a valid BCE target.
    mixed_target = (target + target[perm]).clamp_(max=1)
    return mixed_input, mixed_target


def last_layer_mixup(target, gamma):
    """Mix the targets of a batch and return the permutation that was used.

    The permutation is returned so that the caller can mix intermediate features
    with the same pairing (``BirdcallNet.forward`` accepts it as ``perm``).

    Args:
        target: Batch of targets in one-hot (or soft label) format.
        gamma: Mixing ratio given to the original sample.

    Returns:
        Tuple ``(perm, mixed_target)`` with
        ``mixed_target = gamma * target + (1 - gamma) * target[perm]``. A new
        tensor is returned; ``target`` is left untouched.
    """
    perm = torch.randperm(target.size(0))
    # Out of place, tensor on the left: avoids the deprecated
    # `add_(Number, Tensor)` overload and keeps the caller's tensor intact.
    mixed_target = target * gamma + target[perm] * (1 - gamma)
    return perm, mixed_target


class BirdcallNet(nn.Module):
    """ResNet-18 backbone followed by a two-layer classification head.

    ``forward`` optionally mixes the backbone feature maps of paired samples
    (feature-level mixup) before pooling.
    """

    def __init__(self):
        super().__init__()
        backbone = resnet18(pretrained=config.PRETRAINED)
        # Keep everything except the last two children of the torchvision model
        # (its own pooling and classification layers).
        feature_layers = list(backbone.children())[:-2]
        self.resnet_head = nn.Sequential(*feature_layers)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        self.dropout = nn.Dropout(p=config.DROPOUT_RATE)
        hidden_block = [
            nn.Linear(config.N_UNIT, config.N_UNIT),
            nn.ReLU(),
            nn.Dropout(p=config.DROPOUT_RATE),
        ]
        self.fc = nn.Sequential(
            *hidden_block,
            nn.Linear(config.N_UNIT, config.N_LABEL),
        )

    def forward(self, x, perm=None, gamma=None):
        """Return the class logits for the batch ``x``.

        Args:
            x: Batch of input images.
            perm: Optional batch permutation; when given, each feature map is
                blended with the feature map of its partner ``perm[i]``.
            gamma: Blend ratio for the sample's own features; needed when
                ``perm`` is given.
        """
        features = self.resnet_head(x)
        if perm is not None:
            features = gamma * features + (1 - gamma) * features[perm]
        pooled = self.pool(features).view(-1, config.N_UNIT)
        return self.fc(self.dropout(pooled))

In [None]:
def train_fn(train_data_loader, model, optimizer, scheduler=None):
    """Train ``model`` for one epoch using input mixup.

    Integer labels are expanded to one-hot vectors, each batch is mixed with a
    ratio drawn from ``Beta(config.ALPHA, config.ALPHA)`` and the model is
    optimised with ``loss_fn``.

    Args:
        train_data_loader: Iterable yielding ``(X, y)`` batches with integer labels.
        model: Network to train; expected to be on ``device`` already.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Optional LR scheduler, stepped once per batch (not per epoch).

    Returns:
        Tuple ``(mean_loss, lrs)``: the mean of the per-batch losses and the
        learning rate (averaged over parameter groups) recorded after every step.
    """
    # `import tqdm` alone does not load the `tqdm.notebook` submodule; import it
    # explicitly rather than relying on another library having done so.
    import tqdm.notebook

    losses, lrs = [], []
    model.train()
    t = tqdm.notebook.tqdm(train_data_loader, total=len(train_data_loader))
    for (X, y) in t:
        # Indexing the identity matrix turns integer labels into one-hot rows.
        y_true = torch.eye(config.N_LABEL)[y]

        gamma = beta(config.ALPHA, config.ALPHA)
        _X, _y = mixup(X, y_true, gamma)

        y_pred = model(_X.to(device))
        loss = loss_fn(y_pred, _y.to(device))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        losses.append(loss.item())
        lrs.append(np.array([param_group["lr"] for param_group in optimizer.param_groups]).mean())

    return sum(losses)/len(losses), lrs


def get_single_label_from_multi_predict(y, y_pred, threshould):
    """Reduce multi-label probabilities to a single predicted class per sample.

    For each sample:
      * if fewer than two classes reach ``threshould``, predict the arg-max class;
      * otherwise predict the true label when it is among the classes reaching
        the threshold, and the arg-max class when it is not.

    Because the true label is consulted, the result is an optimistic "is the
    target among the detected classes" prediction, not a deployable one.

    Args:
        y: 1-D CPU tensor of true integer labels.
        y_pred: 2-D CPU tensor ``(batch, classes)`` of probabilities.
        threshould: Probability at or above which a class counts as detected.

    Returns:
        1-D NumPy array with one class index per sample.
    """
    labels = y.long()
    detected = y_pred >= threshould
    top1 = y_pred.argmax(dim=1)

    # Vectorised form of the per-sample rule described above.
    several_detected = detected.sum(dim=1) >= 2
    true_label_detected = detected.gather(1, labels.view(-1, 1)).squeeze(1)
    chosen = torch.where(several_detected & true_label_detected, labels, top1)
    return chosen.numpy()
        
def valid_fn(valid_data_loader, model, threshould=0.5):
    """Evaluate ``model`` on the validation data.

    Two micro-averaged F1 scores are reported:
      * score A uses ``get_single_label_from_multi_predict`` (threshold based,
        consults the true label);
      * score B uses the plain arg-max of the logits.

    Scores are computed once over all validation samples and the loss is
    weighted by batch size, so a smaller final batch does not get the same
    weight as a full one.

    Args:
        valid_data_loader: Iterable yielding ``(X, y)`` batches with integer labels.
        model: Network to evaluate; expected to be on ``device`` already.
        threshould: Probability threshold forwarded to score A.

    Returns:
        Tuple ``(f1_a, f1_b, mean_loss)``.
    """
    # `import tqdm` alone does not load the `tqdm.notebook` submodule; import it
    # explicitly rather than relying on another library having done so.
    import tqdm.notebook

    model.eval()
    loss_sum, n_samples = 0.0, 0
    y_true_all, pred_a_all, pred_b_all = [], [], []
    t = tqdm.notebook.tqdm(valid_data_loader, total=len(valid_data_loader))
    for (X, y) in t:
        with torch.no_grad():
            y_pred = model(X.to(device), None, None)
            # Indexing the identity matrix turns integer labels into one-hot rows.
            _y = torch.eye(config.N_LABEL)[y]
            loss = loss_fn(y_pred, _y.to(device))

        batch_size = len(y)
        loss_sum += loss.item() * batch_size
        n_samples += batch_size

        y_true_all.append(y.numpy())
        pred_a_all.append(
            get_single_label_from_multi_predict(y, y_pred.sigmoid().cpu(), threshould)
        )
        pred_b_all.append(y_pred.argmax(1).cpu().numpy())

    y_true_all = np.concatenate(y_true_all)
    f1_a = f1_score(y_true_all, np.concatenate(pred_a_all), average="micro")
    f1_b = f1_score(y_true_all, np.concatenate(pred_b_all), average="micro")
    return f1_a, f1_b, loss_sum / n_samples

In [None]:
# Train one fold (config.FOLD): build the loaders, model, optimizer and scheduler,
# then run config.EPOCHS epochs, checkpointing and logging after each of them.
print(f"### Fold-{config.FOLD} ###")

# The seed is offset by the fold index, so each fold gets its own reproducible seed.
set_seed(config.SEED+config.FOLD)

train_data_loader, valid_data_loader = get_dataloder()

model = BirdcallNet()
model.to(device)

optimizer = Adam(model.parameters(), lr=config.LR)
# train_fn steps the scheduler once per batch, so T_max is expressed in optimizer
# steps: config.T_MAX epochs' worth of batches.
# NOTE(review): config.EPOCHS is larger than config.T_MAX, so training continues
# past T_max; check the learning-rate plot in the last cell for the actual shape.
scheduler = CosineAnnealingLR(optimizer, T_max=len(train_data_loader)*config.T_MAX, eta_min=0.0)

best_loss, best_score_a, best_score_b = 9999, 0, 0
trn_losses, trn_lrs, val_losses, val_scores_a, val_scores_b = [], [], [], [], []
for epoch in range(config.EPOCHS):
    print(f"{epoch} epoch")
    tloss, lrs = train_fn(train_data_loader, model, optimizer, scheduler)
    val_f1_a, val_f1_b, vloss = valid_fn(valid_data_loader, model)

    # Checkpoint the model with the best score A; `<=` means that on a tie the
    # later epoch replaces the earlier one.
    if best_score_a <= val_f1_a:
        best_score_a = val_f1_a
        torch.save(model.state_dict(), f"{config.OUTPUT}/birdcallnet_f{config.FOLD}_best_score_a.bin")
        print(f"Best Score A Update!!! -> {best_score_a}")

    # Checkpoint the model with the best score B (same tie rule).
    if best_score_b <= val_f1_b:
        best_score_b = val_f1_b
        torch.save(model.state_dict(), f"{config.OUTPUT}/birdcallnet_f{config.FOLD}_best_score_b.bin")
        print(f"Best Score B Update!!! -> {best_score_b}")

    # Checkpoint the model with the lowest validation loss (same tie rule).
    if best_loss >= vloss:
        best_loss = vloss
        torch.save(model.state_dict(), f"{config.OUTPUT}/birdcallnet_f{config.FOLD}_best_loss.bin")
        print(f"Best Loss Update!!! -> {best_loss}")

    # Always overwrite the "latest" model/optimizer/scheduler states after each epoch.
    torch.save(model.state_dict(), f"{config.OUTPUT}/birdcallnet_f{config.FOLD}_latest_model.bin")
    torch.save(optimizer.state_dict(), f"{config.OUTPUT}/birdcallnet_f{config.FOLD}_latest_optimizer.bin")
    torch.save(scheduler.state_dict(), f"{config.OUTPUT}/birdcallnet_f{config.FOLD}_latest_scheduler.bin")

    # Extend the in-memory history and rewrite the whole CSV log every epoch, so
    # the file on Drive always reflects the epochs completed so far.
    trn_losses.append(tloss)
    val_losses.append(vloss)
    val_scores_a.append(val_f1_a)
    val_scores_b.append(val_f1_b)
    trn_lrs.extend(lrs)
    log_df = pd.DataFrame(zip(trn_losses, val_losses, val_scores_a, val_scores_b), columns=["train loss", "valid loss", "score_a", "score_b"])
    log_df.to_csv(f"{config.OUTPUT}/valid_f1_fold{config.FOLD}_score.csv", index=True)

In [None]:
# Summary of the run, followed by the recorded training curves.
print(f"Best Score　A: {best_score_a} / Best Score　B: {best_score_b} / Best Loss: {best_loss}")

# Learning rate per optimizer step.
plt.plot(trn_lrs)
plt.show()

# Validation score A per epoch.
plt.plot(val_scores_a)
plt.show()

# Validation score B per epoch.
plt.plot(val_scores_b)
plt.show()

# Training and validation loss per epoch, drawn on the same axes.
plt.plot(trn_losses)
plt.plot(val_losses)
plt.show()