In [None]:
# Show which GPU (if any) is attached to this runtime before doing any work.
!nvidia-smi

In [None]:
# Mount Google Drive at /content/drive. Later cells read the Kaggle token and
# model checkpoints from it and write the extracted crops back to it.
# NOTE(review): those cells use the relative path "./drive/...", which assumes
# the working directory is /content — confirm if the notebook is run elsewhere.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install the Kaggle CLI and place the API token (copied from Drive) in ~/.kaggle/.
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp "./drive/My Drive/Study/config/kaggle.json" ~/.kaggle/
# Restrict the token file to owner read/write.
!chmod 600 ~/.kaggle/kaggle.json
# Download the spectrogram image archive, unpack it into the working directory
# (unzip's listing is discarded), then delete the archive.
# NOTE(review): the dataset slug has no "owner/" prefix — confirm it resolves
# for the account stored in kaggle.json.
!kaggle datasets download "birdcall-spectrogram-images"
!unzip birdcall-spectrogram-images.zip > /dev/null
!rm birdcall-spectrogram-images.zip

In [None]:
import logging
import os
import random
import time
from contextlib import contextmanager
from typing import Optional

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook  # `import tqdm` alone does not load the `notebook` submodule
from numpy.random import beta
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

import torch
import torch.nn as nn
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from torchvision import datasets, transforms
from torchvision.models import resnet18

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook can still be executed (slowly) on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
class config:
    """Constants shared by the model definition and the fold-wise inference cells."""

    # --- cross-validation ---------------------------------------------------
    SEED = 416            # random_state passed to StratifiedKFold
    N_FOLDS = 5           # number of CV splits (one checkpoint is loaded per fold)

    # --- network ------------------------------------------------------------
    PRETRAINED = False    # forwarded to resnet18(pretrained=...)
    N_UNIT = 512          # width of the flattened backbone features fed to the classifier
    DROPOUT_RATE = 0.2    # dropout probability applied before the classifier
    N_LABEL = 264         # number of output logits

In [None]:
def mono_to_color(X, mean=None, std=None, norm_max=None, norm_min=None, eps=1e-6):
    """Turn a single-channel array into a 3-channel ``uint8`` image.

    The input is replicated along a new last axis, standardized, clipped to
    ``[norm_min, norm_max]`` and rescaled to ``[0, 255]``.

    Args:
        X: array-like with the single-channel data (e.g. an ``(H, W)`` spectrogram).
        mean: value subtracted before scaling; defaults to the mean of ``X``.
        std: value the centered data is divided by; defaults to its standard deviation.
        norm_max: upper clipping bound in standardized units; defaults to the data maximum.
        norm_min: lower clipping bound in standardized units; defaults to the data minimum.
        eps: small constant that guards the division by ``std`` and is used as the
            threshold below which the standardized data is treated as constant.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` with shape ``X.shape + (3,)``. All zeros
        when the standardized data has (almost) no dynamic range.
    """
    # Stack X as [X, X, X] along a trailing channel axis.
    X = np.stack([X, X, X], axis=-1)

    # Standardize. Compare against None explicitly: the previous `value or default`
    # form discarded legitimate falsy arguments such as mean=0.0 or norm_min=0.
    if mean is None:
        mean = X.mean()
    X = X - mean
    if std is None:
        std = X.std()
    Xstd = X / (std + eps)

    _min, _max = Xstd.min(), Xstd.max()
    if norm_max is None:
        norm_max = _max
    if norm_min is None:
        norm_min = _min

    if (_max - _min) > eps:
        # Clip to the requested range, then map it linearly onto [0, 255].
        V = np.clip(Xstd, norm_min, norm_max)
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # Constant input: nothing to normalize, return a black image.
        V = np.zeros_like(Xstd, dtype=np.uint8)
    return V

In [None]:
class BirdcallNet(nn.Module):
    """ResNet-18 backbone followed by dropout and a linear classification layer.

    The attribute names (``resnet``, ``resnet_head``, ``dropout``, ``fc``) define
    the ``state_dict`` keys, so they must not be renamed or removed while saved
    checkpoints produced by this class are still being loaded.
    """

    def __init__(self):
        super().__init__()

        # The full ResNet stays registered on the module; the truncated copy
        # below reuses its layers (same parameter objects).
        self.resnet = resnet18(pretrained=config.PRETRAINED)
        # Every child except the last one (the classification layer in
        # torchvision's ResNet), chained into a feature extractor.
        backbone_layers = list(self.resnet.children())[:-1]
        self.resnet_head = nn.Sequential(*backbone_layers)

        self.dropout = nn.Dropout(p=config.DROPOUT_RATE)
        self.fc = nn.Linear(config.N_UNIT, config.N_LABEL)

    def forward(self, x):
        """Return raw logits of shape ``(-1, config.N_LABEL)`` for the image batch ``x``."""
        features = self.resnet_head(x)
        # Flatten the backbone output to (batch, N_UNIT) before the classifier.
        flat = features.view(-1, config.N_UNIT)
        return self.fc(self.dropout(flat))

In [None]:
# Disabled experiment: the whole cell is a single string literal, so none of
# the code inside it runs. It appears to look for windows in which a single
# checkpoint predicts several of a recording's listed labels at once.
# NOTE(review), should this code be revived:
#   * windows are counted with `pred.sigmoid() > 0.5`, but the later selection
#     applies `>= 0.5` to the raw model output `p1` — confirm which is intended;
#   * `eval(x["secondary_labels"])` executes text read from the CSV; prefer
#     `ast.literal_eval`.
"""train = pd.read_csv("./drive/My Drive/Study/Bird/birdsong-recognition/train.csv")

train["multi_label"] = train.apply(lambda x: [x["primary_label"]] + eval(x["secondary_labels"]) ,axis=1)

primary_label2ebird_code = {
    df["primary_label"].unique()[0]: ebird_code 
    for ebird_code, df in train[["ebird_code", "primary_label"]].groupby("ebird_code")
}

lst = []
for multi_label in train["multi_label"]:
    _lst = []
    for lab in multi_label:
        try:
            code = primary_label2ebird_code[lab]
        except KeyError:
            continue
        _lst.append(code)
    lst.append(_lst)
train["multi_ebird_code"] = lst
train[["multi_ebird_code"]].sample(4)

model = BirdcallNet()
model.load_state_dict(torch.load(f"./drive/My Drive/Study/Bird/output/from_resnet18_10/birdcallnet_f0_best_loss.bin", map_location=torch.device('cpu')))
model.eval()
model.to(device)

train_transform = transforms.Compose([transforms.ToTensor(),])
train_datasets = datasets.ImageFolder(root="./train_jpg/", transform=train_transform)

skf = StratifiedKFold(n_splits=config.N_FOLDS, shuffle=True, random_state=config.SEED)

_t = train_datasets.targets
val_idx = [val_idx for _, val_idx in skf.split(_t, _t)]

window = 313

for fi, (img, y) in tqdm.notebook.tqdm(enumerate(train_datasets), total=len(train_datasets)):

    label = train_datasets.classes[y]
    multi_labels = train["multi_ebird_code"].iloc[fi]

    if img.shape[2] < window:
        pad = torch.zeros((3, 128, window - img.shape[2]))
        img = torch.cat([img, pad], dim=2)

    lst1, lst2 = [], []
    for wi in range(img.shape[2]//window):
        _img = img[:,:,wi*window:wi*window+window]
        with torch.no_grad():
            pred = model(_img.unsqueeze(0).to(device))
        lst1.append(pred)
        _p = sum(pred.sigmoid()[0] > 0.5)
        lst2.append(_p)

    multi_target_flag = False
    multi_targets = []
    if max(lst2) > 1 and len(multi_labels) > 1:
        for p1, p2 in zip(lst1, lst2):
            if p2 < 2:
                continue
            _, pred_idx = np.where(p1.cpu() >= 0.5)
            multi_target = [train_datasets.classes[i] for i in pred_idx if train_datasets.classes[i] in multi_labels]
            if len(multi_target) > 1 and label in multi_target:
                multi_target_flag = True
                multi_targets.append(multi_target)
                break

    if multi_target_flag:
        if len(multi_targets) > 1:
            print(2222)
            break
        print(111)"""

In [None]:
# Load one trained BirdcallNet per cross-validation fold. Weights are first
# deserialized onto the CPU, then each network is moved to `device` and put
# into inference mode.
models = []
for fold in range(config.N_FOLDS):
    fold_weights = torch.load(
        f"./drive/My Drive/Study/Bird/output/from_resnet18_12/birdcallnet_f{fold}_best_loss.bin",
        map_location=torch.device('cpu'),
    )
    fold_model = BirdcallNet()
    fold_model.load_state_dict(fold_weights)
    # .to() and .eval() both return the module itself, so the calls can be chained.
    models.append(fold_model.to(device).eval())

In [None]:
# Destination folder for the extracted crops.
OUTPUT = "./drive/My Drive/Study/Bird/input/cut_image_from_resnet18_12_nocall"
# Pure-Python equivalent of `mkdir -p`: creates missing parents and is a no-op
# when the folder already exists, without going through a shell.
os.makedirs(OUTPUT, exist_ok=True)

In [None]:
# Disabled helper: the whole cell is a single string literal, so none of the
# code inside it runs. It appears to list the files already written under
# OUTPUT and report the last dataset index that has one — presumably to find
# where an interrupted extraction run should resume (TODO confirm).
"""lst = []
for d in os.listdir(OUTPUT):
    for dd in os.listdir(f"{OUTPUT}/{d}"):
        lst.append(dd)

wav_lst = [l.split("_")[0] for l in lst]

train_transform = transforms.Compose([transforms.ToTensor(),])
train_datasets = datasets.ImageFolder(root="./train_jpg/", transform=train_transform)
for fi, d in enumerate(train_datasets.samples):
    fnames = train_datasets.imgs[fi][0].split("/")[3].replace(".jpg", "")
    if fnames in wav_lst:
        last_idx = fi
last_idx"""

In [None]:
# Extract "nocall" crops: for every sufficiently long spectrogram, score each
# fixed-width window with the model of the fold whose validation split contains
# the recording, and save the window with the lowest peak class probability if
# that probability is below NOCALL_MAX_PROB.
#
# Earlier variants of this cell (removed commented-out code) saved either the
# highest-scoring window per recording or every window scoring >= 0.5 into
# per-class folders, and could skip the first N samples to resume a run.

train_transform = transforms.Compose([transforms.ToTensor(),])
train_datasets = datasets.ImageFolder(root="./train_jpg/", transform=train_transform)

skf = StratifiedKFold(n_splits=config.N_FOLDS, shuffle=True, random_state=config.SEED)

_t = train_datasets.targets
val_idx = [fold_val_idx for _, fold_val_idx in skf.split(_t, _t)]

# Map each sample index to its validation fold once, instead of scanning every
# fold's index array for every sample.
# NOTE(review): presumably the checkpoints were trained with this same split, so
# the selected model has not seen the recording during training — confirm.
fold_of_sample = np.empty(len(_t), dtype=np.int64)
for fold, idxs in enumerate(val_idx):
    fold_of_sample[idxs] = fold

window = 313             # crop width, measured along the last image axis
MIN_WINDOWS = 3          # recordings with fewer full windows are skipped
NOCALL_MAX_PROB = 0.01   # a window qualifies when its peak class probability is below this

label = "0_nocall"
os.makedirs(f"{OUTPUT}/{label}", exist_ok=True)

for fi, (img, y) in tqdm.notebook.tqdm(enumerate(train_datasets), total=len(train_datasets)):

    # img is indexed as (channel, height, width); only full windows are used.
    # Short recordings are skipped, so no padding is ever needed (the old
    # zero-padding branch after this check was unreachable).
    n_windows = img.shape[2] // window
    if n_windows < MIN_WINDOWS:
        continue

    model = models[fold_of_sample[fi]]

    # Peak sigmoid probability over all classes, one value per window.
    lst = []
    for wi in range(n_windows):
        _img = img[:, :, wi * window:(wi + 1) * window]
        with torch.no_grad():
            pred = model(_img.unsqueeze(0).to(device))
        lst.append(pred.sigmoid().detach().cpu().numpy()[0].max())

    # First window with the smallest peak probability.
    scores = np.array(lst)
    bad_img_idx = int(scores.argmin())
    if scores[bad_img_idx] >= NOCALL_MAX_PROB:
        continue

    bad_img = img[:, :, bad_img_idx * window:(bad_img_idx + 1) * window]
    # Derive the output name from the file's base name rather than from a fixed
    # position in the "/"-split path, so it does not depend on the root's depth.
    stem = os.path.splitext(os.path.basename(train_datasets.imgs[fi][0]))[0]
    fname = f"{stem}_bad.jpg"
    outpath = f"{OUTPUT}/{label}/{fname}"
    # Only the first channel is written; mono_to_color expands it back to 3 channels.
    cv2.imwrite(outpath, mono_to_color(bad_img[0].numpy()))