In [None]:
# Show which GPU (if any) is attached to this runtime before doing any work.
!nvidia-smi

In [None]:
# Mount Google Drive: the Kaggle credentials, model checkpoints and output
# files used by the "./drive/..." paths in later cells all live there.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%%time
# 大体10分くらい

!pip install -q kaggle
!mkdir -p .kaggle
!cp "./drive/My Drive/Study/config/kaggle.json" .kaggle/
!chmod 600 .kaggle/kaggle.json
!mv .kaggle /root

!kaggle datasets download "theoviel/rcfx-spectrograms-32-khz"
!unzip rcfx-spectrograms-32-khz.zip > /dev/null
!rm -rf rcfx-spectrograms-32-khz.zip 

!pip install -U iterative-stratification albumentations wandb  > /dev/null
!wandb login e0792bb688a0d18e359df7438c45da90f8794091

!pip install timm
!pip install imbalanced-learn

In [None]:
import gc
import os
import pickle
import random
from datetime import datetime

import albumentations as A
import numpy as np
import pandas as pd
import timm
import torch
import torch.nn as nn
import tqdm
import tqdm.auto
import wandb
from imblearn.under_sampling import RandomUnderSampler
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from matplotlib import pyplot as plt
from numpy.random import beta
from sklearn.metrics import classification_report, roc_auc_score
from torch.nn import functional as F
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau, CosineAnnealingWarmRestarts
from torchvision import transforms
from torchvision.models import resnet18, densenet121

# Prefer the GPU, but fall back to the CPU so the notebook still runs (slowly)
# on a runtime without CUDA instead of failing when tensors are moved.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pseudo-label generation

In [None]:
# Name of the first-stage experiment whose fold checkpoints are used for pseudo labelling.
EXP = "exp0153_resnet18_focal_mixup_pseudo0.5"
# Directory the per-fold "rfcxnet_f{fold}_best_score_model.bin" checkpoints are loaded from.
BEST_MODEL = f"./drive/MyDrive/Study/RFCX/output/{EXP}"
# Directory containing train_tp.csv and train_fp.csv.
DATA_ROOT = "./drive/MyDrive/Study/RFCX/input"

# A window/class is pseudo-labelled positive when its sigmoid probability is above this value...
PSEUDO_THR_P = 0.5
# ...and pseudo-labelled negative when the probability is below this value.
PSEUDO_THR_N = 0.01

# Backbone name; must be a key of MODEL_HEADER_INFO and is passed to timm.create_model.
MODEL_NAME = "resnet18"
# Number of output classes of the network heads.
N_LABEL = 24
# Number of sliding windows cut from each spectrogram.
N_SPLIT_IMG = 8
# Width of one sliding window along the spectrogram's last axis.
WINDOW = 512
# Overlap between consecutive windows (each window starts COVER before the previous one ends).
COVER = 49

In [None]:
def mono_to_color(
    X: np.ndarray, mean=None, std=None,
    norm_max=None, norm_min=None, eps=1e-6
):
    """Convert a single-channel array into a 3-channel uint8 image.

    The input is replicated along a new last axis, standardised, clipped to
    ``[norm_min, norm_max]`` and rescaled to the 0..255 range.

    Args:
        X: Input array; the output has shape ``X.shape + (3,)``.
        mean: Value subtracted before scaling. Defaults to the mean of ``X``.
        std: Value the centred data is divided by. Defaults to its standard
            deviation.
        norm_max: Upper clipping bound in standardised units. Defaults to the
            maximum of the standardised data.
        norm_min: Lower clipping bound in standardised units. Defaults to the
            minimum of the standardised data.
        eps: Added to ``std`` to avoid division by zero; also the smallest
            dynamic range that is still rescaled.

    Returns:
        ``np.uint8`` array of shape ``X.shape + (3,)``. It is all zeros when
        the standardised data is (numerically) constant.
    """
    # Stack X as [X, X, X]
    X = np.stack([X, X, X], axis=-1)

    # Standardize. Defaults are resolved with `is None` rather than `or`, so an
    # explicitly passed 0 / 0.0 is honoured instead of being treated as "not given".
    if mean is None:
        mean = X.mean()
    X = X - mean
    if std is None:
        std = X.std()
    Xstd = X / (std + eps)

    _min, _max = Xstd.min(), Xstd.max()
    if norm_max is None:
        norm_max = _max
    if norm_min is None:
        norm_min = _min

    if (_max - _min) > eps:
        # Normalize to [0, 255]
        V = np.clip(Xstd, norm_min, norm_max)
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # Just zero
        V = np.zeros_like(Xstd, dtype=np.uint8)
    return V

def extract_seq_label(label, value, n_label=None, n_frames=3751, clip_sec=60):
    """Build a frame-level label matrix from time-annotated events.

    Args:
        label: Iterable of ``(species_id, t_min, f_min, t_max, f_max)`` rows.
            Only ``species_id``, ``t_min`` and ``t_max`` are used.
        value: Value written into the frames covered by each event.
        n_label: Number of classes (rows). Defaults to the module-level
            ``N_LABEL``.
        n_frames: Number of frames (columns) the clip is divided into.
        clip_sec: Clip duration in the same unit as ``t_min`` / ``t_max``.

    Returns:
        ``(seq_label, middle)``: ``seq_label`` is a ``(n_label, n_frames)``
        float array holding ``value`` on the annotated frames and 0 elsewhere.
        ``middle`` is an int array of length ``n_label`` with the centre frame
        of each class's event, or -1 for classes without one. If a class has
        several events, the last one determines ``middle``.
    """
    if n_label is None:
        n_label = N_LABEL

    seq_label = np.zeros((n_label, n_frames))
    middle = np.full(n_label, -1, dtype=int)
    for species_id, t_min, f_min, t_max, f_max in label:
        # Rows taken from a float array carry the id as a float; cast so it can index.
        species_id = int(species_id)
        h, t = int(n_frames*(t_min/clip_sec)), int(n_frames*(t_max/clip_sec))
        middle[species_id] = (t + h)//2
        seq_label[species_id, h:t] = value
    return seq_label, middle

class SpectrogramFromNpz(torch.utils.data.Dataset):
    """Dataset that turns saved spectrogram arrays into normalised image tensors.

    Each item is read from ``./train/<recording>.npy`` (despite the class name,
    the files are ``.npy``), converted to a 3-channel uint8 image with
    ``mono_to_color`` and then passed through ``ToTensor`` and ``Normalize``.

    Args:
        fname: Sequence of recording names, optionally carrying a ``_posi`` or
            ``_nega`` suffix that is stripped before the file is looked up.
        mode: Stored on the instance; not used by this class itself.
    """

    def __init__(self, fname, mode):
        self.fname = fname
        self.mode = mode
        self.to_tensor = transforms.ToTensor()
        # Channel-wise mean / std applied after ToTensor.
        self.norm = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))

    def __len__(self):
        return len(self.fname)

    def __getitem__(self, idx):
        recording = self.fname[idx]
        # The label suffix is not part of the file name on disk.
        for suffix in ("_posi", "_nega"):
            recording = recording.replace(suffix, "")

        mel = np.load(f"./train/{recording}.npy")
        return self.norm(self.to_tensor(mono_to_color(mel)))

In [None]:
# Backbone name -> (end index into the backbone's `children()` that is kept as
# the feature extractor, number of channels fed to the 1x1 convolution heads).
MODEL_HEADER_INFO = dict(
    resnet18=(-2, 512),
    densenet121=(-2, 1024),
    efficientnet_b0=(-5, 320),
    resnest50d=(-2, 2048),
    mobilenetv2_100=(-2, 1280),
)

def interpolate(x: torch.Tensor, ratio: int):
    """Upsample the last axis by repeating every step ``ratio`` times.

    Args:
        x: Tensor of shape ``(batch, classes, time_steps)``.
        ratio: Number of copies made of each time step.

    Returns:
        Tensor of shape ``(batch, classes, time_steps * ratio)`` in which each
        input step appears ``ratio`` times in a row.

    Raises:
        ValueError: If ``x`` is not 3-dimensional.
    """
    if x.dim() != 3:
        raise ValueError(f"expected a 3-D tensor, got {x.dim()} dimensions")
    return x.repeat_interleave(ratio, dim=2)

class RFCXNet(nn.Module):
    """CNN backbone with two 1x1-convolution heads over the time axis.

    ``fc_a`` produces per-step class scores. ``fc_b`` produces per-step scores
    that are softmax-normalised over time and used as attention weights to
    pool ``fc_a``'s output into one clip-level score per class.

    Args:
        model_name: timm model name; must also be a key of ``MODEL_HEADER_INFO``.
    """

    def __init__(self, model_name):
        super().__init__()
        self.n_label = N_LABEL

        backbone = timm.create_model(model_name, pretrained=True)
        head_end, n_channels = MODEL_HEADER_INFO[model_name]

        # Keep only the leading child modules of the backbone as the feature extractor.
        self.resnet_head = nn.Sequential(*list(backbone.children())[:head_end])

        self.fc_a = nn.Conv1d(n_channels, self.n_label, 1, bias=False)
        self.fc_b = nn.Conv1d(n_channels, self.n_label, 1, bias=False)

    def forward(self, x, perm=None, gamma=None):
        """Run the network on ``x`` of shape (batch, channel, Hz, time).

        Args:
            x: Input batch.
            perm: Optional batch index permutation; when given, the backbone
                features are blended with their permuted counterpart.
            gamma: Blend weight of the un-permuted features. Required whenever
                ``perm`` is given.

        Returns:
            dict with
            ``"clipwise_preds_att_ti"``: (batch, n_class) attention-pooled scores
            (no sigmoid applied), and
            ``"segmentwise_output_ti"``: (batch, n_class, time * 32) per-step
            scores repeated 32 times along time.
        """
        # (batch, channel, Hz, time) -> (batch, channel, time, Hz) -> (batch, unit, time, Hz)
        feat = self.resnet_head(x.transpose(3, 2))

        if perm is not None:
            feat = gamma * feat + (1 - gamma) * feat[perm]

        # Average over the frequency axis -> (batch, unit, time)
        pooled = F.relu(feat).mean(dim=3)

        step_scores = self.fc_a(pooled)                 # (batch, n_class, time)
        attention = self.fc_b(pooled).softmax(dim=2)    # weights sum to 1 over time

        return {
            "clipwise_preds_att_ti": (step_scores * attention).sum(dim=2),
            # 32 is presumably the backbone's downsampling factor along time - TODO confirm.
            "segmentwise_output_ti": interpolate(step_scores, 32),
        }

In [None]:
# Annotation tables; only their recording ids are used in this cell.
train_fp = pd.read_csv(f"{DATA_ROOT}/train_fp.csv")
train_tp = pd.read_csv(f"{DATA_ROOT}/train_tp.csv")

# NOTE(review): recordings from train_fp also receive the "_posi" suffix. The
# dataset strips either suffix before loading, so loading is unaffected, but the
# suffix ends up in the pseudo-label keys - confirm this is intended.
tp_fnames = [f"{rec_id}_posi" for rec_id in train_tp["recording_id"].unique()]
fp_fnames = [f"{rec_id}_posi" for rec_id in train_fp["recording_id"].unique()]
all_fnames = tp_fnames + fp_fnames
train_datasets = SpectrogramFromNpz(all_fnames, "valid")

model = RFCXNet(MODEL_NAME)
model.to(device)

# [start, end) of every sliding window: each window begins COVER before the
# previous one ends and is WINDOW wide.
slide_img_pos = [[0, WINDOW]]
for _ in range(1, N_SPLIT_IMG):
    start = slide_img_pos[-1][1] - COVER
    slide_img_pos.append([start, start + WINDOW])

print(slide_img_pos)

In [None]:
# Directory on Drive that receives the raw per-fold predictions.
second_stage_dir = "./drive/MyDrive/Study/RFCX/2nd_stage"
os.makedirs(second_stage_dir, exist_ok=True)  # np.save does not create missing directories

# One {fname: labels} dict per fold. `labels` has shape (2, n_windows, n_class):
# index 0 holds the positive flags (0/1), index 1 the negative flags (0/-1).
pseudo_label_dict_lst = []
for fold in range(5):
    print(f"### {fold} ###")
    # map_location makes loading independent of the device the checkpoint was saved from.
    state_dict = torch.load(
        f"{BEST_MODEL}/rfcxnet_f{fold}_best_score_model.bin", map_location=device
    )
    model.load_state_dict(state_dict)
    model.eval()

    pseudo_label_dict = {}
    each_fold_pred_clipwise = []
    each_fold_pred_framewise = []
    progress = tqdm.auto.tqdm(zip(all_fnames, train_datasets), total=len(train_datasets))
    for fname, X in progress:
        posi_labels, nega_labels = [], []
        raw_seqs, raw_clips = [], []
        for h, t in slide_img_pos:
            with torch.no_grad():
                outputs = model(X[:, :, h:t].unsqueeze(0).to(device))
            clip_scores = outputs["clipwise_preds_att_ti"]
            pred = clip_scores.sigmoid().cpu().numpy()[0]

            # Confident positives become 1, confident negatives -1, everything else 0.
            posi_labels.append((pred > PSEUDO_THR_P).astype(int))
            nega_labels.append((pred < PSEUDO_THR_N).astype(int) * -1)

            # Move the raw (pre-sigmoid) outputs off the GPU right away; otherwise
            # every window of every recording stays in GPU memory for the whole fold.
            raw_seqs.append(outputs["segmentwise_output_ti"].cpu())
            raw_clips.append(clip_scores.cpu())

        # Raw outputs are kept for every recording, including those that end up unlabelled.
        each_fold_pred_clipwise.append(torch.cat(raw_clips))
        each_fold_pred_framewise.append(torch.cat(raw_seqs))

        posi_labels = np.stack(posi_labels)
        nega_labels = np.stack(nega_labels)
        if posi_labels.sum() == 0 and nega_labels.sum() == 0:
            # Nothing confident in any window: this fold casts no vote for the recording.
            continue
        pseudo_label_dict[fname] = np.stack([posi_labels, nega_labels])
    pseudo_label_dict_lst.append(pseudo_label_dict)

    each_fold_pred_clipwise = torch.stack(each_fold_pred_clipwise).numpy()
    each_fold_pred_framewise = torch.stack(each_fold_pred_framewise).numpy()
    np.save(f"{second_stage_dir}/{EXP}_clipwise_fold_{fold}", each_fold_pred_clipwise)
    np.save(f"{second_stage_dir}/{EXP}_seqwise_fold_{fold}", each_fold_pred_framewise)

In [None]:
# Combine the per-fold pseudo labels by strict majority vote.
n_folds = len(pseudo_label_dict_lst)
majority = n_folds // 2  # a label needs more than this many fold votes (2 for 5 folds)

# Vote cast by a fold that stored nothing for a recording. The shape follows the
# configuration constants so it keeps matching the stored labels if they change.
no_vote = np.zeros((2, N_SPLIT_IMG, N_LABEL))

pseudo_label_dict_5fold = {}
for fname in all_fnames:
    votes = np.stack([fold_dict.get(fname, no_vote) for fold_dict in pseudo_label_dict_lst])

    # Summing over folds gives the positive vote count (>= 0) and the
    # negative vote count (<= 0) for every window/class.
    posi_votes, nega_votes = votes.sum(0)
    posi_label = (posi_votes > majority).astype(int)
    nega_label = (nega_votes < -majority).astype(int) * -1
    new_label = posi_label + nega_label  # 1 = positive, -1 = negative, 0 = unlabelled

    if not new_label.any():
        continue
    pseudo_label_dict_5fold[fname] = new_label

In [None]:
# One boolean row per pseudo-labelled recording: is the class positive in at least one window?
has_positive = [(v == 1).sum(0) > 0 for v in pseudo_label_dict_5fold.values()]

ax = pd.DataFrame(has_positive).sum(0).plot.bar()
ax.set(
    title="Recordings with a positive pseudo label, per class",
    xlabel="class index",
    ylabel="number of recordings",
);

In [None]:
# Persist the fold-voted pseudo labels; the file name records the positive threshold used.
pseudo_label_path = f"./drive/MyDrive/Study/RFCX/2nd_stage/{EXP}_thr{PSEUDO_THR_P}.pkl"
with open(pseudo_label_path, "wb") as fout:
    pickle.dump(pseudo_label_dict_5fold, fout)

In [None]:
# List the second-stage output directory to check that the files were written.
!ls ./drive/MyDrive/Study/RFCX/2nd_stage/