In [None]:
# Show which GPU (if any) is attached to this runtime.
!nvidia-smi

In [None]:
# Mount Google Drive (Colab only); the ./drive/... paths used below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%%time
# 大体10分くらい

!pip install -q kaggle
!mkdir -p .kaggle
!cp "./drive/My Drive/Study/config/kaggle.json" .kaggle/
!chmod 600 .kaggle/kaggle.json
!mv .kaggle /root

# image download
!kaggle datasets download "theoviel/rcfx-spectrograms-32-khz"
!unzip rcfx-spectrograms-32-khz.zip > /dev/null
!rm -rf rcfx-spectrograms-32-khz.zip 

# Pseudo Labelings
!kaggle datasets download "aerdem4/rainforest-labelling"
!unzip rainforest-labelling.zip > /dev/null
!rm -rf rainforest-labelling.zip 
!kaggle datasets download "takamichitoda/rfcx-oof-pseudo-labeling"
!unzip rfcx-oof-pseudo-labeling.zip > /dev/null
!rm -rf rfcx-oof-pseudo-labeling.zip 
!kaggle datasets download "kuto0633/oof-efficientnet"
!unzip oof-efficientnet.zip > /dev/null
!rm -rf oof-efficientnet.zip 

!pip install -U iterative-stratification albumentations wandb  > /dev/null
!wandb login e0792bb688a0d18e359df7438c45da90f8794091

!pip install timm
!pip install imbalanced-learn

In [None]:
import gc
import os
import pickle
import random
from datetime import datetime

import albumentations as A
import numpy as np
import pandas as pd
import timm
import torch
import torch.nn as nn
import tqdm
import tqdm.notebook
import wandb
from imblearn.under_sampling import RandomUnderSampler
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from matplotlib import pyplot as plt
from numpy.random import beta
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from torch.nn import functional as F
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau, CosineAnnealingWarmRestarts
from torchvision import transforms

# Use the GPU when one is attached; fall back to CPU so the notebook can still
# be imported / smoke-tested on a runtime without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preparation

## Set Constant Parameter

In [None]:
# Experiment configuration. Everything a run is expected to tune lives in this cell.
# timm.list_models("resnet*")
N_EXP = "0214"  # experiment number, only used to build EXP_NAME
# Backbone passed to timm.create_model; must be a key of MODEL_HEADER_INFO.
#MODEL_NAME = "resnet18"
MODEL_NAME = "efficientnet_b0"
#MODEL_NAME = "densenet121"
#MODEL_NAME = "resnest50d"
DETAIL = "my_best"  # gradient accumulation x mixup on the last layer
STAGE = "3rd"  # 1st, 2nd, 3rd, 4th

# Number of recordings per DataLoader batch (each recording is later split into patches).
#BATCH_SZE = 64
#BATCH_SZE = 32
BATCH_SZE = 8
# Number of batches whose gradients are accumulated per optimizer step.
#N_ACCUMULATE = 1
N_ACCUMULATE = 4
# Patch width in spectrogram frames; only 256 and 512 have window settings defined below.
WINDOW = 512
#WINDOW = 256

SEED = 416  # base random seed; the fold index is added to it per fold
N_FOLD = 5
WORKS = 0  # num_workers for the DataLoaders
N_LABEL = 24  # number of species classes
MAX_LEN = 3751  # time frames in a full spectrogram (same value is used when building sequence labels)

EXP_NAME = f"exp{N_EXP}_{MODEL_NAME}_{DETAIL}_{STAGE}"
OUTPUT = f"./drive/MyDrive/Study/RFCX/output/{EXP_NAME}"
DATA_ROOT = "./drive/MyDrive/Study/RFCX/input"

print(EXP_NAME)
!mkdir -p {OUTPUT}

In [None]:
# Pre-trained model: name of the experiment directory whose per-fold
# checkpoints are loaded before 2nd/3rd-stage training. Backbones that are
# not listed leave FIRST_ST untouched.
_FIRST_STAGE_EXPERIMENTS = {
    "resnet18": "exp0109_only_clip_loss",
    "densenet121": "exp0157_densenet121_for_ensemble_1st",
    "resnest50d": "exp0117_resnest_now_best",
    "efficientnet_b0": "exp0158_efficientnet_b0_for_ensemble_1st",
}
if MODEL_NAME in _FIRST_STAGE_EXPERIMENTS:
    FIRST_ST = _FIRST_STAGE_EXPERIMENTS[MODEL_NAME]

# Optimisation schedule: a long run for the 1st stage, short fine-tuning otherwise.
if STAGE == "1st":
    LEARNING_RATE, NUM_EPOCHS, T_MAX = 1e-3, 50, 10
else:
    LEARNING_RATE, NUM_EPOCHS, T_MAX = 3e-4, 5, 5

# Window-dependent settings: (SHIFT_W, COVER, N_SPLIT_IMG).
# COVER is the overlap (in frames) between consecutive training windows and
# N_SPLIT_IMG the number of training windows per recording.
_WINDOW_SETTINGS = {
    256: ([0, 32, 64, 128], 23, 16),
    512: ([0, 32, 64, 128, 256], 49, 8),
}
if WINDOW in _WINDOW_SETTINGS:
    SHIFT_W, COVER, N_SPLIT_IMG = _WINDOW_SETTINGS[WINDOW]

In [None]:
# Training windows: N_SPLIT_IMG windows of WINDOW frames, consecutive windows
# overlapping by COVER frames (i.e. hop = WINDOW - COVER).
TRAIN_HOP = WINDOW - COVER
slide_img_pos = [[i * TRAIN_HOP, i * TRAIN_HOP + WINDOW] for i in range(N_SPLIT_IMG)]

print("train slide_img_pos")
print(slide_img_pos)

# Inference windows: 50% overlap, as many windows as are needed to reach
# MAX_LEN frames. For WINDOW == 512 this reproduces the previous hard-coded
# layout (hop 256, 14 windows). The old code subtracted a fixed 256 frames,
# which for WINDOW == 256 produced 14 copies of the same [0, 256] window.
TEST_HOP = WINDOW // 2
N_TEST_WINDOWS = max(1, -(-(MAX_LEN - WINDOW) // TEST_HOP) + 1)  # ceil division
test_slide_img_pos = [[i * TEST_HOP, i * TEST_HOP + WINDOW] for i in range(N_TEST_WINDOWS)]

print("test slide_img_pos")
print(test_slide_img_pos)

## Load Dataset

In [None]:
# Competition Data
sample_submission = pd.read_csv(f"{DATA_ROOT}/sample_submission.csv")
train_fp = pd.read_csv(f"{DATA_ROOT}/train_fp.csv")
train_tp = pd.read_csv(f"{DATA_ROOT}/train_tp.csv")

# OOF Pseudo Labels
oof_ahmet_v0 = pd.read_csv("oof_ahmet_v0.csv")
oof_toda_v1 = pd.read_csv(f"./drive/MyDrive/Study/RFCX/OOF/oof_toda_v1.csv")
oof_kuto_v0 = pd.read_csv("oof_kuto_eff_v0.csv")

### Extract Label

In [None]:
def _labeling(x):
    if x < 0.01:
        return -2  # difficult negative
    elif x < 0.5:
        return 0  # Don't know
    return 2  # Pseudo Positive

def _extract_seq_label(df):
    """Rasterise the annotations of one recording onto the spectrogram time axis.

    Args:
        df: annotation rows of a single recording with the columns
            ``species_id``, ``t_min``, ``t_max`` (seconds) and ``type_value``.

    Returns:
        Float array of shape (N_LABEL, MAX_LEN). Frames covered by an
        annotation hold that row's ``type_value``; all other frames are 0.
        When annotations of the same species overlap, later rows win.
    """
    clip_seconds = 60  # annotation times are scaled as a fraction of a 60 s clip
    seq_label = np.zeros((N_LABEL, MAX_LEN))
    columns = ["species_id", "t_min", "t_max", "type_value"]
    # Select by column name rather than by position so the function does not
    # depend on the column order of the annotation frame.
    for species_id, t_min, t_max, type_value in df[columns].itertuples(index=False, name=None):
        h = int(MAX_LEN * (t_min / clip_seconds))
        t = int(MAX_LEN * (t_max / clip_seconds))
        seq_label[int(species_id), h:t] = type_value
    return seq_label

def _put_soft_framewise_label(seq_label):
    """Collapse frame-level labels into one label vector per training window.

    Args:
        seq_label: array of shape (n_label, n_frames) holding 1 on
            true-positive frames, -1 on false-positive frames and 0 elsewhere.

    Returns:
        Integer array of shape (len(slide_img_pos), n_label) with 1 where the
        window contains a positive frame for the species, -1 where it only
        contains negative frames, and 0 otherwise. A positive always wins
        over a negative inside the same window.
    """
    soft_framewise_label = []
    for h, t in slide_img_pos:
        window = seq_label[:, h:t]
        posi = (window == 1).any(axis=1).astype(int)
        nega = (window == -1).any(axis=1).astype(int)
        # Drop the negative flag wherever the window also holds a positive.
        nega = np.where(posi == 1, 0, nega)
        soft_framewise_label.append(posi - nega)
    return np.stack(soft_framewise_label)

def _merge_pseudo_labels(org_label, pseudo_label):
    lst = []
    for l1, l2 in zip(org_label, pseudo_label):
        org_zero_idx = np.where(l1 == 0)[0]
        l1[org_zero_idx] = l2[org_zero_idx]
        lst.append(l1)
    return np.array(lst)

def _merge_pseudo_labels_v2(org_label, pseudo_label):
    lst = []
    for l1, l2 in zip(org_label, pseudo_label):
        org_zero_idx = np.where(l1 == 0)[0]
        l1[org_zero_idx] = l2[org_zero_idx]
        lst.append(l1)
    return np.array(lst)

# Tag each annotation source so both can share one frame:
#   1 = true positive (TP rows), -1 = false positive (FP rows).
train_tp["type_value"] = 1
train_fp["type_value"] = -1
train_all = pd.concat([train_tp, train_fp], axis=0).reset_index(drop=True)

oof_frames = (oof_ahmet_v0, oof_toda_v1, oof_kuto_v0)
n_recordings = len(train_all["recording_id"].unique())

# recording_id -> per-window label matrix (original labels + pseudo labels).
new_label_dict = {}
for recording_id, org_df in tqdm.notebook.tqdm(train_all.groupby("recording_id"), total=n_recordings):
    # Pseudo labels: average the three OOF predictions, then discretise.
    # The first two columns of each OOF frame are skipped (presumably
    # recording_id and the patch index - verify against the OOF files).
    selector = f"recording_id=='{recording_id}'"
    oof_mean = np.stack([frame.query(selector).values[:, 2:] for frame in oof_frames]).mean(0)
    pseudo_labels = np.array([[_labeling(prob) for prob in patch] for patch in oof_mean])

    # Original labels per window; their zero entries are filled with pseudo labels.
    new_label_dict[recording_id] = _merge_pseudo_labels(
        _put_soft_framewise_label(_extract_seq_label(org_df)),
        pseudo_labels,
    )

### Make CV

In [None]:
# Per-recording species counts for every recording that has at least one TP row.
tp_fnames, tp_species_counts = [], []
for recording_id, df in train_tp.groupby("recording_id"):
    tp_fnames.append(recording_id)
    tp_species_counts.append(np.bincount(df["species_id"].to_numpy().astype(int), minlength=N_LABEL))

# Labels used ONLY for stratification. A species counts as present when it is
# annotated exactly once; this quirk is kept on purpose so the folds stay
# identical to the ones earlier experiments were trained on.
tp_labels_for_cv = [(counts == 1).astype(int).tolist() for counts in tp_species_counts]

# FP data: recordings that have no TP annotation at all.
tp_fname_set = set(tp_fnames)
all_fnames = list(new_label_dict.keys())
fp_only_fnames = [i for i in all_fnames if i not in tp_fname_set]
fp_positive_labels = [((new_label_dict[i] == 1).sum(0) > 0).astype(int) for i in fp_only_fnames]

# Make CV: TP and FP-only recordings are split separately, each fold being a
# (train_names, valid_names) pair of arrays.
tp_fname_arr = np.array(tp_fnames)
fp_fname_arr = np.array(fp_only_fnames)
mskf1 = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=0)
tp_cv = [(tp_fname_arr[train_index], tp_fname_arr[valid_index])
         for train_index, valid_index in mskf1.split(tp_fnames, tp_labels_for_cv)]
# NOTE: this name used to be spelled with a full-width digit and only resolved
# because Python NFKC-normalises identifiers; it is plain ASCII now.
mskf2 = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=0)
fp_cv = [(fp_fname_arr[train_index], fp_fname_arr[valid_index])
         for train_index, valid_index in mskf2.split(fp_only_fnames, fp_positive_labels)]

# Corrected clip-level TP labels used for evaluation: a species is present
# when it is annotated at least once.
tp_labels = [(counts >= 1).astype(int).tolist() for counts in tp_species_counts]

## Functions

### Utils

In [None]:
# https://www.kaggle.com/c/rfcx-species-audio-detection/discussion/198418

def LWLRAP(preds, labels):
    """Label-weighted label-ranking average precision.

    Args:
        preds: float tensor (n_samples, n_classes) of scores.
        labels: tensor (n_samples, n_classes) of 0/1 ground truth.

    Returns:
        The metric as a Python float (NaN when ``labels`` has no positives).
    """
    # Rank of every class inside its row (1 = highest score). The inverse of
    # the sorting permutation gives the rank directly, without Python loops.
    ranked_classes = torch.argsort(preds, dim=-1, descending=True)
    class_ranks = torch.argsort(ranked_classes, dim=-1) + 1

    # Push the ranks of negative classes to a huge value so that, after
    # sorting, the ranks of the positives come first in ascending order.
    ground_truth_ranks = class_ranks * labels + (1e6) * (1 - labels)
    sorted_ground_truth_ranks, _ = torch.sort(ground_truth_ranks, dim=-1, descending=False)

    # The k-th best positive at rank r contributes a precision of k / r.
    pos_matrix = torch.arange(1, labels.size(-1) + 1, device=labels.device).unsqueeze(0)
    score_matrix = pos_matrix / sorted_ground_truth_ranks

    # Keep only as many leading columns per row as the row has positives.
    score_mask_matrix, _ = torch.sort(labels, dim=-1, descending=True)
    scores = score_matrix * score_mask_matrix
    score = scores.sum() / labels.sum()
    return score.item()

def set_seed(seed: int = 42):
    """Seed every random number generator the notebook uses.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports PYTHONHASHSEED and switches cuDNN to deterministic mode.
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.cuda.manual_seed_all(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # The cuDNN auto-tuner may select a different algorithm from run to run,
    # which defeats the determinism requested above, so it is disabled.
    torch.backends.cudnn.benchmark = False  # type: ignore

### DataLoader

In [None]:
class TimeMask:
    """SpecAugment-style masking along the last (time) axis of a 3-D tensor.

    Args:
        T: exclusive upper bound of the sampled mask-width cap.
        num_masks: number of masks drawn per call.
        replace_with_zero: fill masked frames with 0 when True, otherwise
            with the mean of the (partially masked) tensor.
    """

    def __init__(self, T=40, num_masks=1, replace_with_zero=True):
        self.T = T
        self.num_masks = num_masks
        self.replace_with_zero = replace_with_zero

    def __call__(self, spec):
        """Return a masked copy of ``spec``; the input is left untouched."""
        cloned = spec.clone()
        len_spectro = cloned.shape[2]

        # Never ask for a mask wider than the axis itself; otherwise the
        # start position below would be drawn from an empty range.
        max_width = min(self.T, len_spectro)
        if max_width <= 0:
            return cloned

        for _ in range(self.num_masks):
            t = random.randrange(0, max_width)
            t_zero = random.randrange(0, len_spectro - t)

            # A zero-width draw only skips this mask; the remaining masks are
            # still applied (an early return used to drop them).
            if t == 0:
                continue

            # The effective width is drawn a second time inside [0, t).
            mask_end = random.randrange(t_zero, t_zero + t)
            fill = 0 if self.replace_with_zero else cloned.mean()
            cloned[:, :, t_zero:mask_end] = fill
        return cloned

class FreqMask:
    """SpecAugment-style masking along axis 1 (frequency bins) of a tensor.

    Args:
        F: exclusive upper bound of the sampled mask-height cap.
        num_masks: number of masks drawn per call.
        replace_with_zero: fill masked bins with 0 when True, otherwise with
            the mean of the (partially masked) tensor.
    """

    def __init__(self, F=30, num_masks=1, replace_with_zero=True):
        self.F = F
        self.num_masks = num_masks
        self.replace_with_zero = replace_with_zero

    def __call__(self, spec):
        """Return a masked copy of ``spec``; the input is left untouched."""
        cloned = spec.clone()
        num_mel_channels = cloned.shape[1]

        # Never ask for a mask taller than the axis itself; otherwise the
        # start position below would be drawn from an empty range.
        max_height = min(self.F, num_mel_channels)
        if max_height <= 0:
            return cloned

        for _ in range(self.num_masks):
            f = random.randrange(0, max_height)
            f_zero = random.randrange(0, num_mel_channels - f)

            # A zero-height draw only skips this mask; the remaining masks are
            # still applied (an early return used to drop them).
            if f == 0:
                continue

            # The effective height is drawn a second time inside [0, f).
            mask_end = random.randrange(f_zero, f_zero + f)
            fill = 0 if self.replace_with_zero else cloned.mean()
            cloned[:, f_zero:mask_end] = fill

        return cloned

def mono_to_color(
    X: np.ndarray, mean=None, std=None,
    norm_max=None, norm_min=None, eps=1e-6
):
    """Turn a single-channel array into a 3-channel uint8 image.

    The input is replicated onto a new trailing axis of size 3, standardised
    and then min-max scaled to [0, 255].

    Args:
        X: array to convert (e.g. a 2-D spectrogram).
        mean, std: standardisation statistics; computed from the data when None.
        norm_min, norm_max: clipping range applied after standardisation;
            the data's own min / max are used when None.
        eps: guards the division by ``std`` and detects constant inputs.

    Returns:
        uint8 array of shape ``X.shape + (3,)``; all zeros for constant input.
    """
    # Stack X as [X,X,X]
    X = np.stack([X, X, X], axis=-1)

    # Standardize. Use `is None` checks: an explicit 0.0 is a legitimate
    # value, and `value or default` would silently replace it.
    mean = X.mean() if mean is None else mean
    X = X - mean
    std = X.std() if std is None else std
    Xstd = X / (std + eps)
    _min, _max = Xstd.min(), Xstd.max()
    norm_max = _max if norm_max is None else norm_max
    norm_min = _min if norm_min is None else norm_min
    if (_max - _min) > eps:
        # Normalize to [0, 255]
        V = np.clip(Xstd, norm_min, norm_max)
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # Just zero
        V = np.zeros_like(Xstd, dtype=np.uint8)
    return V


class SpectrogramFromNpz(torch.utils.data.Dataset):
    """Dataset yielding ``(image, label)`` for pre-computed spectrograms.

    Despite the class name, spectrograms are read from ``.npy`` files under
    ``./train`` (modes "train" and "valid") or ``./test`` (mode "test").

    Args:
        fname: indexable collection of recording ids.
        mode: "train" (mask augmentation applied), "valid" or "test".

    Raises:
        ValueError: if ``mode`` is not one of the supported modes.
    """

    _MODES = ("train", "valid", "test")

    def __init__(self, fname, mode):
        # Fail fast: an unknown mode used to surface only later, as an
        # UnboundLocalError inside __getitem__.
        if mode not in self._MODES:
            raise ValueError(f"mode must be one of {self._MODES}, got {mode!r}")
        self.fname = fname
        self.mode = mode
        self.to_tensor = transforms.ToTensor()
        self.norm = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        # Time mask followed by frequency mask, applied together with p=0.5.
        self.augument_funcs_b = transforms.RandomApply([
            TimeMask(),
            FreqMask(),
        ], p=0.5)

    def __len__(self):
        return len(self.fname)

    def __getitem__(self, idx):
        fname = self.fname[idx]

        # load label
        if self.mode == "test":
            # Test recordings have no labels; return an all-zero placeholder.
            label = np.zeros(((N_SPLIT_IMG, N_LABEL)))
            img_dir = "test"
        else:
            label = new_label_dict[fname]
            img_dir = "train"

        # load image
        path = f"./{img_dir}/{fname}.npy"
        mel = np.load(path)
        image = self.to_tensor(mono_to_color(mel))

        # augment (training only), then normalise
        if self.mode == "train":
            image = self.augument_funcs_b(image)
        image = self.norm(image)

        return image, label

In [None]:
#_datasets = SpectrogramFromNpz(all_fnames[:2], "train")
#for d in _datasets:
   #break

### Main Loop

In [None]:
def mixup(input, gamma, perm):
    """Blend ``input`` with a permuted copy of itself, in place.

    Computes ``gamma * input + (1 - gamma) * input[perm]``.

    Args:
        input: float tensor to mix; it is overwritten with the result.
        gamma: mixing weight of the original rows.
        perm: index tensor selecting the partner row for every row.

    Returns:
        ``input`` itself, holding the mixed values.
    """
    gamma = float(gamma)
    # Indexing with a tensor copies, so the partner rows are not affected by
    # the in-place update below.
    perm_input = input[perm]
    # `add_(other, alpha=...)` replaces the deprecated `add_(Number, Tensor)` overload.
    return input.mul_(gamma).add_(perm_input, alpha=1 - gamma)

def rfcx_3rd_criterion(outputs, targets, b=None, perm=None):
    """Masked BCE loss over original and pseudo labels.

    Target codes: 1 = positive, -1 = negative, 2 = pseudo positive; every
    other value contributes nothing to the loss.

    Args:
        outputs: model output dict; ``"clipwise_preds_att_ti"`` holds the logits.
        targets: label tensor with the same shape as the logits.
        b: mixup weight used in the forward pass, or None when mixup is off.
        perm: mixup permutation used in the forward pass, or None.

    Returns:
        Scalar tensor: summed positive loss + summed negative loss
        + 0.5 * summed pseudo-positive loss.
    """
    clipwise_preds_att_ti = outputs["clipwise_preds_att_ti"]
    targets = targets.to(clipwise_preds_att_ti.device)

    posi_label = (targets == 1).float()
    nega_label = (targets == -1).float()
    soft_posi_label = (targets == 2).float()

    # mixup treat: the positive masks are mixed the same way as the features.
    # Skipped when mixup is off (the defaults used to crash here).
    if perm is not None and b is not None:
        posi_label = mixup(posi_label, b, perm)
        soft_posi_label = mixup(soft_posi_label, b, perm)

    bce = nn.BCEWithLogitsLoss(reduction="none")
    # Element-wise loss for "species present" / "species absent"; the first
    # one serves both the positive and the pseudo-positive term.
    loss_if_present = bce(clipwise_preds_att_ti, torch.ones_like(clipwise_preds_att_ti))
    loss_if_absent = bce(clipwise_preds_att_ti, torch.zeros_like(clipwise_preds_att_ti))

    posi_loss = (loss_if_present * posi_label).sum()
    nega_loss = (loss_if_absent * nega_label).sum()
    soft_posi_loss = (loss_if_present * soft_posi_label).sum()

    loss = posi_loss + nega_loss + soft_posi_loss*0.5

    return loss

def split_and_padding(X, y):
    """Cut a batch of spectrograms into the training windows.

    Args:
        X: tensor indexed as (batch, channel, freq, time).
        y: per-window labels indexed as (batch, window, class).

    Returns:
        ``(X, y)`` with all windows concatenated along the batch axis
        (window-major order). Windows that run past the end of the
        spectrogram are zero-padded to ``WINDOW`` frames.
    """
    X_lst, y_lst = [], []
    for idx, (h, t) in enumerate(slide_img_pos):
        _X = X[:, :, :, h:t]
        _y = y[:, idx, :]
        if _X.shape[3] != WINDOW:
            # new_zeros keeps the padding on the dtype/device of the input.
            x_pad = _X.new_zeros(list(_X.shape[:-1]) + [WINDOW - _X.shape[3]])
            _X = torch.cat([_X, x_pad], dim=3)
        X_lst.append(_X)
        y_lst.append(_y)
    X = torch.cat(X_lst, dim=0)
    y = torch.cat(y_lst, dim=0)
    return X, y

def split_and_padding_test(X):
    """Cut a batch of spectrograms into the inference windows.

    Args:
        X: tensor indexed as (batch, channel, freq, time).

    Returns:
        All windows concatenated along the batch axis (window-major order).
        Windows that run past the end of the spectrogram are zero-padded to
        ``WINDOW`` frames.
    """
    X_lst = []
    for h, t in test_slide_img_pos:
        _X = X[:, :, :, h:t]
        if _X.shape[3] != WINDOW:
            # new_zeros keeps the padding on the dtype/device of the input.
            x_pad = _X.new_zeros(list(_X.shape[:-1]) + [WINDOW - _X.shape[3]])
            _X = torch.cat([_X, x_pad], dim=3)
        X_lst.append(_X)
    X = torch.cat(X_lst, dim=0)
    return X

def train_loop_3rd(train_data_loader, model, optimizer, scheduler):
    """Run one training epoch with feature-level mixup and gradient accumulation.

    Args:
        train_data_loader: yields ``(X, y)`` batches of whole recordings.
        model: network called as ``model(X, perm, gamma)``.
        optimizer: optimizer stepped once per ``N_ACCUMULATE`` batches.
        scheduler: LR scheduler stepped once per batch, or None.

    Returns:
        ``(mean training loss, mean learning rate)`` over the epoch.
    """
    model.train()
    losses, lrs = [], []
    n_batches = len(train_data_loader)

    optimizer.zero_grad()
    for n_iter, (X, y) in tqdm.notebook.tqdm(enumerate(train_data_loader), total=n_batches):
        _X, _y = split_and_padding(X, y)
        _X, _y = _X.to(device), _y.to(device)

        # Mixup weight and partner permutation, shared by the model (feature
        # mixing) and the criterion (label mixing).
        b = beta(0.1, 0.1)
        perm = torch.randperm(_X.size(0))
        outputs = model(_X, perm, b)
        # Call the 3rd-stage criterion directly instead of relying on a
        # module-level alias that is assigned in a later cell.
        loss = rfcx_3rd_criterion(outputs, _y, b, perm)

        loss.backward()
        # Step after every N_ACCUMULATE-th batch and flush the trailing
        # partial group. The previous `n_iter % N_ACCUMULATE == 0` test
        # stepped right after the first batch and discarded the gradients of
        # the final incomplete group.
        is_last_batch = (n_iter + 1) == n_batches
        if (n_iter + 1) % N_ACCUMULATE == 0 or is_last_batch:
            optimizer.step()
            optimizer.zero_grad()

        if scheduler is not None:
            scheduler.step()

        losses.append(loss.item())
        lrs.append(np.array([param_group["lr"] for param_group in optimizer.param_groups]).mean())
    t_loss = np.array(losses).mean()
    lr =  np.array(lrs).mean()
    return t_loss, lr
    

def predict(data_loader, model):
    """Predict clip-level probabilities for every recording of a loader.

    Args:
        data_loader: yields ``(X, y)`` batches of whole recordings, ``y``
            being indexed as (batch, window, class).
        model: network returning a dict with ``"segmentwise_output_ti"``.

    Returns:
        ``(clip_preds, clip_targets)``: arrays of shape (n_recordings,
        N_LABEL). Predictions are the maximum sigmoid output over all
        inference windows and frames; a target is 1 when any window carries
        a label >= 1 for the class.
    """
    model.eval()
    clip_preds, clip_targets = [], []
    n_windows = len(test_slide_img_pos)
    # Iterate over the loader that was passed in (the loop used to read the
    # global `valid_data_loader` and ignore this argument).
    for X, y in data_loader:
        clip_y = ((y >=1).sum(1) > 0).int().numpy()
        _X = split_and_padding_test(X)
        _X = _X.to(device)
        with torch.no_grad():
            outputs = model(_X)
        segmentwise_output_ti = outputs["segmentwise_output_ti"].sigmoid().cpu().numpy()

        # Undo the window-major concatenation of split_and_padding_test; this
        # relies on the model emitting WINDOW frames per window.
        org_shape = (n_windows, y.shape[0], N_LABEL, WINDOW)
        _segmentwise_output_ti = segmentwise_output_ti.reshape(org_shape)
        _clip_pred = _segmentwise_output_ti.max(0).max(2)

        clip_preds.append(_clip_pred)
        clip_targets.append(clip_y)

    clip_preds = np.vstack(clip_preds)
    clip_targets = np.vstack(clip_targets)

    return clip_preds, clip_targets


def _clip_metrics(clip_preds, clip_targets, prefix=""):
    """Compute LWLRAP, BCE loss and class-averaged precision / recall / AUC."""
    preds_t = torch.tensor(clip_preds)
    targets_t = torch.tensor(clip_targets)

    metrics = {}
    metrics[f"{prefix}lwlrap"] = LWLRAP(preds_t, targets_t)
    # The predictions are probabilities already, so plain BCE is the right
    # loss; BCEWithLogitsLoss would apply a second sigmoid on top of them.
    metrics[f"{prefix}valid_loss"] = nn.BCELoss()(preds_t, targets_t.to(preds_t.dtype)).item()

    per_class = []
    for _true_y, _pred_y in zip(clip_targets.T, clip_preds.T):
        report = classification_report(_true_y.astype(int), (_pred_y > 0.5).astype(int), output_dict=True)
        res = report["1"]  # scores of the positive class
        res["auc"] = roc_auc_score(_true_y, _pred_y)
        per_class.append(res)
    class_mean = pd.DataFrame(per_class).mean()
    for key in ("precision", "recall", "auc"):
        metrics[f"{prefix}{key}"] = class_mean[key]
    return metrics


def valid_loop_3rd(valid_data_loader, model, n_origin_tp_valid, origin_tp_labels=None):
    """Evaluate the model on a validation loader.

    Args:
        valid_data_loader: validation loader whose first
            ``n_origin_tp_valid`` recordings are the TP recordings.
        model: network to evaluate.
        n_origin_tp_valid: number of leading TP recordings.
        origin_tp_labels: clip-level ground truth of those TP recordings,
            shape (n_origin_tp_valid, n_classes). Falls back to the
            module-level ``org_tp_labels`` when omitted.

    Returns:
        Dict with ``lwlrap``, ``valid_loss``, ``precision``, ``recall`` and
        ``auc`` measured against the 3rd-stage (pseudo-label) targets, plus
        the same metrics prefixed with ``org_`` measured against the
        original TP labels.
    """
    if origin_tp_labels is None:
        origin_tp_labels = org_tp_labels
    origin_tp_labels = np.asarray(origin_tp_labels)

    clip_preds, clip_targets = predict(valid_data_loader, model)

    res_d = {}
    # 3rd Stage
    res_d.update(_clip_metrics(clip_preds, clip_targets))
    # Origin
    res_d.update(_clip_metrics(clip_preds[:n_origin_tp_valid], origin_tp_labels, prefix="org_"))

    return res_d

### Model

In [None]:
# Per backbone: (slice index, feature dimension).
# The slice index selects which leading child modules of the timm model are
# kept as the feature extractor (children()[:index]); the feature dimension is
# the number of input channels of the 1x1 classification convolutions.
MODEL_HEADER_INFO = {
    "resnet18": (-2, 512),
    "densenet121": (-2, 1024),
    "efficientnet_b0": (-5, 320),
    "resnest50d": (-2, 2048),
    "mobilenetv2_100": (-2, 1280),
}

def interpolate(x: torch.Tensor, ratio: int):
    """Upsample the last axis by repeating every step ``ratio`` times.

    Args:
        x: tensor of shape (batch, classes, time_steps).
        ratio: number of copies of each time step.

    Returns:
        Tensor of shape (batch, classes, time_steps * ratio).
    """
    batch_size, classes_num, time_steps = x.shape
    # Add a trailing axis, broadcast it to `ratio` copies, then fold it back
    # into the time axis.
    repeated = x.unsqueeze(3).expand(batch_size, classes_num, time_steps, ratio)
    return repeated.reshape(batch_size, classes_num, time_steps * ratio)


class RFCXNet(nn.Module):
    """timm backbone with an attention-style pooling head over time.

    Args:
        model_name: timm model name; must be a key of ``MODEL_HEADER_INFO``.
    """

    def __init__(self, model_name):
        super().__init__()
        self.model_name = model_name
        self.n_label = N_LABEL

        base_model = timm.create_model(model_name, pretrained=True)
        h_idx, n_dense = MODEL_HEADER_INFO[model_name]
        backbone = nn.Sequential(*list(base_model.children())[:h_idx])

        # The attribute name of the backbone depends on the model so that
        # checkpoints saved by earlier experiments still load.
        setattr(self, self._backbone_attr(), backbone)

        self.fc_a = nn.Conv1d(n_dense, self.n_label, 1, bias=False)
        self.fc_b = nn.Conv1d(n_dense, self.n_label, 1, bias=False)

    def _backbone_attr(self):
        """Name of the attribute under which the backbone is registered."""
        if self.model_name in ["resnet18", "efficientnet_b0"]:
            return "resnet_head"
        if self.model_name == "resnest50d":
            return "resnest50d_head"
        return "model_head"

    def forward(self, x, perm=None, gamma=None):  # input x: (batch, channel, Hz, time)
        """Return clip-level logits and upsampled frame-level logits.

        When ``perm`` is given, the backbone features are mixed with their
        permuted counterpart using weight ``gamma`` (feature-level mixup).
        """
        backbone = getattr(self, self._backbone_attr())

        # (batch, channel, time, Hz) -> (batch, unit, time, Hz)
        feats = backbone(x.transpose(3, 2))

        if perm is not None:
            feats = gamma * feats + (1 - gamma) * feats[perm]

        feats = F.relu(feats)
        ti_pool = feats.mean(dim=3)  # (batch, unit, time)

        xa = self.fc_a(ti_pool)  # (batch, n_class, time)
        # attention weights over time: (batch, n_class, time)
        xb = torch.softmax(self.fc_b(ti_pool), dim=2)

        # time pool
        return {
            "clipwise_preds_att_ti": torch.sum(xa * xb, dim=2),
            "segmentwise_output_ti": interpolate(xa, 32),
        }

In [None]:
"""train_datasets = SpectrogramFromNpz(all_fnames, "train")
train_data_loader = torch.utils.data.DataLoader(train_datasets, batch_size=2, shuffle=True, num_workers=0)
for n_iter, (X, y) in tqdm.tqdm_notebook(enumerate(train_data_loader)):
    _X, _y = split_and_padding(X, y)
    break
model = RFCXNet("resnet18")
model.to(device)
model.eval()
with torch.no_grad():
    outputs = model(_X.to(device))
outputs["segmentwise_output_ti"].shape"""

## Test Dataset

In [None]:
# Test recordings are taken from the submission template; the dataset is built
# once and reused by every fold's test-time prediction.
test_fnames = sample_submission["recording_id"].values
test_datasets = SpectrogramFromNpz(test_fnames, "test")

# Train

In [None]:
# Train one model per CV fold, keep the checkpoint with the best validation
# LWLRAP, then predict the test set with that checkpoint.
for fold in range(len(tp_cv)):
    #if fold in [0,1,2]:  # Set Skip Fold
    #    continue
    print(datetime.now(), f"\t: ### FOLD-{fold} ###")
    set_seed(SEED+fold)

    wandb.init(project="rfcx", name=f"{EXP_NAME}_f{fold}")

    config = wandb.config
    config.exp_name = EXP_NAME
    config.fold = fold
    config.seed = SEED
    config.learning_rate = LEARNING_RATE
    config.batch_size = BATCH_SZE
    config.num_epochs = NUM_EPOCHS
    config.t_max = T_MAX
    config.n_accumulate = N_ACCUMULATE

    # TP recordings come first in the validation list; valid_loop uses the
    # first n_origin_tp_valid predictions for the "org_" metrics.
    tp_train, tp_valid = tp_cv[fold]
    fp_train, fp_valid = fp_cv[fold]
    train_fname = np.hstack([tp_train, fp_train])
    valid_fname = np.hstack([tp_valid, fp_valid])
    n_origin_tp_valid = len(tp_valid)
    org_tp_labels = np.array(tp_labels)[[tp_fnames.index(i) for i in tp_valid]]
    
    if STAGE == "3rd":
        rfcx_criterion = rfcx_3rd_criterion
        train_loop = train_loop_3rd
        valid_loop = valid_loop_3rd

    train_datasets = SpectrogramFromNpz(train_fname, "train")  
    train_data_loader = torch.utils.data.DataLoader(train_datasets, batch_size=config.batch_size, shuffle=True, num_workers=WORKS)
    valid_datasets = SpectrogramFromNpz(valid_fname, "valid")
    valid_data_loader = torch.utils.data.DataLoader(valid_datasets, batch_size=config.batch_size, shuffle=False, num_workers=WORKS)

    model = RFCXNet(MODEL_NAME)
    if STAGE in ["2nd", "3rd"]:
        # Start from the 1st-stage checkpoint of the same fold.
        model.load_state_dict(torch.load(f"./drive/MyDrive/Study/RFCX/output/{FIRST_ST}/rfcxnet_f{config.fold}_best_score_model.bin"))
    model.to(device)

    optimizer = Adam(model.parameters(), lr=config.learning_rate)
    # The scheduler is stepped once per batch, hence T_max is given in batches.
    scheduler = CosineAnnealingLR(optimizer, T_max=len(train_data_loader)*config.t_max, eta_min=0.0)

    wandb.watch(model)

    print(datetime.now(), "\t: start train")
    best_lwlrap, best_precision, best_auc, best_recall = 0, 0, 0, 0
    for epoch in range(config.num_epochs):
        t_loss, lr = train_loop(train_data_loader, model, optimizer, scheduler)
        valid_d = valid_loop(valid_data_loader, model, n_origin_tp_valid)

        if best_lwlrap < valid_d["lwlrap"]:
            print(f"epoch {epoch}: best score update !!!")
            torch.save(model.state_dict(), f"{OUTPUT}/rfcxnet_f{config.fold}_best_score_model.bin")
            best_lwlrap = valid_d["lwlrap"]
            best_precision = valid_d["precision"]
            best_auc = valid_d["auc"]
            best_recall = valid_d["recall"]

        valid_d["best_lwlrap"] = best_lwlrap
        valid_d["best_precision"] = best_precision
        valid_d["best_auc"] = best_auc
        valid_d["best_recall"] = best_recall
        valid_d["train_loss"] = t_loss
        valid_d["lr"] = lr
        wandb.log(valid_d)

    print(datetime.now(), "\t: finish train")
    wandb.finish()

    # predict test data
    model.load_state_dict(torch.load(f"{OUTPUT}/rfcxnet_f{config.fold}_best_score_model.bin"))
    model.eval()

    lst = []
    # tqdm.notebook.tqdm replaces the deprecated tqdm.tqdm_notebook; the total
    # is taken from the dataset instead of a hard-coded recording count.
    for idx, (X, _) in tqdm.notebook.tqdm(enumerate(test_datasets), total=len(test_datasets)):
        preds = []
        for h, t in test_slide_img_pos:
            _X = X[:,:,h:t].unsqueeze(0)
            if _X.shape[3] != WINDOW:
                # Zero-pad a window that runs past the end of the spectrogram.
                x_pad = torch.zeros(list(_X.shape[:-1]) + [WINDOW - _X.shape[3]])
                _X = torch.cat([_X, x_pad], axis=3)
            with torch.no_grad():
                outputs = model(_X.to(device))
            # Maximum frame-level probability inside the window, per class.
            pred, _ = outputs["segmentwise_output_ti"].sigmoid().max(2)
            preds.append(pred)
        # Clip-level prediction: maximum over all windows.
        max_pred, _  = torch.max(torch.stack(preds), dim=0)
        pred = max_pred.cpu().numpy()[0].tolist()

        row = [test_fnames[idx]] + pred
        lst.append(row)

    fold_sub = pd.DataFrame(lst, columns=["recording_id"]+[f"s{i}" for i in range(N_LABEL)])
    fold_sub.to_csv(f"{OUTPUT}/rfcxnet_f{config.fold}_predict.csv", index=None)

# Test

In [None]:
all_v_lst = []
for fold in range(5):
    df = pd.read_csv(f"{OUTPUT}/rfcxnet_f{fold}_predict.csv")
    ids, v_lst = [], []
    for row in df.values:
        recording_id = row[0]
        ids.append(recording_id)
        v = torch.Tensor(row[1:].astype(float))
        v_lst.append(v)
    all_v_lst.append(torch.stack(v_lst, axis=0))

all_preds = torch.stack(all_v_lst, axis=2).mean(2)
sub = pd.DataFrame(all_preds.tolist(), columns=df.columns[1:])
sub = pd.concat([df[["recording_id"]], sub], axis=1)
sub.to_csv(f"./submission_{EXP_NAME}_avg.csv", index=None)

!cp "./submission_{EXP_NAME}_avg.csv" "{OUTPUT}"

# Pseudo

In [None]:
# Version tag embedded in the file names of the OOF / test pseudo-label CSVs written below.
pseudo_version = 7
# Uncomment to export pseudo labels from a different, already trained experiment.
#MODEL_NAME = "resnet18"
#OUTPUT = "./drive/MyDrive/Study/RFCX/output/exp0211_resnet18_mixup_posilab_3rd"

## OOF

In [None]:
model = RFCXNet(MODEL_NAME)
model.to(device)

valid_dfs = []
for fold in range(5):
    print(datetime.now(), f"\t: ### FOLD-{fold} ###")
    set_seed(SEED+fold)

    tp_train, tp_valid = tp_cv[fold]
    fp_train, fp_valid = fp_cv[fold]
    valid_fname = np.hstack([tp_valid, fp_valid])

    valid_datasets = SpectrogramFromNpz(valid_fname, "valid")
    model.load_state_dict(torch.load(f"{OUTPUT}/rfcxnet_f{fold}_best_score_model.bin"))
    model.eval()

    clip_targets_org, clip_targets_new, valid_preds = [], [], []
    for idx, (X, y) in tqdm.notebook.tqdm(enumerate(valid_datasets), total=len(valid_datasets)):
        clip_y_org = ((y == 1).sum(0) > 0).astype(int)
        clip_y_new = ((y >= 1).sum(0) > 0).astype(int)
        clip_targets_org.append(clip_y_org)
        clip_targets_new.append(clip_y_new)

        for patch, (h, t) in enumerate(test_slide_img_pos):
        #for patch, (h, t) in enumerate(slide_img_pos):
            _X = X[:,:,h:t].unsqueeze(0)
            if _X.shape[3] != WINDOW:
                x_pad = torch.zeros(list(_X.shape[:-1]) + [WINDOW - _X.shape[3]])
                _X = torch.cat([_X, x_pad], axis=3)
            with torch.no_grad():
                outputs = model(_X.to(device))
            pred, _ = outputs["segmentwise_output_ti"].sigmoid().max(2)
            row = [valid_fname[idx], patch] + pred.cpu().tolist()[0]
            valid_preds.append(row)
        
    valid_df = pd.DataFrame(valid_preds, columns=["recording_id", "patch"] + [f"s{i}" for i in range(24)])
    valid_dfs.append(valid_df)

    clip_targets_org = np.stack(clip_targets_org)
    clip_targets_new = np.stack(clip_targets_new)
    clip_preds = valid_df.groupby("recording_id").max().drop("patch", axis=1).loc[valid_fname].values
    lwlrap_org = LWLRAP(torch.tensor(clip_preds), torch.tensor(clip_targets_org))
    lwlrap_new = LWLRAP(torch.tensor(clip_preds), torch.tensor(clip_targets_new))
    print(f"LWLRAP] org={lwlrap_org}, new={lwlrap_new}")
oof_pseudo = pd.concat(valid_dfs).reset_index(drop=True)
oof_pseudo.to_csv(f"oof_toda_v{pseudo_version}.csv", index=None)

!cp oof_toda_v{pseudo_version}.csv ./drive/MyDrive/Study/RFCX/OOF/

## Test

In [None]:
model = RFCXNet(MODEL_NAME)
model.to(device)

test_preds_dfs = []
for fold in range(5):
    print(datetime.now(), f"\t: ### FOLD-{fold} ###")
    set_seed(SEED+fold)
    model.load_state_dict(torch.load(f"{OUTPUT}/rfcxnet_f{fold}_best_score_model.bin"))
    model.eval()

    test_preds = []
    for idx, (X, _) in tqdm.notebook.tqdm(enumerate(test_datasets), total=1992):
        for patch, (h, t) in enumerate(test_slide_img_pos):
        #for patch, (h, t) in enumerate(slide_img_pos):
            _X = X[:,:,h:t].unsqueeze(0)
            if _X.shape[3] != WINDOW:
                x_pad = torch.zeros(list(_X.shape[:-1]) + [WINDOW - _X.shape[3]])
                _X = torch.cat([_X, x_pad], axis=3)
            with torch.no_grad():
                outputs = model(_X.to(device))
            pred, _ = outputs["segmentwise_output_ti"].sigmoid().max(2)
            row = [test_datasets.fname[idx], patch] + pred.cpu().tolist()[0]
            test_preds.append(row)
    test_preds_df = pd.DataFrame(test_preds, columns=["recording_id", "patch"] + [f"s{i}" for i in range(24)])
    test_preds_dfs.append(test_preds_df)

lst = []
for fold in range(5):
    v = test_preds_dfs[fold].values[:, 2:]
    lst.append(v)
pred_v = np.array(lst)
pd.DataFrame(np.hstack([test_preds_dfs[0].values[:, :2], pred_v.mean(0)]),
                         columns=test_preds_df.columns).to_csv(f"test_toda_v{pseudo_version}.csv", index=None)

!cp test_toda_v{pseudo_version}.csv ./drive/MyDrive/Study/RFCX/OOF/