In [25]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from nnAudio.Spectrogram import CQT1992v2
from torch.utils.data import DataLoader, Dataset
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from pathlib import Path
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
import matplotlib.pyplot as plt
import sys
from functools import partial
sys.path.insert(0, "../")
import matplotlib.pyplot as plt
%matplotlib inline

In [26]:
from src.models import CustomModel_v1, Andrewnet_v3_true
from src.dataset import TrainDataset
from src.transforms import minmax_bandpass_transform

In [27]:
class CFG:
    def __init__(self):
        self.qtransform_params = {
            'sr': 2048,
            'fmin': 30,
            'fmax': 400,
            'hop_length': 64,
            'bins_per_octave': 12,
            'filter_scale': 0.5,
        }

        self.bandpass_param = {
          'lf': 30,
          'hf': 400,
          'order': 8, 
          'sr': 2048,
        }
cfg = CFG()

class CFG:
    def __init__(self):
        self.qtransform_params = {
            'sr': 2048,
            'fmin': 20,
            'fmax': 1024,
            'hop_length': 32,
            'bins_per_octave': 8,
        }

        self.bandpass_param = {
          'lf': 30,
          'hf': 400,
        }
cfg = CFG()

In [28]:
INPUT_PATH = Path("/home/trytolose/rinat/kaggle/grav_waves_detection/input")

df = pd.read_csv(INPUT_PATH / "training_labels.csv")

files = list((INPUT_PATH / "train").rglob("*.npy"))
FILE_PATH_DICT = {x.stem: str(x) for x in files}
df["path"] = df["id"].apply(lambda x: FILE_PATH_DICT[x])

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=69)
df["fold"] = -1
for f, (train_ids, val_ids) in enumerate(skf.split(df.index, y=df["target"])):
    df.loc[val_ids, "fold"] = f

In [29]:
!ls -lt ../weights/CustomModel_v1/015_effnetb0_overfitted_OOF/fold_0

total 100104
-rw-rw-r-- 1 trytolose trytolose 17082923 сен  3 01:34 cp_epoch05_score0.85804.pth
-rw-rw-r-- 1 trytolose trytolose 17082923 сен  3 01:24 cp_epoch04_score0.86247.pth
-rw-rw-r-- 1 trytolose trytolose 17082923 сен  3 01:13 cp_epoch03_score0.86800.pth
-rw-rw-r-- 1 trytolose trytolose 17082923 сен  3 01:03 cp_epoch02_score0.86906.pth
-rw-rw-r-- 1 trytolose trytolose 17082923 сен  3 00:52 cp_epoch01_score0.86738.pth
-rw-rw-r-- 1 trytolose trytolose 17082923 сен  3 00:42 cp_epoch00_score0.86348.pth


In [30]:
def get_model_paths(exp_path, crietion="last"):
    folds = sorted(os.listdir(exp_path), key=lambda x: int(x.split("_")[-1]))
    result = []
    for f in folds:
        weights = list((Path(exp_path) / f).glob("*.pth"))
        if crietion=="last":
            weights = sorted(weights, key=lambda x: int(x.stem.split("epoch")[-1].split("_")[0]))
            result.append(weights[-1])
        if crietion=="best":
            weights = sorted(weights, key=lambda x: float(x.stem.split("score")[-1]))
            result.append(weights[-1])
    return result 

In [31]:
paths = get_model_paths("../weights/CustomModel_v1/015_effnetb0_overfitted_OOF")

In [32]:
paths

[PosixPath('../weights/CustomModel_v1/015_effnetb0_overfitted_OOF/fold_0/cp_epoch05_score0.85804.pth'),
 PosixPath('../weights/CustomModel_v1/015_effnetb0_overfitted_OOF/fold_1/cp_epoch05_score0.86121.pth'),
 PosixPath('../weights/CustomModel_v1/015_effnetb0_overfitted_OOF/fold_2/cp_epoch05_score0.85531.pth'),
 PosixPath('../weights/CustomModel_v1/015_effnetb0_overfitted_OOF/fold_3/cp_epoch05_score0.85375.pth'),
 PosixPath('../weights/CustomModel_v1/015_effnetb0_overfitted_OOF/fold_4/cp_epoch05_score0.85808.pth')]

In [33]:
paths = [
    "../weights/Wavenet/010_wavenet_bandpass_fp32_cos_10_2021-08-30-05-38-05/fold_0/cp_epoch08_score0.86815.pth",
    "../weights/Wavenet/028_wavenet_bandpass_fp32_cos_10_best.yaml/fold_2/cp_epoch11_score0.86913.pth",
    "../weights/Wavenet/028_wavenet_bandpass_fp32_cos_10_best.yaml/fold_3/cp_epoch10_score0.86845.pth",
]

In [38]:
# model = CustomModel_v1().cuda()
model = Andrewnet_v3_true().cuda()
transform_f = partial(minmax_bandpass_transform)
loss_fn = torch.nn.BCEWithLogitsLoss(reduction="none")
dfs = []
for f, w_path in zip([0,2,3], paths):
#     model.load_state_dict(torch.load(f"../weights/cos_bandpass_minmax_fold_{f}_w.pt"))
    model.load_state_dict(torch.load(w_path))
    df_fold = df[df["fold"] == f].reset_index(drop=True)
    val_ds = TrainDataset(
        df_fold,
        mode="val",
        transform=transform_f,
    )

    val_loader = DataLoader(val_ds, shuffle=False, num_workers=12, batch_size=128, pin_memory=False)
    losses = []
    val_pred = []
    model.eval()
    with torch.no_grad():
        for x, y in tqdm(val_loader, ncols=50, leave=True):
            x = x.cuda().float()
            y = y.cuda().float().unsqueeze(1)
            pred = model(x)
            loss = loss_fn(pred, y).cpu().data.numpy()
            pred = pred.sigmoid().cpu().data.numpy()
            val_pred.append(pred)
            losses.append(loss)
            
    val_pred = np.concatenate(val_pred).reshape(-1,)
    losses = np.concatenate(losses).reshape(-1,)
    df_fold['pred'] = val_pred
    df_fold['loss'] = losses
    dfs.append(df_fold)

100%|███████████| 875/875 [01:56<00:00,  7.53it/s]
100%|███████████| 875/875 [01:53<00:00,  7.70it/s]
100%|███████████| 875/875 [01:57<00:00,  7.46it/s]


In [39]:
df_pred_oof = pd.concat(dfs, ignore_index=True)

In [40]:
len(df_pred_oof)

336000

In [41]:
df_pred_oof.to_csv("train_oof_wavenet_f023.csv", index=False)

In [18]:
df_pred_oof['pred_binary'] = (df_pred_oof['pred'] > 0.5).astype(int)

In [19]:
tn, fp, fn, tp = metrics.confusion_matrix(df_pred_oof['target'], df_pred_oof['pred_binary']).ravel()
(tn, fp, fn, tp)

(248830, 31240, 84541, 195389)

In [20]:
fp_fn_mask = ((df_pred_oof["target"]==0) & (df_pred_oof["pred"]>0.5)) | ((df_pred_oof["target"]==1) & (df_pred_oof["pred"]<0.5)) 
tn_mask = (df_pred_oof["target"]==0) & (df_pred_oof["pred"]<0.5)
tp_mask = (df_pred_oof["target"]==1) & (df_pred_oof["pred"]>0.5)

In [34]:
df_pred_oof['weight'] = 0
df_pred_oof.loc[fp_fn_mask, 'weight'] = (fp_fn_mask.sum()/len(df_pred_oof)) * 0.4
df_pred_oof.loc[tn_mask, 'weight'] = (tn_mask.sum()/len(df_pred_oof)) * 0.4
df_pred_oof.loc[tp_mask, 'weight'] = (tp_mask.sum()/len(df_pred_oof)) * 0.2

In [22]:
df_pred_oof.loc[tp_mask, 'loss'].mean()

0.07592027379660685

In [23]:
df_pred_oof.loc[tn_mask, 'loss'].mean()

0.2413377247016903

In [24]:
df_pred_oof.loc[fp_fn_mask, 'loss'].mean()

1.5053996327786083

In [35]:
df_pred_oof['weight'].value_counts()

0.177736    248830
0.069782    195389
0.082701    115781
Name: weight, dtype: int64

In [36]:
df_pred_oof.to_csv("train_oof_overfit.csv", index=False)

In [1]:
df_pred_oof

NameError: name 'df_pred_oof' is not defined

In [26]:
df_pred = df_pred.drop("path", axis=1)

NameError: name 'df_pred' is not defined

In [10]:
df_pred.to_csv("OOF_pred_turkey_bandpass.csv", index=False)

In [24]:
f0 = df_pred[df_pred['fold']==0]

metrics.roc_auc_score(f0['target'], f0['pred'])

0.8671333603182839

In [3]:
model = Andrewnet_v3_true()

In [4]:
waves = torch.rand(1, 3, 4096)
model(waves)

tensor([[-0.3139]], grad_fn=<AddmmBackward>)

In [16]:
df_pred_oof = pd.read_csv("train_oof_overfit.csv")

In [17]:
df["loss"] = df["loss"].round(4)

In [12]:
df['log_loss'] = np.log(df['pred'].values).round(4)