In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from nnAudio.Spectrogram import CQT1992v2
from torch.utils.data import DataLoader, Dataset
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from pathlib import Path
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
# NOTE(review): BS is assigned again, with the same value, in the configuration
# cell that follows, so this copy inside the imports cell is redundant.
BS = 64

In [None]:
# Inference configuration.
BS = 64  # base batch size; the validation DataLoader uses BS * 2
FOLD = 0  # which StratifiedKFold split is kept as the hold-out fold
# Keyword arguments passed to CQT1992v2 wherever the transform is constructed.
qtransform_params = {"sr": 2048, "fmin": 20, "fmax": 1024, "hop_length": 32, "bins_per_octave": 8}
# Root directory of the competition data. Set the G2NET_INPUT_PATH environment
# variable to point somewhere else; the default is the original hard-coded location.
INPUT_PATH = Path(
    os.environ.get("G2NET_INPUT_PATH", "/home/trytolose/rinat/kaggle/grav_waves_detection/input")
)

In [None]:
class TrainDataset(Dataset):
    """Dataset that loads one ``.npy`` file per index and returns it flattened and scaled.

    Args:
        df: table with a ``path`` column holding the ``.npy`` file locations.
        transform: stored on the instance; ``__getitem__`` does not apply it.
        steps_per_epoch: batches per epoch; multiplied by the global ``BS`` to
            give the length reported in ``'train'`` mode.
        mode: ``'train'`` reports the synthetic epoch length, any other value
            reports ``len(df)``.
    """

    def __init__(self, df, transform=None, steps_per_epoch=150, mode='train'):
        self.mode = mode
        self.transform = transform
        self.df = df
        self.file_names = df['path'].values
        # Epoch length counted in samples, not batches.
        self.steps_per_epoch = steps_per_epoch * BS
        # NOTE(review): constructed here but never used by __getitem__ in this
        # class; the model applies its own CQT. Confirm nothing else reads it.
        self.wave_transform = CQT1992v2(**qtransform_params)

    def __len__(self):
        # NOTE(review): in 'train' mode the length does not depend on the number
        # of files, so indices can run past file_names if df is short — confirm.
        return self.steps_per_epoch if self.mode == 'train' else len(self.df)

    def __getitem__(self, idx):
        # Join the loaded array's rows end to end, then divide by the (signed) maximum.
        signal = np.hstack(np.load(self.file_names[idx]))
        return signal / np.max(signal)

In [None]:
class CustomModel(nn.Module):
    """timm ``efficientnet_b0`` with a single-output head, fed by an in-model CQT.

    Args:
        pretrained: forwarded to ``timm.create_model``.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        # Single-channel backbone whose classifier is swapped for a 1-unit linear layer.
        backbone = timm.create_model("efficientnet_b0", pretrained=pretrained, in_chans=1)
        self.n_features = backbone.classifier.in_features
        backbone.classifier = nn.Linear(self.n_features, 1)
        self.model = backbone
        # Built from the module-level qtransform_params dict.
        self.cqt = CQT1992v2(**qtransform_params)

    def forward(self, x):
        """Apply the CQT, add a channel axis, resize to 256x386 and run the backbone."""
        spectrogram = self.cqt(x)
        # unsqueeze(1) inserts the channel axis expected by the in_chans=1 backbone.
        image = nn.functional.interpolate(spectrogram.unsqueeze(1), (256, 386))
        return self.model(image)

In [None]:
# Test-set table: ids from sample_submission.csv plus the path of each .npy file.
# NOTE(review): the following cell rebinds `df` to the training labels, so when
# the notebook is run top to bottom this table is discarded before inference.
df = pd.read_csv(INPUT_PATH / "sample_submission.csv")

# Index every .npy under test/ by its file stem so ids can be looked up directly.
files = list(INPUT_PATH.joinpath("test").rglob("*.npy"))
FILE_PATH_DICT = dict((npy_file.stem, str(npy_file)) for npy_file in files)
df["path"] = df["id"].apply(lambda sample_id: FILE_PATH_DICT[sample_id])

In [None]:
# Load the training labels and attach the on-disk path of every sample.
df = pd.read_csv(INPUT_PATH / "training_labels.csv")

files = list((INPUT_PATH / "train").rglob("*.npy"))
FILE_PATH_DICT = {x.stem: str(x) for x in files}
df["path"] = df["id"].apply(lambda x: FILE_PATH_DICT[x])

# Assign each row to one of five stratified folds; random_state is fixed so the
# split is the same on every run.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=69)
df["fold"] = -1
fold_col = df.columns.get_loc("fold")
for f, (train_ids, val_ids) in enumerate(skf.split(df.index, y=df["target"])):
    # split() yields positional row numbers, so write through iloc: this stays
    # correct even if df ever carries a non-default index.
    df.iloc[val_ids, fold_col] = f

# Keep only the rows of the hold-out fold selected by FOLD.
df = df[df["fold"] == FOLD].reset_index(drop=True)

In [None]:
# Score every row of df with the weights stored in baseline_f0.pt.
val_ds = TrainDataset(df, mode="val")

# shuffle=False keeps predictions in the same order as the rows of df.
val_loader = DataLoader(
    val_ds, shuffle=False, num_workers=12, batch_size=BS*2, pin_memory=False
)

# Use the GPU when one is visible, otherwise fall back to CPU instead of
# failing inside .cuda().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CustomModel(pretrained=False)
# map_location lets a checkpoint saved from GPU tensors load on a CPU-only host.
model.load_state_dict(torch.load("baseline_f0.pt", map_location=device))
model.to(device)

val_pred = []
model.eval()
with torch.no_grad():
    for x in tqdm(val_loader, ncols=50):
        x = x.to(device).float().unsqueeze(1)
        pred = model(x)
        # Under no_grad the output carries no autograd history, so the legacy
        # `.data` accessor is unnecessary.
        pred = pred.sigmoid().cpu().numpy()
        val_pred.append(pred)

# Flatten the per-batch (n, 1) score arrays into one 1-D vector.
val_pred = np.concatenate(val_pred).reshape(-1,)

In [None]:
# Display the flattened array of sigmoid scores produced by the inference cell.
val_pred

In [None]:
# Store the scores next to their rows; this relies on val_pred having the same
# order as df, which holds because the loader was built with shuffle=False.
df['target_pred'] = val_pred

In [None]:
# The file paths are only needed for loading; remove them before exporting.
df = df.drop(columns=["path"])

In [None]:
# Export only the id and the model score, with the score under the `target`
# header. Writing df unchanged would keep its original `target` column and add
# `target_pred` as an extra column, so the scores would not be in `target`.
# NOTE(review): presumably the competition expects exactly `id,target` — confirm
# against sample_submission.csv. Also confirm df is the test table here: when the
# cells are run in order, df holds the training hold-out fold instead.
submission = df[["id", "target_pred"]].rename(columns={"target_pred": "target"})
submission.to_csv("submission.csv", index=False)

In [None]:
# Upload submission.csv to the g2net-gravitational-wave-detection competition
# through the Kaggle CLI, with the message "public baseline".
# NOTE(review): this has an external side effect and fires on every "Run All";
# consider running it manually only.
!kaggle competitions submit -c g2net-gravitational-wave-detection -f submission.csv -m "public baseline"

In [None]:
# Ten rows labelled target == 0 that received the highest predicted score.
df_top_fp = (
    df.loc[df["target"] == 0]
    .sort_values(by="target_pred", ascending=False)
    .head(10)
)
df_top_fp.to_csv("top_10_fp.csv", index=False)

In [None]:
# Ten lowest-scored and ten highest-scored rows, each taken as an independent copy.
df_min = df.sort_values(by="target_pred", ascending=True).head(10).copy()
df_max = df.sort_values(by="target_pred", ascending=False).head(10).copy()

In [None]:
# Display the ten rows with the highest predicted score.
df_max

In [None]:
# Highest-scored rows first, then the lowest-scored ones, renumbered from 0.
df_total = pd.concat((df_max, df_min), ignore_index=True)
df_total.to_csv("top_min_max.csv", index=False)

In [None]:
# Print the nvidia-smi report (shell command run through IPython).
!nvidia-smi