In [22]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from nnAudio.Spectrogram import CQT1992v2
from torch.utils.data import DataLoader, Dataset
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from pathlib import Path
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
# NOTE(review): BS is assigned again, with the same value, in the configuration cell that follows.
BS = 64

In [23]:
# ---- Inference configuration ----
# Base batch size. The dataset multiplies its steps_per_epoch by this value,
# and the inference DataLoader uses twice this value.
BS = 64
# Fold index. NOTE(review): not referenced by any other visible cell.
FOLD = 0
# Keyword arguments passed to nnAudio's CQT1992v2 wherever it is constructed.
qtransform_params = {
    "sr": 2048,
    "fmin": 20,
    "fmax": 1024,
    "hop_length": 32,
    "bins_per_octave": 8,
}
# Root folder holding sample_submission.csv and the test/ tree of .npy files.
# Set G2NET_INPUT_PATH to run on another machine; the default is the
# original author's location, so existing runs are unaffected.
INPUT_PATH = Path(
    os.environ.get("G2NET_INPUT_PATH", "/home/trytolose/rinat/kaggle/g2net/input")
)

In [24]:
class TrainDataset(Dataset):
    """Dataset that loads one waveform file per item and max-normalises it.

    Args:
        df: frame with a ``path`` column holding the ``.npy`` file locations.
        transform: stored on the instance; ``__getitem__`` does not apply it.
        steps_per_epoch: number of batches per epoch in train mode. It is
            multiplied by the module-level ``BS`` to get the sample count.
        mode: ``'train'`` makes ``len()`` return ``steps_per_epoch * BS``;
            any other value makes it return ``len(df)``.

    In train mode the reported length is unrelated to the number of files,
    so indices are wrapped modulo the file count instead of running off the
    end of ``file_names``.
    """

    def __init__(self, df, transform=None, steps_per_epoch=150, mode='train'):
        self.df = df
        self.file_names = df['path'].values
        # Kept for backward compatibility; __getitem__ returns raw waves and
        # never calls this transform.
        self.wave_transform = CQT1992v2(**qtransform_params)
        self.transform = transform
        # Stored as a sample count (batches * batch size), not a batch count.
        self.steps_per_epoch = steps_per_epoch * BS
        self.mode = mode

    def __len__(self):
        """Return the epoch length in samples (train) or the number of rows in ``df``."""
        if self.mode == 'train':
            return self.steps_per_epoch
        return len(self.df)

    def __getitem__(self, idx):
        """Load file ``idx``, concatenate it with ``np.hstack`` and divide by its maximum.

        Raises:
            IndexError: in train mode when ``idx`` is at or past the epoch
                length or there are no files; otherwise when ``idx`` is
                outside ``file_names``.
        """
        if self.mode == 'train':
            n_files = len(self.file_names)
            # Still signal the end of the sequence so plain iteration stops.
            if idx >= self.steps_per_epoch or n_files == 0:
                raise IndexError(
                    f"index {idx} out of range for train epoch of {self.steps_per_epoch} samples"
                )
            # The epoch length may exceed the number of files; wrap around.
            idx = idx % n_files
        file_path = self.file_names[idx]
        waves = np.load(file_path)
        # assumes the file holds a 2-D array whose rows are joined end to end — TODO confirm
        waves = np.hstack(waves)
        # Scales by the signed maximum (not the maximum absolute value).
        waves = waves / np.max(waves)
        return waves

In [25]:
class CustomModel(nn.Module):
    """EfficientNet-B0 classifier that computes its own CQT spectrogram.

    The timm backbone is created with a single input channel and its
    classifier is replaced by a linear layer with one output unit. The CQT
    layer is built from the module-level ``qtransform_params``.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        backbone = timm.create_model("efficientnet_b0", pretrained=pretrained, in_chans=1)
        self.model = backbone
        self.n_features = backbone.classifier.in_features
        # Replace the stock head with a single-output linear layer.
        backbone.classifier = nn.Linear(self.n_features, 1)
        self.cqt = CQT1992v2(**qtransform_params)

    def forward(self, x):
        """Apply the CQT, add a channel axis, resize to 256x386 and run the backbone."""
        spectrogram = self.cqt(x)
        image = spectrogram.unsqueeze(1)
        image = nn.functional.interpolate(image, size=(256, 386))
        return self.model(image)

In [26]:
# Start from the sample submission, which lists the ids to predict.
df = pd.read_csv(INPUT_PATH / "sample_submission.csv")

# Index every .npy file under test/ by its stem (file name without extension).
files = list((INPUT_PATH / "test").rglob("*.npy"))
FILE_PATH_DICT = {}
for npy_file in files:
    FILE_PATH_DICT[npy_file.stem] = str(npy_file)

# Look up each id's file; an id with no matching file raises KeyError.
df["path"] = df["id"].map(FILE_PATH_DICT.__getitem__)

In [27]:
# List the working directory; the checkpoint loaded by relative path in the inference cell must be here.
!ls -l

total 38032
-rw-rw-r-- 1 trytolose trytolose 17079787 Aug 16 23:14 baseline_f0.pt
-rw-rw-r-- 1 trytolose trytolose 17079787 Aug 17 20:42 baseline_f0_part_epoch.pt
-rw-rw-r-- 1 trytolose trytolose     6901 Aug 17 12:10 inference.ipynb
-rw-rw-r-- 1 trytolose trytolose     5556 Aug 17 18:20 main.py
drwxrwxr-x 2 trytolose trytolose     4096 Aug 16 17:23 src
-rw-rw-r-- 1 trytolose trytolose  4760185 Aug 17 12:09 submission.csv


In [28]:
# Build the test-set loader. shuffle=False keeps the predictions in the same
# order as the rows of df, which the later column assignment relies on.
val_ds = TrainDataset(df, mode="val")

val_loader = DataLoader(
    val_ds, shuffle=False, num_workers=12, batch_size=BS * 2, pin_memory=False
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CustomModel(pretrained=False)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load("baseline_f0_part_epoch.pt", map_location=device))
model.to(device)
model.eval()

batch_preds = []
with torch.no_grad():
    for x in tqdm(val_loader, ncols=50):
        # The loader yields one waveform per row; add a channel axis for the model.
        x = x.to(device).float().unsqueeze(1)
        pred = model(x).sigmoid().cpu().numpy()
        batch_preds.append(pred)

# Flatten the per-batch outputs into one 1-D array of probabilities.
val_pred = np.concatenate(batch_preds).reshape(-1)

  0%|                    | 0/1766 [00:00<?, ?it/s]

CQT kernels created, time used = 0.0169 seconds
CQT kernels created, time used = 0.0050 seconds


100%|█████████| 1766/1766 [02:36<00:00, 11.27it/s]


In [29]:
# Display the flattened prediction array as a quick sanity check.
val_pred

array([1.        , 0.97921145, 0.33347988, ..., 0.24788715, 0.999997  ,
       0.07999594], dtype=float32)

In [30]:
# Attach predictions as the `target` column; this relies on val_pred being in df row order
# (the inference loader is built with shuffle=False).
df['target'] = val_pred

In [31]:
# Remove the helper `path` column before export. errors="ignore" keeps this
# cell safe to re-run after the column has already been dropped.
df = df.drop(columns="path", errors="ignore")

In [32]:
# Write the frame to submission.csv without the index column.
df.to_csv("submission.csv", index=False)

In [33]:
# Upload submission.csv to the competition through the Kaggle CLI.
# NOTE(review): this has an external side effect each time the cell runs (e.g. under Run All).
!kaggle competitions submit -c g2net-gravitational-wave-detection -f submission.csv -m "public baseline"

100%|███████████████████████████████████████| 4.57M/4.57M [00:07<00:00, 669kB/s]
Successfully submitted to G2Net Gravitational Wave Detection