In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from nnAudio.Spectrogram import CQT1992v2
from torch.utils.data import DataLoader, Dataset
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from pathlib import Path
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
import matplotlib.pyplot as plt
import sys
import cv2
sys.path.insert(0, "../")

In [None]:
from src.transforms import min_max_scale

In [None]:
# Root directory of the input data. Set the GW_INPUT_PATH environment variable
# to run the notebook against a copy stored elsewhere; when it is unset the
# original absolute path is used, so existing behaviour is unchanged.
INPUT_PATH = Path(
    os.environ.get(
        "GW_INPUT_PATH",
        "/home/trytolose/rinat/kaggle/grav_waves_detection/input",
    )
)

In [None]:
# Table with `id`, `target` and `pred` columns.
# Alternative source: pd.read_csv(INPUT_PATH / "training_labels.csv")
df = pd.read_csv("OOF_pred_turkey_bandpass.csv")

# Index every training .npy file by its stem so each `id` resolves to a path.
files = list((INPUT_PATH / "train").rglob("*.npy"))
FILE_PATH_DICT = dict(zip((p.stem for p in files), map(str, files)))
df["path"] = [FILE_PATH_DICT[sample_id] for sample_id in df["id"]]

# Stratified 5-fold assignment on `target`; -1 marks rows not yet assigned.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=69)
df["fold"] = -1
for f, (train_ids, val_ids) in enumerate(skf.split(df.index, y=df["target"])):
    # val_ids are positional; they match the labels because df keeps the
    # default index produced by read_csv.
    df.loc[val_ids, "fold"] = f

In [None]:
# Positive samples (target == 1), lowest predicted score first.
xx = df[df["target"].eq(1)].sort_values(by="pred").reset_index(drop=True)
xx

In [None]:
class CFG:
    """Container for the signal-processing settings used in this notebook.

    Attributes:
        qtransform_params: keyword arguments unpacked into ``CQT1992v2``.
        bandpass_param: keyword arguments unpacked into ``apply_bandpass``
            (via ``stack_bandpass_turkey_transform``).
    """

    def __init__(self):
        self.qtransform_params = dict(
            sr=2048,
            fmin=30,
            fmax=400,
            hop_length=64,
            bins_per_octave=12,
            filter_scale=0.5,
        )
        self.bandpass_param = dict(lf=30, hf=400, order=8, sr=2048)


cfg = CFG()

In [None]:
# Instantiate nnAudio's CQT1992v2 with the parameters stored on `cfg`.
cqt = CQT1992v2(**cfg.qtransform_params)

In [None]:
# Values passed to CWT as its first argument: 10 up to (not including) 90 in steps of 0.5.
widths = np.arange(10, 90, 0.5)
# NOTE(review): `CWT` is not imported in any visible cell — confirm where it
# comes from. The `.cuda()` call presumably requires a CUDA device — confirm.
cwt = CWT(widths, "cmorlet", 3, 4096).cuda()

In [None]:
# Extrema used for scaling, one entry per row of the input waves.
TOTAL_MAX_VAL = np.array([4.16750054e-20, 4.16596419e-20, 1.11610637e-20])
TOTAL_MIN_VAL = np.array([-4.42943562e-20, -4.23039083e-20, -1.08631992e-20])


def min_max_scale(waves, min_val=-1, max_val=1):
    """Linearly map each row of `waves` into [min_val, max_val] (debug variant).

    Row `k` is scaled with TOTAL_MIN_VAL[k] / TOTAL_MAX_VAL[k], so `waves`
    must have three rows (first axis of length 3). This version also prints
    the per-row max and min of the intermediate 0..1 scaled values.

    Args:
        waves: 2-D array with one row per entry of TOTAL_MIN_VAL/TOTAL_MAX_VAL.
        min_val: lower bound of the target range.
        max_val: upper bound of the target range.

    Returns:
        Array with the same shape as `waves`.
    """
    value_range = TOTAL_MAX_VAL - TOTAL_MIN_VAL
    X_std = (waves.T - TOTAL_MIN_VAL) / value_range
    print(X_std.max(axis=0), X_std.min(axis=0))
    return (X_std * (max_val - min_val) + min_val).T

In [None]:
# Sanity check on the first sample: raw per-row minima and maxima, then the
# overall maximum after scaling to [-1, 1].
x = np.load(df.loc[0, "path"])
print(x.min(axis=1))
# Previously a bare expression in the middle of the cell, so it was never shown.
print(x.max(axis=1))
min_max_scale(x, -1, 1).max()

In [None]:
# Band-pass the first sample. `stack_bandpass_turkey_transform` is defined in
# a later cell, so that cell must have been run first.
x = stack_bandpass_turkey_transform(np.load(df.loc[0, "path"]), cfg.bandpass_param)
x = torch.tensor(x).float().unsqueeze(0)
print(x.shape)

# Magnitude of the wavelet transform as a NumPy array.
out = torch.absolute(cwt(x)).squeeze().cpu().numpy()

# First entry of `out`, resized with cv2 to (512, 256).
plt.figure(figsize=(20, 20))
img = cv2.resize(out[0], (512, 256))
plt.imshow(img)
plt.show()

# Same entry at native resolution, colour range taken from the whole of `out`.
plt.imshow(out[0], aspect="auto", vmin=out.min(), vmax=out.max())

In [None]:
# Shape of the transformed array `out` computed in the preceding cell.
out.shape

In [None]:
from scipy import signal
def stack_bandpass_turkey_transform(waves, params):
    """Band-pass filter `waves` using the keyword settings in `params`.

    Despite the name, only the band-pass step is active: max-normalisation,
    windowing (`apply_win`) and horizontal stacking (`np.hstack`) are
    currently disabled.

    Args:
        waves: array forwarded to `apply_bandpass`.
        params: dict unpacked as keyword arguments of `apply_bandpass`.

    Returns:
        The filtered array returned by `apply_bandpass`.
    """
    return apply_bandpass(waves, **params)

def apply_bandpass(x, lf=30, hf=400, order=8, sr=2048):
    """Butterworth band-pass `x` with forward-backward filtering, then rescale.

    Args:
        x: array to filter (filtered by `signal.sosfiltfilt` along its default axis).
        lf: lower cut-off frequency, in the same units as `sr`.
        hf: upper cut-off frequency, in the same units as `sr`.
        order: order passed to `signal.butter`.
        sr: sampling rate passed to `signal.butter` as `fs`.

    Returns:
        The filtered array divided by sqrt((hf - lf) / (sr / 2)).
    """
    sos = signal.butter(order, [lf, hf], btype="bandpass", output="sos", fs=sr)
    filtered = signal.sosfiltfilt(sos, x)
    nyquist = sr / 2
    return filtered / np.sqrt((hf - lf) / nyquist)
def apply_win(x, alpha=0.1):
    """Taper `x` along its last axis with a Tukey window.

    Args:
        x: array-like signal(s); the window spans the last axis.
        alpha: Tukey shape parameter passed to `signal.windows.tukey`.
            Defaults to the previously hard-coded 0.1.

    Returns:
        ndarray with the same shape as `x`.
    """
    x = np.asarray(x)
    # `signal.tukey` was removed from the top-level scipy.signal namespace in
    # SciPy 1.13; `signal.windows.tukey` is the supported location. The window
    # length follows the input instead of being fixed at 4096, which gives the
    # same result for 4096-sample inputs.
    window = signal.windows.tukey(x.shape[-1], alpha)
    return x * window

In [None]:
# Plot the spectrograms of confident false positives (target 0, pred > 0.9).
# Alternative selection used earlier (confident misses):
#   df[(df['target']==1) & (df['pred']<0.2)].sample(30, replace=False, random_state=1)
for _, row in (
    df[(df["target"] == 0) & (df["pred"] > 0.9)]
    .sample(40, replace=False, random_state=1)
    .iterrows()
):
    x = stack_bandpass_turkey_transform(np.load(row["path"]), cfg.bandpass_param)
    x = cqt(torch.tensor(x).float()).squeeze().numpy()
    print(x.mean(axis=(1, 2)))

    # One panel per entry along the first axis of the spectrogram array.
    plt.figure(figsize=(20, 20))
    for idx in range(3):
        plt.subplot(1, 3, idx + 1)
        if idx == 0:
            plt.title(f"target: {row['target']} pred: {row['pred']}")
        plt.imshow(x[idx])

    plt.show()
    # Only the first sampled row is shown.
    break

In [None]:
[0.00930675 0.00949222 0.01424521]
[0.01025635 0.01018105 0.01501465]

[2.5128476e-22 2.4947712e-22 3.6827268e-22]

In [None]:
from gwpy.timeseries import TimeSeries
from gwpy.plot import Plot
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Load one training sample from disk.
data = np.load("../input/train/0/0/0/000a5b6e5c.npy")

# Wrap the row at index 2 as a gwpy TimeSeries with sample_rate=2048.
d1 = TimeSeries(data[2,:], sample_rate=2048)

In [None]:
# Q-transform of `d1` with frange=(30, 400), logf=True and whitening disabled.
hq = d1.q_transform(qrange=(16,32), frange=(30,400), logf=True, whiten=False)
fig4 = hq.plot(figsize=[12, 10])
# Grab the current axes before the colorbar is added to the figure.
ax = fig4.gca()
fig4.colorbar(label="Normalised energy")
ax.grid(False)
ax.set_yscale('log')
# Trailing semicolon suppresses the text repr of the return value.
ax.set_xlabel('Time [s]');

In [None]:
# Same plot as the previous cell but with frange=(20, 600) and logf=False;
# the y-axis is still drawn on a log scale below.
hq = d1.q_transform(qrange=(16,32), frange=(20,600), logf=False, whiten=False)
fig4 = hq.plot(figsize=[12, 10])
# Grab the current axes before the colorbar is added to the figure.
ax = fig4.gca()
fig4.colorbar(label="Normalised energy")
ax.grid(False)
ax.set_yscale('log')
# Trailing semicolon suppresses the text repr of the return value.
ax.set_xlabel('Time [s]');

In [None]:
# Shape of the Q-transform result once converted to a plain NumPy array.
np.array(hq).shape

In [None]:
# NOTE(review): `tt` is not assigned in any visible cell; this raises
# NameError on a fresh kernel unless it is defined elsewhere.
tt.shape

In [None]:
# Show `tt` as an image.
# NOTE(review): `tt` is not assigned in any visible cell — confirm its origin.
plt.figure(figsize=(10,10))
plt.imshow(tt)

In [None]:
from scipy import signal

# Extrema used by min_max_scale, one entry per row of the input waves.
# NOTE(review): the first TOTAL_MAX_VAL entry here (4.61521162e-20) differs
# from the value in the earlier debugging cell (4.16750054e-20) — confirm
# which one is correct.
TOTAL_MAX_VAL = np.array([4.61521162e-20, 4.16596419e-20, 1.11610637e-20])
TOTAL_MIN_VAL = np.array([-4.42943562e-20, -4.23039083e-20, -1.08631992e-20])


def apply_bandpass(x, lf=30, hf=400, order=8, sr=2048):
    """Apply a forward-backward Butterworth band-pass to `x` and rescale it.

    Args:
        x: array to filter (filtered by `signal.sosfiltfilt` along its default axis).
        lf: lower band edge, in the same units as `sr`.
        hf: upper band edge, in the same units as `sr`.
        order: order passed to `signal.butter`.
        sr: sampling rate passed to `signal.butter` as `fs`.

    Returns:
        The filtered array divided by sqrt((hf - lf) / (sr / 2)).
    """
    band_edges = [lf, hf]
    sos = signal.butter(order, band_edges, btype="bandpass", output="sos", fs=sr)
    scale = np.sqrt((hf - lf) / (sr / 2))
    filtered = signal.sosfiltfilt(sos, x)
    return filtered / scale


def min_max_scale(waves, min_val=-1, max_val=1):
    """Linearly map each row of `waves` into [min_val, max_val].

    Row `k` is scaled with TOTAL_MIN_VAL[k] / TOTAL_MAX_VAL[k], so `waves`
    must have one row per entry of those module-level arrays.

    Args:
        waves: 2-D array with one row per entry of TOTAL_MIN_VAL/TOTAL_MAX_VAL.
        min_val: lower bound of the target range.
        max_val: upper bound of the target range.

    Returns:
        Array with the same shape as `waves`.
    """
    value_range = TOTAL_MAX_VAL - TOTAL_MIN_VAL
    unit_scaled = (waves.T - TOTAL_MIN_VAL) / value_range
    return (unit_scaled * (max_val - min_val) + min_val).T

def create_cqt(wave):
    """Return the gwpy Q-transform of one waveform as a NumPy array.

    Args:
        wave: 1-D sequence of samples, wrapped in a TimeSeries with
            sample_rate=2048.

    Returns:
        `np.array` of the result of `TimeSeries.q_transform` called with
        qrange=(16, 32), frange=(20, 600), logf=False, whiten=False.
    """
    series = TimeSeries(wave, sample_rate=2048)
    return np.array(
        series.q_transform(qrange=(16, 32), frange=(20, 600), logf=False, whiten=False)
    )

def get_cqt_image(path):
    """Load a sample and stack the Q-transform of each row along the last axis.

    Scaling (`min_max_scale`) and band-pass filtering (`apply_bandpass`) are
    currently disabled; the raw waves go straight into `create_cqt`.

    Args:
        path: location of a .npy file readable by `np.load`.

    Returns:
        Array built by `np.stack(..., axis=2)` over the per-row transforms.
    """
    per_row = list(map(create_cqt, np.load(path)))
    return np.stack(per_row, axis=2)

In [None]:
# Build the stacked Q-transform image for one training sample.
img = get_cqt_image("../input/train/0/0/0/000a5b6e5c.npy")

In [None]:
# Show the slice at index 1 of the last axis, i.e. the transform of the second
# row of the sample (get_cqt_image stacks the rows along axis 2).
plt.figure(figsize=(10,10))
plt.imshow(img[:,:,1])
plt.show()

In [None]:
# Inspect the frame, including the added `path` and `fold` columns.
df

In [None]:
# Rows assigned to fold 0.
df[df['fold']==0]

In [None]:
# Cache each fold's waveforms as a single min-max-scaled float16 array.
# np.save does not create missing directories, so make sure the target exists.
os.makedirs("../input/fp16/train", exist_ok=True)
for f in range(5):
    df_f = df[df["fold"] == f].reset_index(drop=True)
    scaled_fp16 = []
    for i in tqdm(range(df_f.shape[0])):
        # Read the path from the fold subset `df_f`, not from `df`: indexing
        # `df` with 0..len(df_f)-1 stored the same leading rows for every fold
        # and left the arrays misaligned with the per-fold targets.
        wave_scaled = min_max_scale(np.load(df_f.loc[i, "path"])).astype(np.float16)
        scaled_fp16.append(wave_scaled)
    scaled_fp16 = np.stack(scaled_fp16)
    np.save(f"../input/fp16/train/fold_{f}.npy", scaled_fp16)

In [None]:
# Store each fold's labels as an int8 array next to the cached waveforms.
for f in range(5):
    df_f = df.loc[df["fold"] == f].reset_index(drop=True)
    target = df_f["target"].to_numpy().astype(np.int8)
    np.save(f"../input/fp16/train/fold_{f}_target.npy", target)

In [None]:
# List the saved fold files with human-readable sizes.
!ls -lh ../input/fp16/train/

In [None]:
from src.dataset import InMemoryDataset
from torch.utils.data import DataLoader

In [None]:
# Build the dataset from the cached fp16 directory.
# presumably the list selects which fold files to load — confirm against
# src.dataset.InMemoryDataset.
ds = InMemoryDataset("../input/fp16/train", [0,1,2,3])

In [None]:
# Scratch check of list.remove: start from 0..4 and drop the value 3.
# Note that this rebinds `xx`, which held a DataFrame in an earlier cell.
xx = list(range(5))
xx.remove(3)

In [None]:
# Drop the value 1 as well. The list is mutated in place, so running this cell
# a second time raises ValueError because 1 is no longer present.
xx.remove(1)

In [None]:
# Show the values left after the removals above.
xx