In [10]:

import numpy as np
import glob 
import pandas as pd
import h5py # import to read hdf5
from pathlib import Path
import pyfstat
from scipy import stats
import os
from joblib import Parallel, delayed
import shutil
from tqdm import tqdm
import sys 


In [11]:
PREPROC_TRAIN_DATA = False

In [12]:


# root = "/Volumes/T7/gravitational-waves/kaggle-data"
root = "/media/viktor/T7/gravitational-waves-kaggle-2022"

def load_trained_files(train=True):
   if train:
      df = pd.read_csv(f'{root}/kaggle-data/train_labels.csv')
      df['filename'] = f'{root}/kaggle-data/train/' + df['id'].astype(str) + ".hdf5"
   else:
      test_files = glob.glob(f'{root}/kaggle-data/test/*.hdf5')
      df = pd.DataFrame({'filename': test_files})
      
   return df
print("[INFO] Loading files ...")
df = load_trained_files(train=PREPROC_TRAIN_DATA)
df


[INFO] Loading files ...


Unnamed: 0,filename
0,/media/viktor/T7/gravitational-waves-kaggle-20...
1,/media/viktor/T7/gravitational-waves-kaggle-20...
2,/media/viktor/T7/gravitational-waves-kaggle-20...
3,/media/viktor/T7/gravitational-waves-kaggle-20...
4,/media/viktor/T7/gravitational-waves-kaggle-20...
...,...
7970,/media/viktor/T7/gravitational-waves-kaggle-20...
7971,/media/viktor/T7/gravitational-waves-kaggle-20...
7972,/media/viktor/T7/gravitational-waves-kaggle-20...
7973,/media/viktor/T7/gravitational-waves-kaggle-20...


In [13]:
df["filename"].iloc[0]

'/media/viktor/T7/gravitational-waves-kaggle-2022/kaggle-data/test/00054c878.hdf5'

In [14]:
if PREPROC_TRAIN_DATA:
    df = df[df["target"] != -1].reset_index(drop=True)

In [15]:
df["name"] = df["filename"].apply(lambda x: x.split("/")[-1].split(".")[0])
df

Unnamed: 0,filename,name
0,/media/viktor/T7/gravitational-waves-kaggle-20...,00054c878
1,/media/viktor/T7/gravitational-waves-kaggle-20...,0007285a3
2,/media/viktor/T7/gravitational-waves-kaggle-20...,00076c5a6
3,/media/viktor/T7/gravitational-waves-kaggle-20...,001349290
4,/media/viktor/T7/gravitational-waves-kaggle-20...,001a52e92
...,...,...
7970,/media/viktor/T7/gravitational-waves-kaggle-20...,ffbce04ef
7971,/media/viktor/T7/gravitational-waves-kaggle-20...,ffc2d976b
7972,/media/viktor/T7/gravitational-waves-kaggle-20...,ffc905909
7973,/media/viktor/T7/gravitational-waves-kaggle-20...,ffe276f3e


In [16]:
if PREPROC_TRAIN_DATA:
    df["preprocessed_filename"] = df["name"].apply(lambda x: f"{root}/train/{x}.npy")
else:
    df["preprocessed_filename"] = df["name"].apply(lambda x: f"{root}/test/{x}.npy")

In [17]:
df.to_csv("train-preprocessed.csv" if PREPROC_TRAIN_DATA else "test-preprocessed.csv")
df

Unnamed: 0,filename,name,preprocessed_filename
0,/media/viktor/T7/gravitational-waves-kaggle-20...,00054c878,/media/viktor/T7/gravitational-waves-kaggle-20...
1,/media/viktor/T7/gravitational-waves-kaggle-20...,0007285a3,/media/viktor/T7/gravitational-waves-kaggle-20...
2,/media/viktor/T7/gravitational-waves-kaggle-20...,00076c5a6,/media/viktor/T7/gravitational-waves-kaggle-20...
3,/media/viktor/T7/gravitational-waves-kaggle-20...,001349290,/media/viktor/T7/gravitational-waves-kaggle-20...
4,/media/viktor/T7/gravitational-waves-kaggle-20...,001a52e92,/media/viktor/T7/gravitational-waves-kaggle-20...
...,...,...,...
7970,/media/viktor/T7/gravitational-waves-kaggle-20...,ffbce04ef,/media/viktor/T7/gravitational-waves-kaggle-20...
7971,/media/viktor/T7/gravitational-waves-kaggle-20...,ffc2d976b,/media/viktor/T7/gravitational-waves-kaggle-20...
7972,/media/viktor/T7/gravitational-waves-kaggle-20...,ffc905909,/media/viktor/T7/gravitational-waves-kaggle-20...
7973,/media/viktor/T7/gravitational-waves-kaggle-20...,ffe276f3e,/media/viktor/T7/gravitational-waves-kaggle-20...


In [18]:
df = pd.read_csv("train-preprocessed.csv")
# sort by name
df = df.sort_values(by=["name"]).reset_index(drop=True)
df

Unnamed: 0.1,Unnamed: 0,id,target,filename,name,preprocessed_filename
0,0,001121a05,1,/media/viktor/T7/gravitational-waves-kaggle-20...,001121a05,/media/viktor/T7/gravitational-waves-kaggle-20...
1,1,004f23b2d,1,/media/viktor/T7/gravitational-waves-kaggle-20...,004f23b2d,/media/viktor/T7/gravitational-waves-kaggle-20...
2,2,00a6db666,1,/media/viktor/T7/gravitational-waves-kaggle-20...,00a6db666,/media/viktor/T7/gravitational-waves-kaggle-20...
3,3,00f36a6ac,1,/media/viktor/T7/gravitational-waves-kaggle-20...,00f36a6ac,/media/viktor/T7/gravitational-waves-kaggle-20...
4,4,010a387db,1,/media/viktor/T7/gravitational-waves-kaggle-20...,010a387db,/media/viktor/T7/gravitational-waves-kaggle-20...
...,...,...,...,...,...,...
595,595,fe38dbe64,1,/media/viktor/T7/gravitational-waves-kaggle-20...,fe38dbe64,/media/viktor/T7/gravitational-waves-kaggle-20...
596,596,feafd0d16,1,/media/viktor/T7/gravitational-waves-kaggle-20...,feafd0d16,/media/viktor/T7/gravitational-waves-kaggle-20...
597,597,feeca844e,1,/media/viktor/T7/gravitational-waves-kaggle-20...,feeca844e,/media/viktor/T7/gravitational-waves-kaggle-20...
598,598,ff5ad023f,1,/media/viktor/T7/gravitational-waves-kaggle-20...,ff5ad023f,/media/viktor/T7/gravitational-waves-kaggle-20...
