In [12]:
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import os,cv2,gc

import soundfile
import librosa
import random
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold

from fastai.vision.all import *
from radam import *

#import torchaudio

In [13]:
# --- Data locations -------------------------------------------------------
LABELS = 'data/train_metadata.csv'                # training metadata; source of the class list
LABELS_VAL = 'data/train_soundscape_labels.csv'   # per-row labels of the soundscapes
PATH_VAL = 'data/train_soundscapes/'              # directory holding the soundscape audio
OUT = 'data'                                      # output directory (created below)

# --- Run settings ---------------------------------------------------------
SEED = 2021         # passed to seed_everything
NUM_WORKERS = 12    # not used in the cells shown here; presumably DataLoader workers
bs = 160            # not used in the cells shown here; presumably the batch size

class config:
    """Audio and spectrogram settings shared across the notebook."""

    # Audio clip geometry
    sampling_rate = 32000                       # samples per second
    duration = 5.0075                           # clip length in seconds
    sz = 16                                     # not used in the cells shown here
    samples = int(duration * sampling_rate)     # clip length in samples
    top_db = 60                                 # noise-filtering threshold (default 60)

    # Frequency band kept in the spectrograms
    fmin = 50
    fmax = 14000

    # Spectrogram parameters
    n_mels = 128                                # equals the spectrogram height
    n_fft = 1024
    hop_length = 313
    
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects processes started after this point (e.g. spawned workers);
    # hash randomisation of the running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and possibly pick different kernels
    # from run to run, which defeats the determinism requested above.
    torch.backends.cudnn.benchmark = False

# Create the output directory if needed, then fix all RNG seeds.
os.makedirs(name=OUT, exist_ok=True)
seed_everything(seed=SEED)

In [20]:
# Folds 0-3 are file based: noise_split[fold] lists the soundscape files of that fold.
# Folds 4 and 5 are time based instead (seconds <= 300 and seconds > 300).
noise_split = [
    [   # fold 0
        '7954_COR_20190923.ogg',
        '11254_COR_20190904.ogg',
        '54955_SSW_20170617.ogg',
        '51010_SSW_20170513.ogg',
        '18003_COR_20190904.ogg',
    ],
    [   # fold 1
        '7019_COR_20190904.ogg',
        '14473_SSW_20170701.ogg',
        '57610_COR_20190904.ogg',
        '44957_COR_20190923.ogg',
        '21767_COR_20190904.ogg',
    ],
    [   # fold 2
        '31928_COR_20191004.ogg',
        '50878_COR_20191004.ogg',
        '42907_SSW_20170708.ogg',
        '26709_SSW_20170701.ogg',
        '28933_SSW_20170408.ogg',
    ],
    [   # fold 3
        '20152_SSW_20170805.ogg',
        '7843_SSW_20170325.ogg',
        '26746_COR_20191004.ogg',
        '10534_SSW_20170429.ogg',
        '2782_SSW_20170701.ogg',
    ],
]

In [21]:
class BirdDatasetVal(Dataset):
    """Fixed-length audio windows cut from the labelled soundscapes.

    Each selected row of ``LABELS_VAL`` is one item: the window of up to
    ``config.samples`` samples that ends at the row's ``seconds`` mark
    (clamped to the start of the recording), the list of class indices for
    the birds named in the row, and the audio file name. Bird names that do
    not occur as a ``primary_label`` in ``LABELS`` are dropped, so a row
    without any known bird yields an empty label list.

    Args:
        fold: Selects the rows to keep. ``fold < len(noise_split)`` keeps the
            rows of the files in ``noise_split[fold]``; ``4`` keeps rows with
            ``seconds <= 300``; ``5`` keeps rows with ``seconds > 300``; any
            other value keeps every row.

    Attributes:
        df: The selected rows, re-indexed from 0.
        data: Maps each file name used by ``df`` to its decoded audio.

    Raises:
        FileNotFoundError: If a selected row has no matching file in
            ``PATH_VAL``.
    """

    def __init__(self, fold):
        train_meta = pd.read_csv(LABELS)
        # Class index = position of the label in the sorted list of primary labels.
        label_map = {p: i for i, p in enumerate(sorted(train_meta.primary_label.unique()))}

        df = pd.read_csv(LABELS_VAL)
        # Iterate the columns directly: iterrows() materialises a Series per
        # row and is much slower for the same result.
        # Dropping the last '_'-separated token of row_id gives the key shared
        # with the audio file names, which are trimmed the same way below.
        df['file'] = ['_'.join(row_id.split('_')[:-1]) for row_id in df.row_id]
        df['birds'] = [[label_map[b] for b in birds.split(' ') if b in label_map]
                       for birds in df.birds]
        file_map = {'_'.join(f.split('_')[:-1]): f for f in os.listdir(PATH_VAL)}
        df['file'] = df.file.map(file_map)

        if fold < len(noise_split): subset = df[df.file.isin(noise_split[fold])]
        elif fold == 4: subset = df.loc[df.seconds <= 300]
        elif fold == 5: subset = df.loc[df.seconds > 300]
        else: subset = df
        # Re-index every fold the same way (previously only folds 4 and 5 were).
        self.df = subset.reset_index(drop=True)

        # Rows whose key had no entry in file_map carry NaN as file name.
        unmatched = self.df.file.isna()
        if unmatched.any():
            example = self.df.loc[unmatched, 'row_id'].iloc[0]
            raise FileNotFoundError(
                f'no audio file found in {PATH_VAL} for row_id {example}')

        # Decode only the recordings this fold uses; reading every soundscape
        # regardless of the fold wastes both time and memory.
        self.data = {fname: soundfile.read(os.path.join(PATH_VAL, fname))[0]
                     for fname in self.df.file.unique()}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(wave, label, fname)`` for row ``idx`` of ``self.df``."""
        # iloc raises IndexError past the last row, which is what ends a plain
        # ``for item in dataset`` loop.
        row = self.df.iloc[idx]
        label, fname = row['birds'], row['file']
        # Slice bounds must be integers; ``seconds`` marks the END of the window.
        end = int(row['seconds']) * config.sampling_rate
        length = config.samples
        # Clamp so a window that would begin before the recording starts at 0.
        start = max(0, end - length)

        # The slice is shorter than ``length`` if the recording ends early.
        wave = self.data[fname][start:start + length]
        return wave, label, fname

In [27]:
# For each requested fold, concatenate every window that has no labelled bird
# and save the result as <OUT>/noise_<fold>.npy.
for fold in [3]:  # switch to range(6) to regenerate every fold
    ds = BirdDatasetVal(fold=fold)
    bg = []
    for x, y, name in ds:
        # An empty label list means no known bird was annotated for the window.
        if len(y) == 0: bg.append(x)
    if not bg:
        # np.concatenate([]) fails with a generic message; name the fold instead.
        raise ValueError(f'fold {fold} contains no windows without labelled birds')
    bg = np.concatenate(bg)
    # Write next to the other outputs instead of a hard-coded directory.
    np.save(os.path.join(OUT, f'noise_{fold}'), bg)