In [80]:
import warnings
from pathlib import Path
from joblib import delayed, Parallel

import librosa
import audioread
import soundfile as sf

import pandas as pd

import IPython
from scipy.io import wavfile
import scipy.signal
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
%matplotlib inline


In [2]:
# Input: resampled training audio, one sub-directory per ebird_code.
TRAIN_AUDIO_DIR = Path("../../../input/birdsong-recognition/train_audio_resampled/")
# Output: re-cut training audio, one sub-directory per ebird_code plus 'nocall'.
TRAIN_5S_DIR = Path("../../../input/birdsong-recognition/train_audio_5s/")

NUM_THREAD = 8  # for joblib.Parallel

# Seed NumPy's global RNG so the randomly chosen nocall offsets (and therefore
# the nocall file names) are reproducible on Restart & Run All.
SEED = 42
np.random.seed(SEED)

# read train_mod.csv (lives next to the resampled audio)
train = pd.read_csv(TRAIN_AUDIO_DIR / "train_mod.csv")

# extract "ebird_code" and "resampled_filename" as [code, file name] pairs
train_audio_infos = train[["ebird_code", "resampled_filename"]].values.tolist()

# make the output directories; exist_ok=True keeps this cell re-runnable
TRAIN_5S_DIR.mkdir(parents=True, exist_ok=True)
for ebird_code in train.ebird_code.unique():
    ebird_dir = TRAIN_5S_DIR / ebird_code
    ebird_dir.mkdir(exist_ok=True)

# the nocall clips are written to TRAIN_5S_DIR / 'nocall'; make sure it exists
# even when 'nocall' is not one of the ebird_code values in train
(TRAIN_5S_DIR / "nocall").mkdir(exist_ok=True)

In [14]:
# define <ebirdcode: [filename1, filename2, ...]> dictionary
#    -> (excluding 'nocall')

# Group the resampled file names by species, keeping the CSV row order
# inside each list; rows labelled 'nocall' are left out.
file_dict = {}
for ebird_code, file_name in train_audio_infos:
    if ebird_code != 'nocall':
        file_dict.setdefault(ebird_code, []).append(file_name)

# number of species collected
len(file_dict)

264

In [120]:
# define "nocall extractor"
# Clip length in seconds: extract() cuts each recording into PERIOD-second clips.
PERIOD = 5
# Peak-to-mean factor: extract() treats a clip as 'calling' when
# max(|clip|) > ISCALLING * mean(|clip|).
ISCALLING = 20
# Global output directory for the nocall clips written by extract().
NOCALL_DIR = TRAIN_5S_DIR / 'nocall'
# Length in seconds of each nocall sequence saved by extract().
NOCALL_SIZE = 60
# Number of PERIOD-second blocks in one nocall sequence; extract() also uses it
# as the minimum index gap between two randomly chosen sequences.
THR = NOCALL_SIZE // PERIOD
# File names of the nocall clips; filled by extract().
fn_nocall = []


def extract(ebird_code):
    """Re-cut all recordings of one species into 'calling' and 'nocall' audio.

    Every file in ``file_dict[ebird_code]`` is read from
    ``TRAIN_AUDIO_DIR / ebird_code`` and cut into consecutive ``PERIOD``-second
    clips (a trailing remainder shorter than ``PERIOD`` seconds is dropped).
    A clip counts as 'calling' when its peak absolute amplitude exceeds
    ``ISCALLING`` times its mean absolute amplitude, otherwise as 'nocall'.

    Side effects:
      * The calling clips are concatenated and split evenly (in whole
        ``PERIOD``-second blocks) over the original file names, written to
        ``TRAIN_5S_DIR / ebird_code``; the last file also receives the
        blocks left over by the integer division.
      * If at least ``3 * (THR + 2)`` nocall blocks exist, three
        non-overlapping ``NOCALL_SIZE``-second sequences starting at random
        block offsets are written to ``NOCALL_DIR``. Otherwise the whole
        nocall audio (if any) is written there as ``<ebird_code>.wav``.
      * Every nocall file name written is appended to ``fn_nocall``.

    Parameters
    ----------
    ebird_code : str
        Species code; must be a key of ``file_dict``.

    Returns
    -------
    None
    """
    out_dir = TRAIN_5S_DIR / ebird_code
    file_names = file_dict[ebird_code]
    num_files = len(file_names)

    # Collect the clips in lists and concatenate once at the end; growing an
    # array with np.r_ inside the loop re-copies everything on every clip.
    calling_clips = []
    nocall_clips = []

    for file_name in tqdm(file_names):
        audio, sr = sf.read(TRAIN_AUDIO_DIR / ebird_code / file_name)
        clip_len = sr * PERIOD  # samples per clip
        n_proc = audio.shape[0] // sr // PERIOD  # number of whole clips

        for i in range(n_proc):
            clip = audio[i * clip_len: (i + 1) * clip_len]  # [0, 5), [5, 10), ...
            magnitude = np.abs(clip)

            # a pronounced peak relative to the average level -> is calling
            if ISCALLING * magnitude.mean() < magnitude.max():
                calling_clips.append(clip)
            else:
                nocall_clips.append(clip)

    calling = np.concatenate(calling_clips) if calling_clips else np.empty(0)
    nocalling = np.concatenate(nocall_clips) if nocall_clips else np.empty(0)

    # `sr` is the rate of the last file read; the files are assumed to share
    # one sample rate (they come from the resampled directory).
    print(ebird_code,
          "---calling:", calling.shape[0]//sr,
          "nocall:", nocalling.shape[0]//sr, "\n")

    ## generate 'calling' files
    n_blocks = calling.shape[0] // sr // PERIOD  # number of 5s 'calling' blocks
    blocks_perfile = n_blocks // num_files  # same number of blocks per file
    seq_len = blocks_perfile * sr * PERIOD

    # NOTE(review): when n_blocks < num_files, blocks_perfile is 0 and every
    # file except the last is written empty -- confirm downstream code copes.
    for i, fn in enumerate(file_names):
        start = seq_len * i
        # the last file takes everything that remains
        stop = None if i == num_files - 1 else start + seq_len
        sf.write(out_dir / fn, calling[start: stop], sr)  # save under the same name

    ## generate 'nocall' files
    # Randomly choose 3 non-overlapping block indices, clip a NOCALL_SIZE-second
    # sequence starting at each one, and save the clips in the global 'nocall'
    # directory.
    n_blocks_nocall = nocalling.shape[0] // sr // PERIOD  # number of 5s 'nocall' blocks
    if nocalling.shape[0] == 0:
        return

    NOCALL_DIR.mkdir(parents=True, exist_ok=True)

    if n_blocks_nocall < 3 * (THR + 2):
        # Too few blocks for three separate sequences: keep it as one file.
        # The name must be '<ebird_code>.wav' directly inside NOCALL_DIR
        # (joining '.wav' as a further path component would point into a
        # per-species directory that does not exist), and it has to be
        # recorded like every other nocall file.
        fn = ebird_code + '.wav'
        sf.write(NOCALL_DIR / fn, nocalling, sr)
        fn_nocall.append(fn)
        return

    def legal(idx):
        """Return True when all three start blocks are more than THR apart."""
        return abs(idx[0]-idx[1])>THR and abs(idx[1]-idx[2])>THR and abs(idx[2]-idx[0])>THR

    # A sequence spans THR blocks, so only start blocks that leave room for a
    # complete sequence are drawn (THR + 1 == 13 with the default constants).
    n_starts = n_blocks_nocall - (THR + 1)
    while True:
        indices = np.random.choice(n_starts, 3)
        if legal(indices):
            break

    for idx in indices:
        start = idx * sr * PERIOD
        end = start + NOCALL_SIZE*sr

        fn = ebird_code + str(idx) + '.wav'
        sf.write(NOCALL_DIR / fn, nocalling[start: end], sr)
        fn_nocall.append(fn)


In [119]:
def solve(debug = False):
    """Run extract() for every species in train except 'nocall'.

    With debug=True only the first such species is processed.
    """
    species = [code for code in train.ebird_code.unique() if code != 'nocall']
    if debug:
        species = species[:1]

    for code in species:
        extract(code)

solve()

100%|██████████| 100/100 [02:57<00:00,  1.78s/it]


aldfly ---calling: 4270 nocall: 1860 



100%|██████████| 38/38 [00:07<00:00,  5.32it/s]


ameavo ---calling: 565 nocall: 715 



100%|██████████| 44/44 [00:46<00:00,  1.06s/it]


amebit ---calling: 535 nocall: 2310 



 24%|██▍       | 24/100 [00:06<00:21,  3.54it/s]


KeyboardInterrupt: 

In [None]:
# One row per nocall clip written by extract(); the clip's file name is used
# for both the 'filename' and the 'resampled_filename' column.
nocall_df = pd.DataFrame({
    'ebird_code': ['nocall'] * len(fn_nocall),
    'filename': fn_nocall,
    'resampled_filename': fn_nocall,
})

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# NOTE: running this cell twice adds the nocall rows twice -- re-run the cell
# that loads `train` first.
train = pd.concat([train, nocall_df])
train.tail()

In [None]:
# Save the table (now including the nocall rows) next to the re-cut audio;
# index=False keeps the row index out of the CSV.
train.to_csv(TRAIN_5S_DIR / "train_mod.csv", index=False)