In [3]:
import warnings
from pathlib import Path
from joblib import delayed, Parallel

import librosa
import audioread
import soundfile as sf

import pandas as pd

import IPython
from scipy.io import wavfile
import scipy.signal
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
%matplotlib inline


In [4]:
# Input: resampled training audio, read below as TRAIN_AUDIO_DIR / <ebird_code> / <file>.
TRAIN_AUDIO_DIR = Path("../../../input/birdsong-recognition/train_audio_resampled/")
# Output: extracted clips, written with the same per-species layout.
TRAIN_5S_DIR = Path("../../../input/birdsong-recognition/train_audio_5s/")

# Read the metadata that accompanies the resampled audio.
train = pd.read_csv(TRAIN_AUDIO_DIR / "train_mod.csv")

# Collect [ebird_code, resampled_filename] pairs, one per row of the metadata.
train_audio_infos = train[["ebird_code", "resampled_filename"]].values.tolist()

# Create one output directory per species.
# exist_ok=True keeps this cell re-runnable (Restart & Run All) once the
# directories already exist; without it the second run raises FileExistsError.
TRAIN_5S_DIR.mkdir(parents=True, exist_ok=True)
for ebird_code in train.ebird_code.unique():
    ebird_dir = TRAIN_5S_DIR / ebird_code
    ebird_dir.mkdir(exist_ok=True)

In [5]:
# Build the lookup <ebird_code: [file_name, file_name, ...]>.
# Rows labelled 'nocall' are left out.

file_dict = {}
for ebird_code, file_name in train_audio_infos:
    if ebird_code != 'nocall':
        # setdefault creates the list on first sight of a species
        file_dict.setdefault(ebird_code, []).append(file_name)

len(file_dict)

264

In [8]:
# Settings shared by the extraction helpers below.
PERIOD = 5  # length of one clip, in seconds
# The calling / no-call threshold is not a constant: it is passed to
# extract() as `coefficient`.
NOCALL_DIR = TRAIN_5S_DIR.joinpath('nocall')  # target directory for 'nocall' files
fn_nocall = list()  # file names of the 'nocall' files written by extract()


def brute_force(data, ebird_code, file_name, sr=32000):
    """Save the loudest PERIOD-second window of a recording.

    The recording is cut into consecutive, non-overlapping windows of
    ``sr * PERIOD`` samples. The window with the largest sum of absolute
    amplitudes is written to ``TRAIN_5S_DIR / ebird_code / file_name``;
    on ties the earliest window wins.

    If no window qualifies (the recording is shorter than one window, or
    every full window sums to zero) the first ``sr * PERIOD`` samples are
    written instead, which for a short recording is the whole recording.
    The start index used to default to -1, so in those cases the slice
    ``data[-1:-1 + sr * PERIOD]`` held at most the final sample.

    Parameters
    ----------
    data : array whose first axis is the sample axis
    ebird_code : name of the species sub-directory to write into
    file_name : name of the output file
    sr : sample rate used for windowing and passed to ``sf.write``
    """
    window = sr * PERIOD
    n_windows = data.shape[0] // window

    best_start = 0  # fallback: head of the recording
    best_sum = 0
    for i in range(n_windows):
        start = i * window
        total = np.abs(data[start:start + window]).sum()
        if total > best_sum:  # strict '>' keeps the earliest window on ties
            best_sum = total
            best_start = start

    # save the selected window
    sf.write(TRAIN_5S_DIR / ebird_code / file_name,
             data[best_start:best_start + window], sr)

def _classify_clips(data, window, coefficient):
    """Cut `data` into consecutive `window`-sample clips and label each one.

    A clip is 'calling' when its peak absolute amplitude is greater than
    `coefficient` times its mean absolute amplitude; otherwise it is
    'nocall'. Trailing samples that do not fill a whole window are ignored.

    Returns ``(calling_clips, nocall_clips)``: two lists of slices of
    `data`, each in original order.
    """
    calling_clips, nocall_clips = [], []
    for i in range(data.shape[0] // window):
        clip = data[i * window:(i + 1) * window]  # [0, 5), [5, 10), ...
        magnitude = np.abs(clip)
        if coefficient * magnitude.mean() < magnitude.max():  # is calling!
            calling_clips.append(clip)
        else:
            nocall_clips.append(clip)
    return calling_clips, nocall_clips


def extract(ebird_code, coefficient, nocall=False, min_nocall_blocks=12):
    """Extract 'calling' or 'nocall' audio for every file of one species.

    Each file listed in ``file_dict[ebird_code]`` is read from
    ``TRAIN_AUDIO_DIR / ebird_code`` and split into PERIOD-second clips.

    * ``nocall=False``: the 'calling' clips are concatenated and written to
      ``TRAIN_5S_DIR / ebird_code / file_name``. If there is no such clip,
      ``brute_force`` saves the loudest window instead.
    * ``nocall=True``: when at least `min_nocall_blocks` 'nocall' clips
      exist, they are concatenated and written to
      ``NOCALL_DIR / ('nocall_' + file_name)``, and that name is appended
      to the module-level ``fn_nocall`` list.

    Parameters
    ----------
    ebird_code : key into ``file_dict`` and name of the species directory
    coefficient : peak-to-mean ratio above which a clip counts as 'calling'
    nocall : select 'nocall' extraction instead of 'calling' extraction
    min_nocall_blocks : minimum number of 'nocall' clips needed to write a
        file (the default 12 is 60 s with PERIOD = 5)

    A file whose processing raises an Exception is reported and skipped.
    KeyboardInterrupt is no longer swallowed, so the loop can be aborted.
    """
    out_dir = TRAIN_5S_DIR / ebird_code

    for file_name in file_dict[ebird_code]:
        try:
            data, sr = sf.read(TRAIN_AUDIO_DIR / ebird_code / file_name)
            calling_clips, nocall_clips = _classify_clips(data, sr * PERIOD, coefficient)

            if not nocall:  ## generate 'calling' files
                if calling_clips:
                    # Joining once avoids re-copying the buffer per clip and
                    # also works for 2-D (multi-channel) clips.
                    sf.write(out_dir / file_name, np.concatenate(calling_clips), sr)  # save in the same name
                else:
                    # No 'calling' section found -> save the loudest window,
                    # using the sample rate that was actually read.
                    brute_force(data, ebird_code, file_name, sr=sr)

            elif nocall_clips and len(nocall_clips) >= min_nocall_blocks:  ## generate 'nocall' files
                fn = 'nocall_' + file_name
                sf.write(NOCALL_DIR / fn, np.concatenate(nocall_clips), sr)
                fn_nocall.append(fn)

        except Exception as exc:
            # Best effort: report why the file failed and move on.
            print('skipped', ebird_code, file_name, '-', repr(exc))






In [7]:
# 'calling' data extraction
def solve(debug=False):
    """Run the 'calling' extraction (coefficient=20) for every species in `train`.

    The 'nocall' label is skipped. With debug=True the loop stops after
    the first species that was actually processed.
    """
    for code in tqdm(train.ebird_code.unique()):
        if code != 'nocall':
            extract(code, coefficient=20, nocall=False)
            if debug:
                break

solve()

 56%|█████▌    | 147/264 [10:04<09:42,  4.98s/it]

skipped lotduc XC195038.wav


100%|██████████| 264/264 [17:11<00:00,  3.91s/it]


In [9]:
# 'nocall' data extraction

# exist_ok=True so the cell can be re-run once the directory exists.
NOCALL_DIR.mkdir(parents=True, exist_ok=True)

def solve(debug = False):
    """Run the 'nocall' extraction (coefficient=5) for every species in `train`.

    The 'nocall' label itself is skipped. With debug=True the loop stops
    after the first species that was actually processed.
    """
    for ebird_code in tqdm(train.ebird_code.unique()):
        if(ebird_code == 'nocall'): continue
        extract(ebird_code, coefficient=5, nocall=True)

        if(debug): break

# extract() appends to the module-level fn_nocall list. Empty it in place
# first so re-running this cell does not record every file twice.
fn_nocall.clear()
solve()
print(len(fn_nocall))

 56%|█████▌    | 147/264 [01:41<01:24,  1.39it/s]

skipped lotduc XC195038.wav


100%|██████████| 264/264 [02:56<00:00,  1.49it/s]

28





In [11]:
# Describe every generated 'nocall' file with the key columns of `train`;
# the generated name is used for both 'filename' and 'resampled_filename'.
columns = ['ebird_code', 'filename', 'resampled_filename']
ebird_code = ['nocall'] * len(fn_nocall)
nocall_df = pd.DataFrame(list(zip(ebird_code, fn_nocall, fn_nocall)), columns=columns)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
# Rows for these files are dropped from `train` first, so re-running the
# cell does not add them a second time (on the first run nothing matches).
already_added = train["resampled_filename"].isin(fn_nocall)
train = pd.concat([train[~already_added], nocall_df])
train.tail()

Unnamed: 0,rating,playback_used,ebird_code,channels,date,pitch,duration,filename,speed,species,...,author,primary_label,longitude,length,time,recordist,license,resampled_sampling_rate,resampled_filename,resampled_channels
23,,,nocall,,,,,nocall_XC205945.wav,,,...,,,,,,,,,nocall_XC205945.wav,
24,,,nocall,,,,,nocall_XC313078.wav,,,...,,,,,,,,,nocall_XC313078.wav,
25,,,nocall,,,,,nocall_XC329834.wav,,,...,,,,,,,,,nocall_XC329834.wav,
26,,,nocall,,,,,nocall_XC329836.wav,,,...,,,,,,,,,nocall_XC329836.wav,
27,,,nocall,,,,,nocall_XC329880.wav,,,...,,,,,,,,,nocall_XC329880.wav,


In [12]:
# Write the current `train` metadata into the clip directory, without the index column.
train.to_csv(TRAIN_5S_DIR.joinpath("train_mod.csv"), index=False)