In [1]:
%matplotlib inline
import matplotlib.pylab as plt
import IPython.display as ipd
import numpy as np
import pandas as pd
import scipy
import sklearn
import librosa
import librosa.display
from ast import literal_eval
from joblib import Parallel, delayed
from tqdm import tqdm
import seaborn as sns
import pickle
import os.path

np.random.seed(37)

In [2]:
recording_df = pd.read_csv('recording.csv', converters={'types': literal_eval})
print(recording_df.shape)

(41291, 12)


In [3]:
recording_df.head(5)

Unnamed: 0,id,state,en,sp,ssp,gen,lat,lng,types,mp3,mp3_size,duration
0,408991,michigan,black-capped chickadee,atricapillus,atricapillus,poecile,42.1696,-84.2051,[call],./mp3/408991.mp3,1056878,26.31955
1,408988,michigan,black-capped chickadee,atricapillus,atricapillus,poecile,42.1696,-84.2051,[call],./mp3/408988.mp3,1718318,42.85555
2,407827,new york,black-capped chickadee,atricapillus,,poecile,42.033,-79.4222,[song],./mp3/407827.mp3,281570,17.342125
3,406662,illinois,black-capped chickadee,atricapillus,,poecile,41.326,-89.8926,[song],./mp3/406662.mp3,3878909,96.810275
4,394059,new jersey,black-capped chickadee,atricapillus,,poecile,40.7958,-74.0492,"[alarm call, call, flight call]",./mp3/394059.mp3,154378,7.272


In [4]:
def get_spectral_features(y, sr):
    centroid = librosa.feature.spectral_centroid(y, sr=sr)
    bandwidth = librosa.feature.spectral_bandwidth(y, sr=sr)
    flatness = librosa.feature.spectral_flatness(y)
    rolloff = librosa.feature.spectral_rolloff(y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    
    return (centroid, bandwidth, flatness, rolloff, zcr)

def get_chroma_features(y, sr, n_mfcc=12):
    mfcc = librosa.feature.mfcc(y, sr=sr, n_mfcc=n_mfcc).T
    stft = librosa.feature.chroma_stft(y, sr=sr).T
    cqt = librosa.feature.chroma_cqt(y, sr=sr, n_chroma=n_mfcc).T
    cens = librosa.feature.chroma_cens(y, sr=sr, n_chroma=n_mfcc).T
    poly = librosa.feature.poly_features(y, sr=sr, order=n_mfcc - 1).T
    
    scaler = sklearn.preprocessing.StandardScaler()
    mfcc = scaler.fit_transform(mfcc)
    
    return np.hstack((mfcc, stft, cqt, cens, poly))

def pickle_data(fname, data):
    pickle.dump(data, open(fname, 'wb'))
    
def save_features(idx, fname, duration=60.0):
    result = 1
    sname = './features/spectral/{}.p'.format(idx)
    cname = './features/chroma/{}.p'.format(idx)
    
    if os.path.exists(sname) is True and os.path.exists(cname) is True:
        return result
    
    try:
        y, sr = librosa.load(fname, duration=duration)

        if os.path.exists(sname) is False:
            try:
                spectral_features = get_spectral_features(y, sr)
                pickle_data(sname, spectral_features)
            except:
                result = -1

        if os.path.exists(cname) is False:
            try:
                chroma_features = get_chroma_features(y, sr)
                pickle_data(cname, chroma_features)
            except:
                result = -2
    except:
        result = -3
    
    return result

In [5]:
results = Parallel(n_jobs=-1, verbose=5)(delayed(save_features)(r['id'], r['mp3']) for i, r in recording_df.iterrows())

[Parallel(n_jobs=-1)]: Done 2008 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 8848 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 18424 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 30736 tasks      | elapsed:    6.1s
[Parallel(n_jobs=-1)]: Done 41291 out of 41291 | elapsed:   10.1s finished


In [6]:
print('{} files processed'.format(np.sum(results)))

41162 files processed
