### Imports

In [1]:
import librosa  # need pip install librosa
import time
import pickle
import numpy as np
from random import random, shuffle
import matplotlib.pyplot as plt
from pydub import AudioSegment
import os
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, cohen_kappa_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.feature_selection import SelectKBest, SelectPercentile, f_classif

### Load data

In [2]:
# Adjust to user
audio_path = '../Train_Segments/'

In [3]:
#List of all songs in the audio directory, without distinction between chorus or verse 
song_names = list(set([f.split('_')[0] for f in os.listdir(audio_path) if f.endswith('.mp3')]))
print(song_names[:5])
#Load audio segments
start = time.time()
print('Loading Segments...')
chorusses = [librosa.core.load(audio_path+name+'_chorus.mp3') for name in song_names]
print('Chorusses Loaded')
verses = [librosa.core.load(audio_path+name+'_verse.mp3') for name in song_names]
t = time.time()-start
print('Verses Loaded\nAll {} songs loaded ({:02.0f}:{:02.0f})'.format(len(song_names),t//60,t%60))

assert len(verses) == len(chorusses) #we need the same number of chorusses as verses

['ItsTooLate', 'Help', 'YouReallyGotAHoldOnMe', 'ImGoingSlightlyMad', 'BabysInBlack']
Loading Segments...
Chorusses Loaded
Verses Loaded
All 170 songs loaded (00:17)


### Feature extraction functions

In [4]:
def extract_mfcc(y, sr):
    mfcc_mat = librosa.feature.mfcc(y, sr, n_mfcc=13)
    mfcc_vec = mfcc_mat.mean(axis=1)
    return mfcc_vec

In [5]:
def extract_chroma(y, sr):
    chroma_mat = librosa.feature.chroma_stft(y, sr)
    return chroma_mat.mean(axis=1)

In [6]:
def extract_spectrogram(y, sr):
    spectro_mat = librosa.feature.melspectrogram(y, sr)
    return spectro_mat.mean(axis=1)

In [7]:
def extract_centroid(y, sr):
    centroid_all = librosa.feature.spectral_centroid(y, sr)
    return centroid_all.mean(axis=1)

In [8]:
def extract_tempo(y, sr):
    return librosa.beat.tempo(y, sr)

In [9]:
def extract_contrast(y, sr):
    contrast = librosa.feature.spectral_contrast(y, sr)
    return contrast.mean(axis=1)

In [10]:
def extract_flatness(y, sr):
    flatness = librosa.feature.spectral_flatness(y, hop_length=65536)
    return np.ravel(flatness)

In [11]:
def extract_rolloff(y, sr):
    rolloff = librosa.feature.spectral_rolloff(y, sr)
    return rolloff.mean(axis=1)

In [12]:
def extract_poly_feats(y, sr):
    poly = librosa.feature.poly_features(y, sr, order=3)
    return poly.mean(axis=1)

### Combining feature extractors and group chorus and verse features

In [13]:
def extract_features(songs):    
    '''
    Extract all features for each song in songs
    '''
    feature_names = []
    
    mfcc_list = [extract_mfcc(y, sr) for y, sr in songs]
    feature_names += ['mfcc_{}'.format(i) for i in range(1,np.shape(mfcc_list)[-1]+1)]
    
    chroma_list = [extract_chroma(y, sr) for y, sr in songs]
    feature_names += ['chroma_{}'.format(i) for i in range(1,np.shape(chroma_list)[-1]+1)]

    spectro_list = [extract_spectrogram(y, sr) for y, sr in songs]
    feature_names += ['spectogram_{}'.format(i) for i in range(1,np.shape(spectro_list)[-1]+1)]
    
    centroid_list = [extract_centroid(y, sr) for y, sr in songs]
    feature_names += ['centroid']
    
    tempo_list = [extract_tempo(y, sr) for y, sr in songs]
    feature_names += ['tempo']
    
    contrast_list = [extract_contrast(y, sr) for y, sr in songs]
    feature_names += ['contrast_{}'.format(i) for i in range(1,np.shape(contrast_list)[-1]+1)]
    
    rolloff_list = [extract_rolloff(y, sr) for y, sr in songs]
    feature_names += ['rolloff']
    
    poly_list = [extract_poly_feats(y, sr) for y, sr in songs]
    feature_names += ['polynomial_coef_{}'.format(i) for i in range(1,np.shape(poly_list)[-1]+1)]
    
    len_list = [[len(y)] for y,_ in songs]
    feature_names += ['segment length']
    
    
    features = np.concatenate((mfcc_list, 
                               chroma_list,
                               spectro_list,
                               centroid_list,
                               tempo_list,
                               contrast_list,
                               rolloff_list,
                               poly_list,
                               len_list,
                              ), axis=1)
    
    return features, feature_names

##### Extraction

In [14]:
start = time.time()

print('Extracting chorus features...')
c_features, feature_names = extract_features(chorusses)

print('Extracting verse features...')
v_features,_ = extract_features(verses)

t = int(time.time()-start)
print('Done! ({:02.0f}:{:02.0f})'.format(t//60,t%60))

with open('../chorus_features.p', 'wb') as f:
    pickle.dump(c_features, f)
with open('../verse_features.p', 'wb') as f:
    pickle.dump(v_features, f)
with open('../feature_names.p', 'wb') as f:
    pickle.dump(feature_names, f)
with open('../song_names.p', 'wb') as f:
    pickle.dump(song_names, f)

Extracting chorus features...
Extracting verse features...
Done! (02:02)
