## Data Preparation & Feature Extractions

In this notebook, I extract key features, create a dataframe, then export as csv for modeling purposes. There is a Python module called Librosa which is suitable for music/audio/sound data analysis.

In [5]:
# Imports

import numpy as np
import pandas as pd
import librosa
import librosa.display
from librosa import feature
import IPython.display as ipd
from glob import glob
from sklearn.preprocessing import scale
import scipy

In [6]:
## Function that loops through each genre folder and then compiles all the audio file names

def compile_audio_files(genre_list):
    
    compiled_list = []
    
    for genre in genre_list:
        audio_data_dir = f'./genres/{genre}/'
        audio_files = glob(audio_data_dir + '*.wav')
        compiled_list += audio_files
        
    return compiled_list

In [7]:
# Loop through audio files in each genre and add the audio file name to the list

genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

final_list = compile_audio_files(genres)
final_list[:20] # Showing the first 20 to save space

['./genres/blues/blues.00093.wav',
 './genres/blues/blues.00087.wav',
 './genres/blues/blues.00050.wav',
 './genres/blues/blues.00044.wav',
 './genres/blues/blues.00078.wav',
 './genres/blues/blues.00079.wav',
 './genres/blues/blues.00045.wav',
 './genres/blues/blues.00051.wav',
 './genres/blues/blues.00086.wav',
 './genres/blues/blues.00092.wav',
 './genres/blues/blues.00084.wav',
 './genres/blues/blues.00090.wav',
 './genres/blues/blues.00047.wav',
 './genres/blues/blues.00053.wav',
 './genres/blues/blues.00052.wav',
 './genres/blues/blues.00046.wav',
 './genres/blues/blues.00091.wav',
 './genres/blues/blues.00085.wav',
 './genres/blues/blues.00081.wav',
 './genres/blues/blues.00095.wav']

In [9]:
# Function that loops through each audio file and then repeats the following process

# Loads the audio file
# Separates harmonic and percussive signals
# Extract Tempo, Chroma Enegery Normalized (CENS), Mel-frequency cepstral coefficients (MFCCs), Spectral Centroid, Contrast, Rolloff, Zero Crossing Rate
# Create a dataframe to house all features

def feature_extraction(files):
    
    tempo_final = []
    chroma_mean_0 = []
    chroma_mean_1 = []
    chroma_mean_2 = []
    chroma_mean_3 = []
    chroma_mean_4 = []
    chroma_mean_5 = []
    chroma_mean_6 = []
    chroma_mean_7 = []
    chroma_mean_8 = []
    chroma_mean_9 = []
    chroma_mean_10 = []
    chroma_mean_11 = []
    chroma_std_0 = []
    chroma_std_1 = []
    chroma_std_2 = []
    chroma_std_3 = []
    chroma_std_4 = []
    chroma_std_5 = []
    chroma_std_6 = []
    chroma_std_7 = []
    chroma_std_8 = []
    chroma_std_9 = []
    chroma_std_10 = []
    chroma_std_11 = []
    mfccs_mean_0 = []
    mfccs_mean_1 = []
    mfccs_mean_2 = []
    mfccs_mean_3 = []
    mfccs_mean_4 = []
    mfccs_mean_5 = []
    mfccs_mean_6 = []
    mfccs_mean_7 = []
    mfccs_mean_8 = []
    mfccs_mean_9 = []
    mfccs_mean_10 = []
    mfccs_mean_11 = []
    mfccs_mean_12 = []
    mfccs_std_0 = []
    mfccs_std_1 = []
    mfccs_std_2 = []
    mfccs_std_3 = []
    mfccs_std_4 = []
    mfccs_std_5 = []
    mfccs_std_6 = []
    mfccs_std_7 = []
    mfccs_std_8 = []
    mfccs_std_9 = []
    mfccs_std_10 = []
    mfccs_std_11 = []
    mfccs_std_12 = []
    s_centroid_mean = []
    s_centroid_std = []
    s_centroid_skew = []
    s_contrast_mean = []
    s_contrast_std = []
    s_rolloff_mean = []
    s_rolloff_std = []
    s_rolloff_skew = []
    z_cr_mean = []
    z_cr_std = []
    z_cr_skew = []
    file_name = []
    
    
    # Load the audio
    for file in files:
        # y is signal (1D numpy array), sr is the sample rate
        y, sr = librosa.load(file)
        
        # Separate harmonic and percussive signals
        y_harmonic, y_percussive = librosa.effects.hpss(y)
        
        # Beat Extraction
        tempo, beat_frames = librosa.beat.beat_track(y_harmonic, sr)
        tempo_final.append(tempo)
        
        # Chroma Energy Normalized (CENS)
        chroma = librosa.feature.chroma_cens(y_harmonic, sr)
        chroma_mean_0.append(np.mean(chroma[0]))
        chroma_mean_1.append(np.mean(chroma[1]))
        chroma_mean_2.append(np.mean(chroma[2]))
        chroma_mean_3.append(np.mean(chroma[3]))
        chroma_mean_4.append(np.mean(chroma[4]))
        chroma_mean_5.append(np.mean(chroma[5]))
        chroma_mean_6.append(np.mean(chroma[6]))
        chroma_mean_7.append(np.mean(chroma[7]))
        chroma_mean_8.append(np.mean(chroma[8]))
        chroma_mean_9.append(np.mean(chroma[9]))
        chroma_mean_10.append(np.mean(chroma[10]))
        chroma_mean_11.append(np.mean(chroma[11]))
        
        chroma_std_0.append(np.std(chroma[0]))
        chroma_std_1.append(np.std(chroma[1]))
        chroma_std_2.append(np.std(chroma[2]))
        chroma_std_3.append(np.std(chroma[3]))
        chroma_std_4.append(np.std(chroma[4]))
        chroma_std_5.append(np.std(chroma[5]))
        chroma_std_6.append(np.std(chroma[6]))
        chroma_std_7.append(np.std(chroma[7]))
        chroma_std_8.append(np.std(chroma[8]))
        chroma_std_9.append(np.std(chroma[9]))
        chroma_std_10.append(np.std(chroma[10]))
        chroma_std_11.append(np.std(chroma[11]))
        
        # Mel-frequency cepstral coefficients (MFCCs)
        mfccs = librosa.feature.mfcc(y_harmonic, sr, n_mfcc=13)
        mfccs_mean_0.append(np.mean(mfccs[0]))
        mfccs_mean_1.append(np.mean(mfccs[1]))
        mfccs_mean_2.append(np.mean(mfccs[2]))
        mfccs_mean_3.append(np.mean(mfccs[3]))
        mfccs_mean_4.append(np.mean(mfccs[4]))
        mfccs_mean_5.append(np.mean(mfccs[5]))
        mfccs_mean_6.append(np.mean(mfccs[6]))
        mfccs_mean_7.append(np.mean(mfccs[7]))
        mfccs_mean_8.append(np.mean(mfccs[8]))
        mfccs_mean_9.append(np.mean(mfccs[9]))
        mfccs_mean_10.append(np.mean(mfccs[10]))
        mfccs_mean_11.append(np.mean(mfccs[11]))
        mfccs_mean_12.append(np.mean(mfccs[12]))
    
        mfccs_std_0.append(np.std(mfccs[0]))
        mfccs_std_1.append(np.std(mfccs[1]))
        mfccs_std_2.append(np.std(mfccs[2]))
        mfccs_std_3.append(np.std(mfccs[3]))
        mfccs_std_4.append(np.std(mfccs[4]))
        mfccs_std_5.append(np.std(mfccs[5]))
        mfccs_std_6.append(np.std(mfccs[6]))
        mfccs_std_7.append(np.std(mfccs[7]))
        mfccs_std_8.append(np.std(mfccs[8]))
        mfccs_std_9.append(np.std(mfccs[9]))
        mfccs_std_10.append(np.std(mfccs[10]))
        mfccs_std_11.append(np.std(mfccs[11]))
        mfccs_std_12.append(np.std(mfccs[12]))

        # Spectral Centroid
        s_centroid = librosa.feature.spectral_centroid(y, sr)
        s_centroid_mean.append(np.mean(s_centroid))
        s_centroid_std.append(np.std(s_centroid))
        s_centroid_skew.append(scipy.stats.skew(s_centroid, axis=1)[0])
        
        # Spectral Contrast
        s_contrast = librosa.feature.spectral_contrast(y_harmonic, sr)
        s_contrast_mean.append(np.mean(s_contrast))
        s_contrast_std.append(np.std(s_contrast))
        
        # Spectral Rolloff
        s_rolloff = librosa.feature.spectral_rolloff(y, sr)
        s_rolloff_mean.append(np.mean(s_rolloff))
        s_rolloff_std.append(np.std(s_rolloff))
        s_rolloff_skew.append(scipy.stats.skew(s_rolloff, axis=1)[0])
        
        # Zero Crossing Rate
        z_cr = librosa.feature.zero_crossing_rate(y_harmonic)
        z_cr_mean.append(np.mean(z_cr))
        z_cr_std.append(np.std(z_cr))
        z_cr_skew.append(scipy.stats.skew(z_cr, axis=1)[0])
        
        # Audio file
        file_name.append(file)
    
    
    feature_dict = {
        'tempo' : tempo_final,
        'chroma_mean_0' : chroma_mean_0,
        'chroma_mean_1' : chroma_mean_1,
        'chroma_mean_2' : chroma_mean_2,
        'chroma_mean_3' : chroma_mean_3,
        'chroma_mean_4' : chroma_mean_4,
        'chroma_mean_5' : chroma_mean_5,
        'chroma_mean_6' : chroma_mean_6,
        'chroma_mean_7' : chroma_mean_7,
        'chroma_mean_8' : chroma_mean_8,
        'chroma_mean_9' : chroma_mean_9,
        'chroma_mean_10' : chroma_mean_10,
        'chroma_mean_11' : chroma_mean_11,
        'chroma_std_0' : chroma_std_0,
        'chroma_std_1' : chroma_std_1,
        'chroma_std_2' : chroma_std_2,
        'chroma_std_3' : chroma_std_3,
        'chroma_std_4' : chroma_std_4,
        'chroma_std_5' : chroma_std_5,
        'chroma_std_6' : chroma_std_6,
        'chroma_std_7' : chroma_std_7,
        'chroma_std_8' : chroma_std_8,
        'chroma_std_9' : chroma_std_9,
        'chroma_std_10' : chroma_std_10,
        'chroma_std_11' : chroma_std_11,
        'mfccs_mean_0' : mfccs_mean_0,
        'mfccs_mean_1' : mfccs_mean_1,
        'mfccs_mean_2' : mfccs_mean_2,
        'mfccs_mean_3' : mfccs_mean_3,
        'mfccs_mean_4' : mfccs_mean_4,
        'mfccs_mean_5' : mfccs_mean_5,
        'mfccs_mean_6' : mfccs_mean_6,
        'mfccs_mean_7' : mfccs_mean_7,
        'mfccs_mean_8' : mfccs_mean_8,
        'mfccs_mean_9' : mfccs_mean_9,
        'mfccs_mean_10' : mfccs_mean_10,
        'mfccs_mean_11' : mfccs_mean_11,
        'mfccs_mean_12' : mfccs_mean_12,
        'mfccs_std_0' : mfccs_std_0,
        'mfccs_std_1' : mfccs_std_1,
        'mfccs_std_2' : mfccs_std_2,
        'mfccs_std_3' : mfccs_std_3,
        'mfccs_std_4' : mfccs_std_4,
        'mfccs_std_5' : mfccs_std_5,
        'mfccs_std_6' : mfccs_std_6,
        'mfccs_std_7' : mfccs_std_7,
        'mfccs_std_8' : mfccs_std_8,
        'mfccs_std_9' : mfccs_std_9,
        'mfccs_std_10' : mfccs_std_10,
        'mfccs_std_11' : mfccs_std_11,
        'mfccs_std_12' : mfccs_std_12,
        's_centroid_mean' : s_centroid_mean,
        's_centroid_std' : s_centroid_std,
        's_centroid_skew' : s_centroid_skew,
        's_contrast_mean' : s_contrast_mean,
        's_contrast_std' : s_contrast_std,
        's_rolloff_mean' : s_rolloff_mean,
        's_rolloff_std' : s_rolloff_std,
        's_rolloff_skew' : s_rolloff_skew,
        'z_cr_mean' : z_cr_mean,
        'z_cr_std' : z_cr_std,
        'z_cr_skew' : z_cr_skew,
        'file_name' : file_name
        }
        
    return pd.DataFrame(feature_dict)

In [None]:
# Dataframe 

final_df = feature_extraction(final_list)

In [None]:
# Function that labels genres

def genre_name(file):
    if 'blues' in file:
        return 'blues'
    elif 'classical' in file:
        return 'classical'
    elif 'country' in file:
        return 'country'
    elif 'disco' in file:
        return 'disco'
    elif 'hiphop' in file:
        return 'hiphop'
    elif 'jazz' in file:
        return 'jazz'
    elif 'metal' in file:
        return 'metal'
    elif 'pop' in file:
        return 'pop'
    elif 'reggae' in file:
        return 'reggae'
    elif 'rock' in file:
        return 'rock'

In [None]:
# Assign genre to each row

final_df['genre'] = final_df['file_name'].apply(genre_name)

In [None]:
# Dictionary that categorizes genres in numbers.
# Not ordinal values. Simply for classification purposes.

genre_num_dict = {
    'blues' : 0,
    'classical' : 1,
    'country' : 2,
    'disco' : 3,
    'hiphop' : 4,
    'jazz' : 5,
    'metal' : 6,
    'pop' : 7,
    'reggae' : 8,
    'rock' : 9
}

In [None]:
# Assign numbers to genres (no specific order, only for classification purpose)

final_df['genre_num'] = final_df['genre'].map(genre_num_dict)

In [None]:
# Saving the dataframe as csv

final_df.to_csv('./data/final_data.csv', index=False)