In [1]:
import warnings
warnings.filterwarnings('ignore')
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import kurtosis
from scipy.stats import skew
from numpy import mean, std

from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import pickle

In [2]:
gtzan_dir = 'gtzan/'

genres = {'metal': 0, 
          'disco': 1, 
          'classical': 2, 
          'hiphop': 3, 
          'jazz': 4, 
          'country': 5, 
          'pop': 6, 
          'blues': 7, 
          'reggae': 8, 
          'rock': 9}

In [3]:
def get_moments_of_features(features):
    result = {}
    for feature, value in features.items():
        result['{}_mean'.format(feature)] = mean(value)
        result['{}_std'.format(feature)] = std(value)
        result['{}_kurtosis'.format(feature)] = kurtosis(value)
        result['{}_skew'.format(feature)] = skew(value)
    return result

In [4]:
def get_moments_of_signal(features, signal):
    features['signal_mean'] = mean(signal)
    features['signal_std'] = std(signal)
    features['signal_kurtosis'] = kurtosis(signal)
    features['signal_skew'] = skew(signal)
    return features

In [5]:
def extract_features(y, 
                     sr, 
                     n_fft = 1024, 
                     hop_length = 512):

    features = {'chroma': None,
                'centr': None, 
                'roloff': None, 
                'flux': None, 
                'rmse': None, 
                'zcr': None,
                'bandwith': None,
                'contrast': None}
    
    features['chroma'] = librosa.feature.chroma_stft(y, sr, n_fft=n_fft, hop_length=hop_length).ravel()
    features['centr'] = librosa.feature.spectral_centroid(y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel()
    features['roloff'] = librosa.feature.spectral_rolloff(y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel()
    features['zcr'] = librosa.feature.zero_crossing_rate(y, frame_length=n_fft, hop_length=hop_length).ravel()
    features['rmse'] = librosa.feature.rmse(y, frame_length=n_fft, hop_length=hop_length).ravel()
    features['flux'] = librosa.onset.onset_strength(y=y, sr=sr).ravel()
    features['bandwith'] = librosa.feature.spectral_bandwidth(y, sr, n_fft=n_fft, hop_length=hop_length).ravel()
    features['contrast'] = librosa.feature.spectral_contrast(y = y, sr = sr, n_fft=n_fft).ravel()
    
    
    mfcc = librosa.feature.mfcc(y, n_fft = n_fft, hop_length = hop_length, n_mfcc=13)
    
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()
    
    features = get_moments_of_features(features)
    
    features = get_moments_of_signal(features, y)
    
    features['tempo'] = librosa.beat.tempo(y, sr=sr)[0]
    
    return features

In [6]:
def split_songs(X, genre, window = 0.1, overlap = 0.5):
    # Empty lists to save results
    song_parts = []
    parts_genres = []

    # Get song shape
    xshape = X.shape[0]
    chunk = int(xshape*window)
    offset = int(chunk*(1.-overlap))
    
    # Split song and create new by windows
    parts = [X[i:i+chunk] for i in range(0, xshape - chunk + offset, offset)]
    for part in parts:
        song_parts.append(part)
        parts_genres.append(genre)

    return np.array(song_parts), np.array(parts_genres)

In [7]:
def read_and_extract_features(src_dir, genres, song_samples = 660000):    
    # Empty array of dicts with the processed features from all files
    arr_specs = []
    arr_genres = []
    arr_features = []

    # Read files from the folders
    for x,_ in genres.items():
        folder = src_dir + x
        
        for root, subdirs, files in os.walk(folder):
            for file in files:
                # Read the audio file
                file_name = folder + "/" + file
                signal, sr = librosa.load(file_name)
                signal = signal[:song_samples]
                
                # Debug process
                print("Reading file: {}".format(file_name))
                
                # Convert to dataset of spectograms/melspectograms
                signals, y = split_songs(signal, genres[x])
                
                # extract features for every 1/19 part of every song.
                for cut_signal in signals:
                    features = extract_features(cut_signal, sr)
                    features['genre'] = genres[x]
                    arr_features.append(features)
                    
    return arr_features

In [8]:
features = read_and_extract_features(gtzan_dir, genres)
pickle.dump(features, open('cached_music_features.pkl', "wb"))

Reading file: gtzan/metal/metal.00086.wav
Reading file: gtzan/metal/metal.00000.wav
Reading file: gtzan/metal/metal.00059.wav
Reading file: gtzan/metal/metal.00096.wav
Reading file: gtzan/metal/metal.00091.wav
Reading file: gtzan/metal/metal.00034.wav
Reading file: gtzan/metal/metal.00098.wav
Reading file: gtzan/metal/metal.00045.wav
Reading file: gtzan/metal/metal.00033.wav
Reading file: gtzan/metal/metal.00065.wav
Reading file: gtzan/metal/metal.00030.wav
Reading file: gtzan/metal/metal.00007.wav
Reading file: gtzan/metal/metal.00092.wav
Reading file: gtzan/metal/metal.00077.wav
Reading file: gtzan/metal/metal.00097.wav
Reading file: gtzan/metal/metal.00026.wav
Reading file: gtzan/metal/metal.00031.wav
Reading file: gtzan/metal/metal.00043.wav
Reading file: gtzan/metal/metal.00010.wav
Reading file: gtzan/metal/metal.00099.wav
Reading file: gtzan/metal/metal.00044.wav
Reading file: gtzan/metal/metal.00074.wav
Reading file: gtzan/metal/metal.00009.wav
Reading file: gtzan/metal/metal.00

Reading file: gtzan/disco/disco.00005.wav
Reading file: gtzan/disco/disco.00026.wav
Reading file: gtzan/disco/disco.00058.wav
Reading file: gtzan/disco/disco.00000.wav
Reading file: gtzan/classical/classical.00016.wav
Reading file: gtzan/classical/classical.00044.wav
Reading file: gtzan/classical/classical.00064.wav
Reading file: gtzan/classical/classical.00038.wav
Reading file: gtzan/classical/classical.00024.wav
Reading file: gtzan/classical/classical.00046.wav
Reading file: gtzan/classical/classical.00092.wav
Reading file: gtzan/classical/classical.00079.wav
Reading file: gtzan/classical/classical.00042.wav
Reading file: gtzan/classical/classical.00063.wav
Reading file: gtzan/classical/classical.00051.wav
Reading file: gtzan/classical/classical.00025.wav
Reading file: gtzan/classical/classical.00023.wav
Reading file: gtzan/classical/classical.00018.wav
Reading file: gtzan/classical/classical.00013.wav
Reading file: gtzan/classical/classical.00081.wav
Reading file: gtzan/classical/cl

Reading file: gtzan/hiphop/hiphop.00004.wav
Reading file: gtzan/hiphop/hiphop.00052.wav
Reading file: gtzan/hiphop/hiphop.00097.wav
Reading file: gtzan/hiphop/hiphop.00051.wav
Reading file: gtzan/hiphop/hiphop.00008.wav
Reading file: gtzan/hiphop/hiphop.00034.wav
Reading file: gtzan/hiphop/hiphop.00095.wav
Reading file: gtzan/hiphop/hiphop.00074.wav
Reading file: gtzan/hiphop/hiphop.00070.wav
Reading file: gtzan/hiphop/hiphop.00084.wav
Reading file: gtzan/hiphop/hiphop.00073.wav
Reading file: gtzan/hiphop/hiphop.00066.wav
Reading file: gtzan/hiphop/hiphop.00092.wav
Reading file: gtzan/hiphop/hiphop.00050.wav
Reading file: gtzan/hiphop/hiphop.00087.wav
Reading file: gtzan/hiphop/hiphop.00001.wav
Reading file: gtzan/hiphop/hiphop.00056.wav
Reading file: gtzan/hiphop/hiphop.00086.wav
Reading file: gtzan/hiphop/hiphop.00013.wav
Reading file: gtzan/hiphop/hiphop.00049.wav
Reading file: gtzan/hiphop/hiphop.00030.wav
Reading file: gtzan/hiphop/hiphop.00096.wav
Reading file: gtzan/hiphop/hipho

Reading file: gtzan/country/country.00039.wav
Reading file: gtzan/country/country.00084.wav
Reading file: gtzan/country/country.00077.wav
Reading file: gtzan/country/country.00049.wav
Reading file: gtzan/country/country.00068.wav
Reading file: gtzan/country/country.00064.wav
Reading file: gtzan/country/country.00088.wav
Reading file: gtzan/country/country.00033.wav
Reading file: gtzan/country/country.00031.wav
Reading file: gtzan/country/country.00081.wav
Reading file: gtzan/country/country.00032.wav
Reading file: gtzan/country/country.00019.wav
Reading file: gtzan/country/country.00046.wav
Reading file: gtzan/country/country.00058.wav
Reading file: gtzan/country/country.00048.wav
Reading file: gtzan/country/country.00085.wav
Reading file: gtzan/country/country.00034.wav
Reading file: gtzan/country/country.00038.wav
Reading file: gtzan/country/country.00050.wav
Reading file: gtzan/country/country.00097.wav
Reading file: gtzan/country/country.00001.wav
Reading file: gtzan/country/countr

Reading file: gtzan/blues/blues.00028.wav
Reading file: gtzan/blues/blues.00056.wav
Reading file: gtzan/blues/blues.00095.wav
Reading file: gtzan/blues/blues.00092.wav
Reading file: gtzan/blues/blues.00066.wav
Reading file: gtzan/blues/blues.00038.wav
Reading file: gtzan/blues/blues.00085.wav
Reading file: gtzan/blues/blues.00074.wav
Reading file: gtzan/blues/blues.00058.wav
Reading file: gtzan/blues/blues.00010.wav
Reading file: gtzan/blues/blues.00040.wav
Reading file: gtzan/blues/blues.00012.wav
Reading file: gtzan/blues/blues.00026.wav
Reading file: gtzan/blues/blues.00076.wav
Reading file: gtzan/blues/blues.00083.wav
Reading file: gtzan/blues/blues.00089.wav
Reading file: gtzan/blues/blues.00084.wav
Reading file: gtzan/blues/blues.00072.wav
Reading file: gtzan/blues/blues.00034.wav
Reading file: gtzan/blues/blues.00043.wav
Reading file: gtzan/blues/blues.00078.wav
Reading file: gtzan/blues/blues.00075.wav
Reading file: gtzan/blues/blues.00041.wav
Reading file: gtzan/blues/blues.00

Reading file: gtzan/rock/rock.00006.wav
Reading file: gtzan/rock/rock.00070.wav
Reading file: gtzan/rock/rock.00050.wav
Reading file: gtzan/rock/rock.00076.wav
Reading file: gtzan/rock/rock.00097.wav
Reading file: gtzan/rock/rock.00000.wav
Reading file: gtzan/rock/rock.00056.wav
Reading file: gtzan/rock/rock.00042.wav
Reading file: gtzan/rock/rock.00009.wav
Reading file: gtzan/rock/rock.00014.wav
Reading file: gtzan/rock/rock.00053.wav
Reading file: gtzan/rock/rock.00018.wav
Reading file: gtzan/rock/rock.00067.wav
Reading file: gtzan/rock/rock.00058.wav
Reading file: gtzan/rock/rock.00047.wav
Reading file: gtzan/rock/rock.00063.wav
Reading file: gtzan/rock/rock.00038.wav
Reading file: gtzan/rock/rock.00094.wav
Reading file: gtzan/rock/rock.00034.wav
Reading file: gtzan/rock/rock.00003.wav
Reading file: gtzan/rock/rock.00098.wav
Reading file: gtzan/rock/rock.00073.wav
Reading file: gtzan/rock/rock.00054.wav
Reading file: gtzan/rock/rock.00019.wav
Reading file: gtzan/rock/rock.00028.wav
