In [32]:
import os
from pathlib import Path

import librosa, librosa.display
import numpy as np
import pandas as pd
import scipy, sklearn
import sklearn.preprocessing
from scipy.stats import kurtosis
from scipy.stats import skew
from tqdm import tqdm

%matplotlib inline

In [33]:
# Reading signals from .wav files
GENRES_DIR = Path('data_gtzan/genres_original')


def load_genre_signals(genre, root=GENRES_DIR):
    """Load every .wav file of one genre as an audio time series.

    Parameters
    ----------
    genre : str
        Name of the sub-directory of ``root`` that holds the genre's clips.
    root : pathlib.Path
        Directory containing one sub-directory per genre.

    Returns
    -------
    list
        One signal per file (the first element returned by ``librosa.load``;
        the accompanying sample rate is discarded). Empty when no file matches.
    """
    # glob() yields paths in filesystem order; sorting keeps the row order of
    # everything derived from these lists reproducible across machines.
    wav_paths = sorted((root / genre).glob('*.wav'))
    return [librosa.load(wav_path)[0] for wav_path in wav_paths]


blues = load_genre_signals('blues')
classical = load_genre_signals('classical')
country = load_genre_signals('country')
disco = load_genre_signals('disco')
hiphop = load_genre_signals('hiphop')
jazz = load_genre_signals('jazz')
metal = load_genre_signals('metal')
pop = load_genre_signals('pop')
reggae = load_genre_signals('reggae')
rock = load_genre_signals('rock')

In [34]:
# Genre names; their position in this list is the numeric label of the genre.
genres = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]

In [1]:
# Functions for extracting all features of one signal
def _summarize_features(features):
    """Collapse every feature series into seven summary statistics.

    Parameters
    ----------
    features : dict
        Maps a feature name to a 1-D array of values.

    Returns
    -------
    dict
        For each input key ``k`` the entries ``k_mean``, ``k_std``,
        ``k_kurtosis``, ``k_skew``, ``k_median``, ``k_min`` and ``k_max``,
        inserted in that order.
    """
    stats = {}
    for name, series in features.items():
        stats['{}_mean'.format(name)] = np.mean(series)
        stats['{}_std'.format(name)] = np.std(series)
        stats['{}_kurtosis'.format(name)] = kurtosis(series)
        stats['{}_skew'.format(name)] = skew(series)
        stats['{}_median'.format(name)] = np.median(series)
        stats['{}_min'.format(name)] = np.min(series)
        stats['{}_max'.format(name)] = np.max(series)
    return stats


def extract_features(signal, derivative=0, sr=22050, n_fft=2048, hop_length=512):
    """Compute summary statistics of spectral features for one audio signal.

    Parameters
    ----------
    signal : array
        Audio time series, as returned by ``librosa.load``.
    derivative : int
        When non-zero, every feature series is replaced by its delta of this
        order (``librosa.feature.delta``) before the statistics are taken.
    sr : int
        Sample rate of ``signal``.
    n_fft : int
        FFT window size of the STFT.
    hop_length : int
        Hop between successive frames.

    Returns
    -------
    dict
        Maps ``'<feature>_<statistic>'`` to a scalar, where the statistics are
        mean, std, kurtosis, skew, median, min and max.
    """
    # Magnitude STFT; the phase component is not needed.
    S, _ = librosa.magphase(librosa.stft(y=signal, n_fft=n_fft, hop_length=hop_length))

    features = {
        'centroid': librosa.feature.spectral_centroid(S=S, sr=sr).ravel(),
        'bandwidth': librosa.feature.spectral_bandwidth(S=S, sr=sr).ravel(),
        # NOTE(review): the multi-band contrast matrix is flattened into a
        # single series, so its statistics (and deltas) mix all bands.
        'contrast': librosa.feature.spectral_contrast(S=S, sr=sr).ravel(),
        'flatness': librosa.feature.spectral_flatness(S=S, power=2).ravel(),
        'rolloff': librosa.feature.spectral_rolloff(S=S, sr=sr).ravel(),
        'zcr': librosa.feature.zero_crossing_rate(
            signal, frame_length=n_fft, hop_length=hop_length).ravel(),
        # frame_length has to match the FFT size that produced S, otherwise
        # rms() rejects the spectrogram for any non-default n_fft.
        'rmse': librosa.feature.rms(S=S, frame_length=n_fft).ravel(),
    }

    # Power spectrogram -> mel spectrogram -> MFCCs (S is already a magnitude).
    power_spec = S ** 2
    melspec = librosa.feature.melspectrogram(
        S=power_spec, sr=sr, n_fft=n_fft, hop_length=hop_length)
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(melspec), sr=sr, n_mfcc=13)
    for index, coefficients in enumerate(mfcc):
        features['mfcc_{}'.format(index)] = coefficients.ravel()

    # The audio must be passed by keyword: it is keyword-only in recent librosa.
    chroma_cqt = librosa.feature.chroma_cqt(y=signal, sr=sr, hop_length=hop_length)
    for index, chroma_bin in enumerate(chroma_cqt):
        features['chroma_cqt_{}'.format(index)] = chroma_bin.ravel()

    # Polynomial coefficients of orders 0, 1 and 2 fitted to each spectrogram
    # frame; sr is forwarded so the frequency axis matches the signal.
    for order in range(3):
        coefficients = librosa.feature.poly_features(S=S, sr=sr, order=order)
        for index in range(order + 1):
            features['poly_features_{}_{}'.format(order, index)] = coefficients[index].ravel()

    if derivative != 0:
        # Build a new dict instead of reassigning values while iterating.
        features = {
            name: librosa.feature.delta(series, order=derivative)
            for name, series in features.items()
        }

    return _summarize_features(features)


In [36]:
# Assigning feature stats to variables
signals_by_genre = {
    'blues': blues,
    'classical': classical,
    'country': country,
    'disco': disco,
    'hiphop': hiphop,
    'jazz': jazz,
    'metal': metal,
    'pop': pop,
    'reggae': reggae,
    'rock': rock,
}


def extract_genre_features(derivative=0):
    """Run ``extract_features`` on every loaded signal, grouped by genre.

    Parameters
    ----------
    derivative : int
        Forwarded to ``extract_features``; 0 means no derivative.

    Returns
    -------
    dict
        Maps ``'<genre>_features'`` to a list with one feature-stats dict per
        signal of that genre, in the order of ``signals_by_genre``.
    """
    return {
        '{}_features'.format(genre): [
            extract_features(signal, derivative=derivative)
            # Label each bar so the genre/derivative being processed is visible.
            for signal in tqdm(signals, desc='{} (derivative={})'.format(genre, derivative))
        ]
        for genre, signals in signals_by_genre.items()
    }


# Features without derivative
features = extract_genre_features()

# Features with derivative 1
features_delta1 = extract_genre_features(derivative=1)

# Features with derivative 2
features_delta2 = extract_genre_features(derivative=2)

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:41<00:00,  1.01s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:53<00:00,  1.13s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:53<00:00,  1.13s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:52<00:00,  1.12s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:52<00:00,  1.13s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [02:20<00:00,  1.41s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [02:17<00:00,  1.37s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [02:15<00:00,  1.36s/it]
100%|███████████████████████████████████

In [45]:
# Concatenating features in dataframes
def build_feature_frame(features_by_genre, suffix=''):
    """Stack the per-genre feature dicts into one DataFrame.

    Parameters
    ----------
    features_by_genre : dict
        Maps ``'<genre>_features'`` to a list of feature-stats dicts.
    suffix : str
        Appended to every column name when non-empty.

    Returns
    -------
    pandas.DataFrame
        One row per signal, genres in the order of ``genres``, with a fresh
        0..N-1 index so that frames can be joined column-wise by position.
    """
    # Collect all rows first and build the frame once, instead of growing a
    # DataFrame with repeated concat calls.
    rows = [
        row
        for genre in genres
        for row in features_by_genre['{}_features'.format(genre)]
    ]
    frame = pd.DataFrame(rows)
    return frame.add_suffix(suffix) if suffix else frame


# to_csv does not create missing directories.
Path('outputs').mkdir(parents=True, exist_ok=True)

# Features without derivatives
features_df = build_feature_frame(features)
features_df.to_csv('outputs/features.csv', index=False)

# Derivative 1 (columns get the '_d1' suffix)
features_df_delta1 = build_feature_frame(features_delta1, suffix='_d1')
features_df_delta1.to_csv('outputs/features_delta1.csv', index=False)

# Derivative 2 (columns get the '_d2' suffix)
features_df_delta2 = build_feature_frame(features_delta2, suffix='_d2')
features_df_delta2.to_csv('outputs/features_delta2.csv', index=False)

# Dataframe containing the features without derivative and with derivative 1
features_wd1 = pd.concat([features_df, features_df_delta1], axis=1)
features_wd1.to_csv('outputs/features_wd1.csv', index=False)

# Dataframe containing the features without derivative and with derivatives 1 and 2
features_wd1n2 = pd.concat([features_wd1, features_df_delta2], axis=1)
features_wd1n2.to_csv('outputs/features_wd1n2.csv', index=False)


In [46]:
# Scaling all values for machine learning algorithm
# NOTE: the same scaler object is refit for each frame, so after this cell it
# holds the parameters fitted on features_wd1n2 only.
scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))

scaled_features = pd.DataFrame(
    scaler.fit_transform(features_df.values), columns=features_df.columns)
scaled_features_wd1 = pd.DataFrame(
    scaler.fit_transform(features_wd1.values), columns=features_wd1.columns)
scaled_features_wd1n2 = pd.DataFrame(
    scaler.fit_transform(features_wd1n2.values), columns=features_wd1n2.columns)

# Adding labels to audio features for clustering.
# The label of a row is the position of its genre in `genres`. Each genre
# contributes as many labels as it has feature rows, so genres with different
# numbers of clips are still labelled correctly.
labels_list = [
    label
    for label, genre in enumerate(genres)
    for _ in features['{}_features'.format(genre)]
]
assert len(labels_list) == len(scaled_features), \
    'number of labels does not match number of feature rows'

scaled_features['label'] = labels_list
scaled_features_wd1['label'] = labels_list
scaled_features_wd1n2['label'] = labels_list

scaled_features.to_csv('outputs/scaled_features.csv', index=False)
scaled_features_wd1.to_csv('outputs/scaled_features_wd1.csv', index=False)
scaled_features_wd1n2.to_csv('outputs/scaled_features_wd1n2.csv', index=False)