In [218]:
import pandas as pd 
import numpy as np

In [219]:
df = pd.read_csv('Data/features_30_sec.csv')
df.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.wav,661794,0.350088,0.088757,0.130228,0.002827,1784.16585,129774.064525,2002.44906,85882.761315,...,52.42091,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001.wav,661794,0.340914,0.09498,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.10619,0.531217,45.786282,blues
2,blues.00002.wav,661794,0.363637,0.085275,0.17557,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.43972,46.63966,-2.231258,30.573025,blues
3,blues.00003.wav,661794,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,...,44.427753,-3.319597,50.206673,0.636965,37.31913,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004.wav,661794,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.19516,blues


### Preprocessing

In [220]:
df['label'].unique()

array(['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz',
       'metal', 'pop', 'reggae', 'rock'], dtype=object)

In [221]:
# creating a list of labels
df['label']=df['label'].astype('category')
# creating a list of index
df['class_label'] = df['label'].cat.codes

In [241]:
# creating a json type object dictionary to store labels
genre_dict = dict(zip(df.class_label.unique(), df.label.unique()))   
print(genre_dict[0])

blues


In [223]:
df['label'].unique()

['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
Categories (10, object): ['blues', 'classical', 'country', 'disco', ..., 'metal', 'pop', 'reggae', 'rock']

In [224]:
# removed the output values from the list
cols = list(df.columns)
cols.remove('label')

### Splitting data for training and testing

In [225]:
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
X = df.iloc[:,1:28]
y = df['label']
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=3)

### Min-max Normalization

In [226]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

### Feature importance using Random Forest

In [227]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state = 0, n_jobs = -1).fit(X_train_scaled,y_train)
importances = clf.feature_importances_
#[::x] selects every value at x distance apart
## argsort returns the sorted indexes of the array
indices =  np.argsort(importances)[::-1]
# list of labels of the features
features = [X.columns.values[i] for i in indices]
plt.figure()
plt.title("Feature Importance")
plt.bar(range(X.shape[1]),importances[indices])
plt.xticks(range(X.shape[1]),features,rotation=90)
plt.show()

<IPython.core.display.Javascript object>

### Feature importance using Decision Tree

In [228]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(random_state=0).fit(X_train_scaled, y_train)
importances = clf.feature_importances_
indices = np.argsort(importances)[::-1]
names = [X.columns.values[i] for i in indices]
plt.figure()
plt.title("Feature Importance")
plt.bar(range(X.shape[1]), importances[indices])
plt.xticks(range(X.shape[1]), names, rotation=90)
plt.show()

<IPython.core.display.Javascript object>

### Fetching file metadata for prediction

In [230]:
test_df = pd.read_csv('Data/test set.csv')
test_df.head(28)

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,Automatic_Teller(Metal).wav,661794,0.472276,0.068043,0.328868,0.000742,2436.997961,74557.06,2322.447571,32898.95714,...,22.453011,-1.932485,23.373866,-2.266622,21.111026,-6.016707,21.485832,0.404295,20.331105,metal
1,A_Night_In_Tunisia.wav,661794,0.389212,0.085853,0.198375,0.006135,1672.625585,404920.9,1741.816412,172258.6886,...,91.261609,2.133521,122.966744,0.904816,56.161487,-4.106335,44.813595,-4.778181,68.486442,jazz
2,A_Thousand_Years(Pop).wav,661794,0.305757,0.080404,0.105666,0.000832,2077.288883,427163.1,2456.309073,254105.2405,...,48.0328,-8.341142,48.896518,-0.506528,35.377095,-10.093406,46.958441,-2.527459,47.267469,pop
3,Beethoven's_5th_Symphony(Classical).wav,661794,0.2644,0.085957,0.028906,0.000659,1319.89305,427200.6,1568.05611,62944.0999,...,60.257497,-0.840738,60.501465,2.410644,64.40769,2.020258,71.69949,3.961606,82.107269,classical
4,Beethoven_Moonlight_Sonata(Classical).wav,661794,0.183556,0.077236,0.018925,5.6e-05,666.156117,7302.777,1124.088135,21718.22926,...,81.123768,1.752388,81.223624,3.056346,84.86045,1.632566,125.267001,3.197324,155.554954,classical
5,Blues_In_My_Bottle(Blues).wav,661794,0.342137,0.086963,0.133153,0.002487,816.055835,317066.8,1264.146562,252182.6109,...,42.406577,2.979003,51.574792,2.352099,68.332593,-2.847933,57.514106,1.322084,53.45997,blues
6,Bullet_For_My_Valentine(Metal).wav,661794,0.502803,0.071302,0.220305,0.002228,2913.87258,155971.3,2565.699134,68005.94143,...,30.527641,-5.685763,24.790983,-0.624401,30.146755,-4.189709,30.667439,1.526623,29.908243,metal
7,CheapThrills.wav,661794,0.432287,0.081026,0.303281,0.003242,2827.887482,289644.5,2815.635015,87400.64549,...,69.932825,1.7815,91.557152,12.323173,102.643571,4.020115,70.967798,9.92791,99.97335,pop
8,Daylight.wav,661794,0.361848,0.091339,0.246549,0.009677,2166.914733,400633.3,2466.575724,140884.6618,...,37.051616,-2.529102,57.85505,9.978457,54.459723,3.008523,56.395135,6.480777,69.093571,pop
9,Franz_Liszt_Hungarian_Rhapsody_no2(Classical).wav,661794,0.255456,0.083324,0.051834,0.001137,989.289619,36495.01,1161.351364,63401.8499,...,76.574307,-4.700067,57.597026,-3.044525,65.671917,-0.586001,75.261912,6.225117,118.340085,classical


### K-nearest neighbours

In [260]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors = 13)
knn.fit(X_train_scaled,y_train)
knn.score(X_test_scaled,y_test)

0.64

### Prediction using KNN

In [253]:
d1 = test_df.iloc[0,:28]
d1 = d1.drop(['filename'])
data1 = scaler.transform([d1])
genre_prediction = knn.predict(data1)
genre_prediction.score()
genre_prediction[0]


'metal'

### Accuracy of  SVM

In [254]:
from sklearn.svm import SVC
clf = SVC(kernel = 'linear', C=10).fit(X_train_scaled, y_train)
clf.score(X_test_scaled, y_test)

0.624

### Prediction using SVM

In [256]:
d1 = test_df.iloc[0,:28]
d1 = d1.drop(['filename'])
data1 = scaler.transform([d1])
genre_prediction = clf.predict(data1)
genre_prediction[0]

'disco'