In [1]:
import warnings
warnings.filterwarnings('ignore')
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from sklearn.neighbors import KNeighborsClassifier

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.cluster import KMeans

  from numpy.core.umath_tests import inner1d


In [2]:
# Genre dictionary
genres = {'metal': 0, 'disco': 1, 'classical': 2, 'hiphop': 3, 'jazz': 4, 
          'country': 5, 'pop': 6, 'blues': 7, 'reggae': 8, 'rock': 9}

In [3]:
features = pickle.load(open("cached_music_features.pkl", "rb"))

In [4]:
df_features = pd.DataFrame(features)

In [5]:
df_features.head()

Unnamed: 0,bandwith_kurtosis,bandwith_mean,bandwith_skew,bandwith_std,centr_kurtosis,centr_mean,centr_skew,centr_std,chroma_kurtosis,chroma_mean,...,roloff_std,signal_kurtosis,signal_mean,signal_skew,signal_std,tempo,zcr_kurtosis,zcr_mean,zcr_skew,zcr_std
0,-0.736019,2207.978432,0.384195,341.854074,1.396051,2656.998687,1.271128,618.443869,-0.804307,0.586958,...,1098.88432,0.75229,-0.00315,-0.0214,0.089359,129.199219,2.527677,0.173154,1.461315,0.060971
1,-0.411049,2392.311129,-0.142085,303.218685,0.165163,2689.610054,0.992572,703.366405,-1.042674,0.545495,...,1100.310599,1.262312,-0.003201,-0.050187,0.081575,123.046875,0.883735,0.15921,1.186363,0.074393
2,-0.558755,2347.775718,0.460658,283.266363,2.532054,2596.928106,1.565968,663.691883,-0.951767,0.475191,...,1040.116071,1.253679,-0.003217,0.017817,0.085141,123.046875,3.143121,0.149573,1.656892,0.069112
3,0.089714,2284.183064,0.754692,271.201367,5.118727,2639.472902,2.041764,569.197247,-0.826419,0.426117,...,949.001644,0.811596,-0.003261,0.01479,0.091118,123.046875,3.409404,0.163639,1.535117,0.062564
4,0.053184,2240.116552,0.738403,233.250826,0.229051,2420.675268,0.226451,415.01819,-0.856493,0.462298,...,822.153657,1.126058,-0.003208,-0.011495,0.084603,123.046875,-0.100503,0.146954,0.209864,0.048865


In [6]:
X = df_features.drop(['genre'], axis=1).values
y = df_features['genre'].ravel()

Below is splitting data on train and test (70/30) in such a way that every audio track (all of 19 parts of it) will be either only in train or only in test dataset:

In [7]:
scale = StandardScaler()
std_x = scale.fit_transform(X)
X_train, X_test, y_train, y_test = [],[],[],[]
for i in range(0,19000,19 * 100):
    for j in range(0, 19 * 30):
        X_test.append(std_x[i + j])
        y_test.append(y[i + j])
    for j in range(19 * 30, 19 * 100):
        X_train.append(std_x[i + j])
        y_train.append(y[i + j])
        
X_train = np.array(X_train)
X_test= np.array(X_test)
y_train= np.array(y_train)
y_test= np.array(y_test)

In [8]:
X_train.shape

(13300, 89)

Here is the function for prediction by majority voting:

In [9]:
def predict_by_voting(cls, X_train, X_test, y_train, y_test, title):
    
    cls.fit(X_train, y_train)
    y_hat = cls.predict_proba(X_test)
    
    cnt = 0
    for i in range(0,len(X_test),19):
        vec = y_hat[i].copy()
        for j in range(1,19):
            vec += y_hat[i + j]
        if np.argmax(vec) == y_test[i]:
            cnt += 1

    print(title, ": ", cnt / (len(X_test) / 19))

In [10]:
predict_by_voting(KNeighborsClassifier(n_neighbors=5), X_train, X_test, y_train, y_test, "KNN: ")

KNN:  :  0.82


In [12]:
predict_by_voting(KNeighborsClassifier(n_neighbors=10), X_train, X_test, y_train, y_test, "KNN 10:")

KNN 10: :  0.8166666666666667


In [37]:
predict_by_voting(RandomForestClassifier(n_estimators=500, criterion='gini'), X_train, X_test, y_train, y_test, "Random forest: ")

Random forest :  0.7933333333333333


In [11]:
predict_by_voting(SVC(C=2,kernel='rbf', probability=True), X_train, X_test, y_train, y_test, "SVM rbf: ")

SVM rbf:  :  0.87


In [15]:
predict_by_voting(SVC(C=2,kernel='poly', probability=True), X_train, X_test, y_train, y_test, "SVM poly: ")

SVM rbf, C=2 :  0.8


In [16]:
predict_by_voting(SVC(C=2,kernel='linear', probability=True), X_train, X_test, y_train, y_test, "SVM linear: ")

SVM rbf, C=2 :  0.81


In [10]:
import xgboost as xgb

In [12]:
predict_by_voting(xgb.XGBClassifier(learning_rate=0.1, objective='multi:softprob', 
                                   n_estimators=500, sub_sample = 0.8, num_class = len(genres)), X_train, X_test, y_train, y_test, "XGB: ")

XGB:  :  0.8466666666666667


In [12]:
from sklearn.neural_network import MLPClassifier

In [13]:
predict_by_voting(MLPClassifier(max_iter=1000,random_state=2,hidden_layer_sizes=[512,256,256,64]), X_train, X_test, y_train, y_test, "Perceptron: ")

Perceptron:  :  0.87


In [20]:
predict_by_voting(MLPClassifier(max_iter=1000,random_state=2,hidden_layer_sizes=[512,256,64], activation = 'tanh'), X_train, X_test, y_train, y_test, "Perceptron: ")

Perceptron:  :  0.8366666666666667


In [21]:
predict_by_voting(MLPClassifier(max_iter=1000,random_state=2,hidden_layer_sizes=[512,256,256,64], activation = 'relu'), X_train, X_test, y_train, y_test, "Perceptron: ")

Perceptron:  :  0.8666666666666667


In [22]:
predict_by_voting(MLPClassifier(max_iter=5000,random_state=2,hidden_layer_sizes=[512,256,256,64], activation = 'relu'), X_train, X_test, y_train, y_test, "Perceptron: ")

Perceptron:  :  0.8666666666666667
