In [1]:
import librosa
import os
import time
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

# Load data from numpy file
X =  np.load('data/features.npy')
y =  np.load('data/label.npy').ravel()

In [18]:
from sklearn.svm import SVC
from sklearn.svm import LinearSVC


class SVMClassifier():
    
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def classify(self, tuned=False, baseline=False):
        if (tuned):
            parameters = {'kernel':('linear','rbf'), 'C':[1,10]}
            svc = SVC()
            model = GridSearchCV(svc, parameters)
        else:
            model = LinearSVC(C=10, loss='squared_hinge', penalty='l2', tol=0.00001)
        model.fit(X,y)
        score = cross_val_score(model, X, y, cv=5)
        return np.mean(score)

In [29]:
# SVM
print("Support Vector Machine:")
start=time.time()
model = SVMClassifier(X, y)
SVM_score=model.classify()
end = time.time()
SVM_time = end - start
print("Accuracy: ", SVM_score * 100,"%")
print("Time: ", round(SVM_time,2), " seconds.")

# SVM Tuned
print("\nSupport Vector Machine tuned:")
start=time.time()
model = SVMClassifier(X, y)
SVM_score_tuned=model.classify(tuned=True)
end = time.time()
SVM_time_tuned = end - start
print("Accuracy: ", SVM_score_tuned * 100,"%")
print("Time: ", round(SVM_time_tuned,2), " seconds.")

Support Vector Machine:
Accuracy:  57.8 %
Time:  9.07  seconds.

Support Vector Machine Tuned:
Accuracy:  65.1 %
Time:  22.07  seconds.


In [5]:
from sklearn.neighbors import KNeighborsClassifier


class KNNClassifier:
    
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def classify(self, tuned=False):
        if (tuned):
            metrics       = ['minkowski','euclidean','manhattan'] 
            weights       = ['uniform','distance']
            numNeighbors  = np.arange(5,10)
            parameters    = dict(metric=metrics,weights=weights,n_neighbors=numNeighbors)
            knn = KNeighborsClassifier()
            model = GridSearchCV(knn,parameters)
        else:
            model = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2,
                                   metric='minkowski', metric_params=None, n_jobs=1)
        model.fit(X, y)
        score = cross_val_score(model, X, y, cv=5)
        return np.mean(score)

In [6]:
# KNN
print("K-Nearest Neighbor:")
start=time.time()
model = KNNClassifier(X, y)
KNN_score=model.classify()
end = time.time()
KNN_time = end - start
print("Accuracy: ", KNN_score * 100,"%")
print("Time: ", round(KNN_time,2), " seconds.")

# KNN tuned
print("\nK-Nearest Neighbor tuned:")
start=time.time()
model = KNNClassifier(X, y)
KNN_score_tuned=model.classify(tuned=True)
end = time.time()
KNN_time_tuned = end - start
print("Accuracy: ", KNN_score_tuned * 100,"%")
print("Time: ", round(KNN_time_tuned,2), " seconds.")

K-Nearest Neighbor:
Accuracy:  56.0 %
Time:  0.19  seconds.

K-Nearest Neighbor tuned:
Accuracy:  63.5 %
Time:  72.02  seconds.


In [11]:
from sklearn.tree import DecisionTreeClassifier

class DecisionTree:
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def classify(self, tuned=False):
        if (tuned):
            tree = DecisionTreeClassifier()
            parameters = {'criterion':['gini','entropy'],
                          'max_depth':[4,5,6,7,8,9,10,11,12,15,20,30,40,50,70,90,120,150]}
            model = GridSearchCV(tree, parameters)
        else:
            model = DecisionTreeClassifier(criterion='entropy', max_depth=6)
        model.fit(X,y)
        score = cross_val_score(model, X, y, cv=5)
        return np.mean(score)

In [13]:
# Decision Tree
print("Decision Tree:")
start=time.time()
model = DecisionTree(X, y)
Tree_score=model.classify()
end = time.time()
Tree_time = end - start
print("Accuracy: ", Tree_score * 100,"%")
print("Time: ", round(Tree_time,2), " seconds.")

# Decision Tree tuned
print("\nDecision Tree tuned:")
start=time.time()
model = DecisionTree(X, y)
Tree_score_tuned=model.classify(tuned=True)
end = time.time()
Tree_time_tuned = end - start
print("Accuracy: ", Tree_score_tuned * 100,"%")
print("Time: ", round(Tree_time_tuned,2), " seconds.")

Decision Tree:
Accuracy:  48.6 %
Time:  3.38  seconds.

Decision Tree tuned:
Accuracy:  49.7 %
Time:  165.8  seconds.


In [22]:
from sklearn.linear_model import LogisticRegression


class LogisticRegressionClassifier:
    def __init__(self, X, y, tuned=False):
        self.X = X
        self.y = y

    def classify(self, tuned=False):
        if (tuned):
            log = LogisticRegression()
            parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000] }
            model = GridSearchCV(log, parameters)
            
        else:
            model = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1,
                                     class_weight=None, random_state=None, solver='liblinear', max_iter=500,
                                     multi_class='ovr',
                                     verbose=0, warm_start=False, n_jobs=1)
        model.fit(X,y)
        scoreLR = cross_val_score(model, X, y, cv=5)
        return np.mean(scoreLR)

In [23]:
# Logistic Regression
model = LogisticRegressionClassifier(X, y)
print("Logistic Regression:")
start=time.time()
Logistic_score=model.classify()
end = time.time()
Logistic_time = end - start
print("Accuracy: ", Logistic_score * 100,"%")
print("Time: ", round(Logistic_time,2), " seconds.")

# Logistic Regression tuned
print("\nLogistic Regression tuned:")
start=time.time()
Logistic_score_tuned=model.classify(tuned=True)
end = time.time()
Logistic_time_tuned = end - start
print("Accuracy: ", Logistic_score_tuned * 100,"%")
print("Time: ", round(Logistic_time_tuned,2), " seconds.")

Logistic Regression:
Accuracy:  62.3 %
Time:  17.39  seconds.

Logistic Regression tuned:
Accuracy:  65.1 %
Time:  181.17  seconds.


In [28]:
from sklearn.linear_model import Perceptron

class PerceptronClassifier:
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def classify(self):
        model = Perceptron(penalty=None, alpha=0.01, fit_intercept=True, n_iter=5000, shuffle=True, 
                               verbose=0, eta0=1.0,n_jobs=1, random_state=0, class_weight=None, warm_start=False)
        model.fit(X,y)
        scorePerceptron = cross_val_score(model, X, y, cv=5)
        return np.mean(scorePerceptron)

In [29]:
# Perceptron
model = PerceptronClassifier(X, y)
print("Perceptron:")
start=time.time()
Preceptron_score=model.classify()
end = time.time()
Perceptron_time = end - start
print("Accuracy: ", Preceptron_score * 100,"%")
print("Time: ", round(Perceptron_time,2), " seconds.")

Perceptron:
Accuracy:  58.8 %
Time:  68.76  seconds.


In [30]:
from xgboost import XGBClassifier

class XGB:
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def classify(self, tuned=False):
        if (tuned):
            parameters = {'max_depth': [2,4,6],'n_estimators': [50,100,200]}
            model = XGBClassifier()
            model = GridSearchCV(model, parameters, verbose=1)
            model.fit(X,y)
        else:
            model = XGBClassifier()
            model.fit(X,y)
        score = cross_val_score(model, X, y, cv=5)
        return np.mean(score)

In [31]:
# XGBoosting
model = XGB(X, y)
print("XGBoosting:")
start=time.time()
XGB_score=model.classify()
end = time.time()
XGB_time = end - start
print("Accuracy: ", XGB_score * 100,"%")
print("Time: ", round(XGB_time,2), " seconds.")

# Logistic Regression tuned
print("\nLogistic Regression tuned:")
start=time.time()
XGB_score_tuned=model.classify(tuned=True)
end = time.time()
XGB_time_tuned = end - start
print("Accuracy: ", XGB_score_tuned * 100,"%")
print("Time: ", round(XGB_time_tuned,2), " seconds.")

XGBoosting:
XGBoosting Score:  0.691
Accuracy:  69.1 %
Time:  57.48  seconds.

Logistic Regression tuned:
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:  4.3min finished


Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:  2.7min finished


Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:  2.8min finished


Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:  2.7min finished


Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:  2.7min finished


Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:  2.7min finished


XGBoosting Score:  0.699
Accuracy:  69.9 %
Time:  1177.59  seconds.


In [5]:
import autosklearn.classification
from sklearn.model_selection import train_test_split

# Split data into training and test subsets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [None]:
automl = autosklearn.classification.AutoSklearnClassifier()
automl.fit(X_train, y_train)
y_hat = automl.predict(X_test)
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_hat))

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.cross_validation import StratifiedKFold
k = 10
skf = StratifiedKFold(labels,n_folds=k)
averageError = 0.0
for train_index, test_index in skf:
    X_train, X_test = mfcc[:,train_index], mfcc[:,test_index]
    y_train, y_test = labels[train_index], labels[test_index]
    clf = LinearDiscriminantAnalysis()
    clf.fit(X_train.T,y_train)
    y_pred = clf.predict(X_test.T)
    error = zero_one_loss(y_pred,y_test)
    print error
    averageError += (1./k) * error
print "Average error: %4.2f%s" % (100 * averageError,'%')