In [None]:
import scipy as sp
import numpy as np
import pandas as pd
import pylab as py 
import sklearn

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA

#visualização
import seaborn as sns
import IPython.display as ipd
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
from mlxtend.plotting import scatterplotmatrix

In [None]:
from google.colab import drive

# Make Google Drive visible under this path; the CSV files loaded further down
# are read from beneath it.
mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


In [None]:
# Locations of the two cleaned feature tables on the mounted drive.
# (Presumably "Str" holds string-typed labels and "Nmb" their numeric encoding — confirm.)
str_csv_path = '/content/drive/My Drive/Audio Dataset/audioft_cleanStr.csv'
nmb_csv_path = '/content/drive/My Drive/Audio Dataset/audioft_cleanNmb.csv'

audioft_str = pd.read_csv(str_csv_path)
audioft_nmb = pd.read_csv(nmb_csv_path)

In [None]:
# Feature matrix: the first seven columns (positions 0-6) of the numeric table.
# NOTE(review): confirm that 'CLASS' is not one of those seven columns, otherwise
# the label would leak into the features.
X = audioft_nmb.iloc[:, :7].values

# Target vector: the 'CLASS' column.
y = audioft_nmb.loc[:, 'CLASS'].values

# **PIPELINE**

1. Pré-processamento

Já feito nos trabalhos anteriores.

2. Transformação/Conversão dos dados

Já feito nos trabalhos anteriores.

In [None]:
# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
    shuffle=True,
    stratify=y,
)

In [None]:
# Element counts of each split. For the 2-D feature arrays, .size is
# rows * columns, not the number of samples.
print(*(split.size for split in (X_train, X_test, y_train, y_test)))

2688 679 384 97


3. Normalização

In [None]:
# Learn the z-score parameters (per-feature mean and standard deviation) from the
# training split only, then apply that same transformation to both splits.
# Re-fitting the scaler on the test split would standardize the two sets with
# different statistics and let test-set information influence preprocessing.
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

4. Redução de Dimensionalidade & Classificador

In [None]:
# Three chained stages: standardize, project onto 3 principal components,
# then classify with a 1-nearest-neighbour model.
pipeline_steps = [
    ('z-score', StandardScaler()),
    ('reduce_dim', PCA(n_components=3)),
    ('classify', KNeighborsClassifier(n_neighbors=1)),
]
pipe = Pipeline(steps=pipeline_steps)

In [None]:
# Train every stage of the pipeline on the standardized training split.
# NOTE(review): the pipeline's own 'z-score' step standardizes this input again.
pipe.fit(X=X_train_std, y=y_train)

Pipeline(memory=None,
         steps=[('z-score',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('reduce_dim',
                 PCA(copy=True, iterated_power='auto', n_components=3,
                     random_state=None, svd_solver='auto', tol=0.0,
                     whiten=False)),
                ('classify',
                 KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                      metric='minkowski', metric_params=None,
                                      n_jobs=None, n_neighbors=1, p=2,
                                      weights='uniform'))],
         verbose=False)

In [None]:
# Accuracy on the same data the pipeline was fitted on.
y_train_pred = pipe.predict(X=X_train_std)
accuracy_score(y_true=y_train, y_pred=y_train_pred)

1.0

In [None]:
# Accuracy on the held-out test split.
y_test_pred = pipe.predict(X=X_test_std)
accuracy_score(y_true=y_test, y_pred=y_test_pred)

1.0

# **GRID-SEARCH**

In [None]:
# Hyper-parameters to sweep, addressed as '<step name>__<parameter>'.
param_grid = {
    'reduce_dim__n_components': list(range(1, 5)),
    'classify__n_neighbors': list(range(2, 6)),
}

# Exhaustive search over the grid, scored by accuracy with 2-fold cross-validation.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='accuracy',
    cv=2,
    n_jobs=1,
)

In [None]:
# Run the cross-validated search on the standardized training split.
grid.fit(X=X_train_std, y=y_train)

GridSearchCV(cv=2, error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('z-score',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('reduce_dim',
                                        PCA(copy=True, iterated_power='auto',
                                            n_components=3, random_state=None,
                                            svd_solver='auto', tol=0.0,
                                            whiten=False)),
                                       ('classify',
                                        KNeighborsClassifier(algorithm='auto',
                                                             leaf_size=30,
                                                             metric='minkowski',
                             

In [None]:
# Show the cross-validation results as a table, best-ranked combination first,
# instead of printing the raw dictionary of arrays. Every column is kept.
pd.DataFrame(grid.cv_results_).sort_values('rank_test_score')

{'mean_fit_time': array([0.00295448, 0.00131607, 0.0014652 , 0.00127411, 0.00137019,
       0.0012058 , 0.00121284, 0.00146151, 0.00138593, 0.00126112,
       0.00118697, 0.00125194, 0.00122273, 0.00125766, 0.00128841,
       0.00123477]), 'std_fit_time': array([1.64103508e-03, 1.01327896e-04, 1.39474869e-05, 4.14848328e-05,
       2.24113464e-05, 6.31809235e-06, 4.76837158e-06, 2.92062759e-04,
       1.06573105e-04, 1.19209290e-07, 1.19209290e-07, 3.00407410e-05,
       5.85317612e-05, 7.27176666e-05, 2.98023224e-05, 3.81469727e-06]), 'mean_score_time': array([0.00673187, 0.00556707, 0.00605655, 0.00589168, 0.00619113,
       0.00552595, 0.00565922, 0.00548255, 0.00661147, 0.0055871 ,
       0.00557649, 0.00543797, 0.00539422, 0.00550377, 0.00536156,
       0.00579071]), 'std_score_time': array([1.14309788e-03, 4.91857529e-04, 5.79357147e-05, 8.42809677e-05,
       4.49419022e-05, 1.10864639e-05, 2.42114067e-04, 1.73687935e-04,
       8.37445259e-04, 1.21355057e-04, 2.80141830e-05, 4.

In [None]:
# Mean cross-validated accuracy, one entry per parameter combination tried.
mean_test_scores = grid.cv_results_['mean_test_score']
mean_test_scores

array([0.6171875 , 0.78645833, 0.97916667, 0.9921875 , 0.66666667,
       0.80989583, 0.97135417, 0.97916667, 0.65885417, 0.79166667,
       0.97135417, 0.97395833, 0.66666667, 0.79427083, 0.96875   ,
       0.9765625 ])

Exibindo os parâmetros ideais encontrados a partir do melhor score obtido

In [None]:
# Best mean cross-validated score, followed by the parameter combination that achieved it.
print(grid.best_score_, grid.best_params_, sep='\n')

0.9921875
{'classify__n_neighbors': 2, 'reduce_dim__n_components': 4}


In [None]:
# Keep the pipeline configured with the best parameters found by the search.
# With GridSearchCV's default refit=True it has been refit on all data passed to grid.fit.
clf = grid.best_estimator_

In [None]:
# Accuracy of the tuned pipeline on the held-out test split.
y_test_pred = clf.predict(X=X_test_std)
accuracy_score(y_true=y_test, y_pred=y_test_pred)

1.0