In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import classification_report

In [2]:
iris = datasets.load_iris()
print(iris.keys())
print("\nFeatures in the iris dataset are: ",iris.feature_names)

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

Features in the iris dataset are:  ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [3]:
X = iris.data 
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = 42)

In [4]:
pipeline = Pipeline([
    ("featureGeneration", PolynomialFeatures()), 
    ("scaler" , StandardScaler()), 
    ("classifier",SVC())
])

grid_parameters = {'featureGeneration__degree': (1, 2, 3, 4), 
                   'classifier__kernel': ['linear', 'rbf'], 
                   'classifier__gamma': [1e-3, 1e-4], 
                   'classifier__C': [1, 10, 50, 100]
                  }

clf = GridSearchCV(pipeline, grid_parameters, cv=5, scoring='accuracy')

In [5]:
clf.fit(X_train, y_train)

print("Grid scores for all the models based on CV:\n")

means = clf.cv_results_['mean_test_score']

stds = clf.cv_results_['std_test_score']

for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.5f (+/-%0.05f) for %r" % (mean, std * 2, params))

Grid scores for all the models based on CV:

0.96296 (+/-0.11476) for {'classifier__C': 1, 'classifier__gamma': 0.001, 'classifier__kernel': 'linear', 'featureGeneration__degree': 1}
0.96296 (+/-0.11476) for {'classifier__C': 1, 'classifier__gamma': 0.001, 'classifier__kernel': 'linear', 'featureGeneration__degree': 2}
0.96296 (+/-0.11476) for {'classifier__C': 1, 'classifier__gamma': 0.001, 'classifier__kernel': 'linear', 'featureGeneration__degree': 3}
0.95556 (+/-0.10887) for {'classifier__C': 1, 'classifier__gamma': 0.001, 'classifier__kernel': 'linear', 'featureGeneration__degree': 4}
0.67407 (+/-0.02963) for {'classifier__C': 1, 'classifier__gamma': 0.001, 'classifier__kernel': 'rbf', 'featureGeneration__degree': 1}
0.88889 (+/-0.12395) for {'classifier__C': 1, 'classifier__gamma': 0.001, 'classifier__kernel': 'rbf', 'featureGeneration__degree': 2}
0.91111 (+/-0.08889) for {'classifier__C': 1, 'classifier__gamma': 0.001, 'classifier__kernel': 'rbf', 'featureGeneration__degree': 3

In [6]:
print("\nBest parameters set found on development set:", clf.best_params_)
print("Best model validation accuracy:", clf.best_score_)


Best parameters set found on development set: {'classifier__C': 50, 'classifier__gamma': 0.001, 'classifier__kernel': 'rbf', 'featureGeneration__degree': 1}
Best model validation accuracy: 0.9703703703703704


In [7]:
gs_best = clf.best_estimator_
tuned_y_pred = gs_best.predict(X_test)
print('\n\nTuned Model Stats:')
print(classification_report(y_test, tuned_y_pred, target_names=['class 0', 'class 1', 'class 2']))



Tuned Model Stats:
              precision    recall  f1-score   support

     class 0       1.00      1.00      1.00         6
     class 1       1.00      1.00      1.00         6
     class 2       1.00      1.00      1.00         3

    accuracy                           1.00        15
   macro avg       1.00      1.00      1.00        15
weighted avg       1.00      1.00      1.00        15

