In [None]:
# What is a hyperparameter?
#
# Hyperparameters are the parameters that we set in the model constructor when the model is initialized.
#
# Every algorithm has its own set of hyperparameters.
# Our goal is to identify the best hyperparameters for the given dataset and model.
# We can achieve this with hyperparameter tuning.

# Is hyperparameter tuning mandatory?
# No - not if the default configuration already gives you the best model.


In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the dataset from "iris.csv"; the path is relative, so the file must be
# in the notebook's current working directory.
data = pd.read_csv("iris.csv")

In [None]:
# Split the frame into model inputs and target as plain NumPy arrays:
# the last column is used as the label, every other column as a feature.
label = data.iloc[:, -1].values
features = data.iloc[:, :-1].values

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Baseline model: KNN constructed with no arguments, i.e. every hyperparameter
# left at its default value.
modeltest = KNeighborsClassifier()

In [None]:
import warnings

from sklearn.model_selection import cross_val_score

# Suppress warnings. No category is given, so this silences every warning
# from here on, not only those raised in this cell.
warnings.filterwarnings('ignore')

# Baseline: score the default KNN on each of 10 cross-validation folds
# (5 or 10 folds are the usual choices).
scores = cross_val_score(modeltest, features, label, cv=10)

scores

array([1.        , 0.93333333, 1.        , 1.        , 0.86666667,
       0.93333333, 0.93333333, 1.        , 1.        , 1.        ])

In [None]:
# Average of the per-fold scores: the single baseline number that the tuned
# models are compared against.
scores.mean()

0.9666666666666668

In [None]:
#Goal: To identify the best hyperparameter combination for KNN w.r.t. the iris dataset
#      that can give a score > 0.96

# Method 1 --- Using GridSearchCV

In [None]:
'''
KNeighborsClassifier(	
n_neighbors=5,	               Positive Integer
weights='uniform',	          ‘uniform’, 'distance' 
algorithm='auto',	          'auto', 'ball_tree', 'kd_tree', 'brute'
leaf_size=30,	               Positive Integer
p=2,	                      1 (Manhattan distance),   2 (Euclidean distance)
metric='minkowski',	          "“euclidean”, “manhattan”, “chebyshev”, “minkowski”
                               “wminkowski”, “seuclidean”, “mahalanobis”"
metric_params=None,	          Dictionary
n_jobs=None,	-1,            1 or -2
**kwargs,	
)	

'''


In [None]:
# Step 1: Describe the search space. The parameter grid is a dictionary that
# maps each hyperparameter name to the candidate values to try.

n_neighborsParameter = np.arange(1,31)          # k = 1 .. 30
weightsParameter = ['uniform','distance']
algorithmParameter = ['auto', 'ball_tree', 'kd_tree', 'brute']
metricParameter = ['euclidean', 'manhattan', 'chebyshev' , 'minkowski', 'seuclidean', 'mahalanobis']
# NOTE(review): 'seuclidean' and 'mahalanobis' normally need extra
# metric_params (V / VI) with the tree-based algorithms; those candidates may
# fail to fit and be scored as NaN, and the earlier blanket warning filter
# would hide the failure warnings - confirm.

# 30 * 2 * 4 * 6 = 1440 candidate combinations in total.
paramGrid = {
    'n_neighbors': n_neighborsParameter,
    'weights': weightsParameter,
    'algorithm': algorithmParameter,
    'metric': metricParameter,
}

# Step 2: Create the estimator whose hyperparameters will be tuned.

from sklearn.neighbors import KNeighborsClassifier
modelGridSearch = KNeighborsClassifier()

# Step 3: Set up the exhaustive search (GridSearchCV) over every combination
# in the grid, using the same 10-fold setting as the cross_val_score baseline.

from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(modelGridSearch, param_grid=paramGrid, cv=10)

In [None]:
# Step4: Extract Results
# Run the grid search on the full feature matrix and labels. The best_score_,
# best_estimator_ and best_params_ attributes inspected in the following cells
# are only available after this call.

grid.fit(features,label)

GridSearchCV(cv=10, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'metric': ['euclidean', 'manhattan', 'chebyshev',
                                    'minkowski', 'seuclidean', 'mahalanobis'],
                         'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]),
                         'weights': ['uniform', 'distance']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None

In [None]:
# Best cross-validation score achieved by any candidate in the grid search;
# compare it with the default-model mean computed earlier.
grid.best_score_

0.9866666666666667

In [None]:
# The estimator configured with the winning hyperparameter combination.
grid.best_estimator_

KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='chebyshev',
                     metric_params=None, n_jobs=None, n_neighbors=6, p=2,
                     weights='uniform')

In [None]:
# Build the final (unfitted) model directly from the hyperparameters selected
# by the grid search instead of copying them by hand from a printed output,
# so it cannot drift from the search result when the data, the grid or the
# library version changes.
# Hyperparameters that were not part of the grid (leaf_size, p, metric_params,
# n_jobs) stay at the KNeighborsClassifier defaults.
finalModel = KNeighborsClassifier(**grid.best_params_)

In [None]:
# Dictionary holding only the hyperparameters that were part of the search
# grid, with the values of the winning combination.
grid.best_params_

{'algorithm': 'brute',
 'metric': 'chebyshev',
 'n_neighbors': 6,
 'weights': 'uniform'}

# Method 2: Using RandomizedSearchCV

In [None]:
#Step1: Design the parameter grid. In Python, you can represent parameter grid in the form of Dictionary
# The search space is the same as for the GridSearchCV run; it is spelled out
# again here so this cell can be read and run on its own.

weightsParameter = ['uniform','distance']
n_neighborsParameter = np.arange(1,31)
algorithmParameter = ['auto', 'ball_tree', 'kd_tree', 'brute']
metricParameter = ['euclidean', 'manhattan', 'chebyshev' , 'minkowski', 'seuclidean', 'mahalanobis']


paramGrid = dict(n_neighbors = n_neighborsParameter,
                weights= weightsParameter,
                algorithm = algorithmParameter,
                metric=metricParameter)

# Step2: Initialize the algo

from sklearn.neighbors import KNeighborsClassifier
modelRGridSearch = KNeighborsClassifier()

#Step3: Search the best parameter for your data

#RandomizedSearchCV
# Unlike GridSearchCV, only a random sample of the combinations is evaluated.
# random_state pins that sample so the same candidates (and therefore the same
# winner) are obtained on every Restart & Run All.

from sklearn.model_selection import RandomizedSearchCV
rSearchgrid = RandomizedSearchCV(modelRGridSearch,
                   param_distributions=paramGrid,
                   cv = 10, #Same as cross_val_score
                   random_state = 42) #Fixed seed for reproducible sampling

In [None]:
# Run the randomized search on the full feature matrix and labels; the result
# attributes inspected in the following cells are only available after this call.
rSearchgrid.fit(features,label)

RandomizedSearchCV(cv=10, error_score=nan,
                   estimator=KNeighborsClassifier(algorithm='auto',
                                                  leaf_size=30,
                                                  metric='minkowski',
                                                  metric_params=None,
                                                  n_jobs=None, n_neighbors=5,
                                                  p=2, weights='uniform'),
                   iid='deprecated', n_iter=10, n_jobs=None,
                   param_distributions={'algorithm': ['auto', 'ball_tree',
                                                      'kd_tree', 'brute'],
                                        'metric': ['euclidean', 'manhattan',
                                                   'chebyshev', 'minkowski',
                                                   'seuclidean',
                                                   'mahalanobis'],
                                   

In [None]:
# Best cross-validation score among the randomly sampled candidates.
rSearchgrid.best_score_

0.9733333333333334

In [None]:
# The estimator configured with the best combination the randomized search sampled.
rSearchgrid.best_estimator_

KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='manhattan',
                     metric_params=None, n_jobs=None, n_neighbors=18, p=2,
                     weights='distance')

In [None]:
# Show the exhaustive grid-search winner again, for side-by-side comparison
# with the randomized-search result in the previous cell.
grid.best_estimator_

KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='chebyshev',
                     metric_params=None, n_jobs=None, n_neighbors=6, p=2,
                     weights='uniform')