<a href="https://colab.research.google.com/github/waghvaishnav/Model-Evaluation-Fine-Tuning-in-the-Machine-Learning-Toolkit/blob/main/GridSearchCV_Randomized_SearchCV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Generating a synthetic binary-classification dataset

In [None]:
from sklearn.datasets import make_classification
import pandas as pd

# Synthetic two-class problem: 1000 samples, 10 features, fixed seed so the
# generated data is the same on every run.
x, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

# GridSearchCV

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Search space for each candidate model, keyed by a short display name.
# Every entry pairs an unfitted estimator ("model") with the hyper-parameter
# grid ("param") that GridSearchCV enumerates exhaustively for it.
grid_param = {
    "decisiontree": {
        # Seed the tree: its split selection involves randomness, so without a
        # fixed random_state the cross-validation scores drift between runs.
        "model": DecisionTreeClassifier(random_state=42),
        "param": {
            "criterion": ["gini", "entropy"],
            "max_depth": [4, 5, 3, 6],
        },
    },
    "supportvectorclassifier": {
        "model": SVC(gamma="auto"),
        "param": {
            "kernel": ["rbf", "sigmoid"],
            "C": [1, 10, 20],
        },
    },
}



In [None]:
scores = []
best_model = None
best_score = 0

# Run one exhaustive 5-fold grid search per candidate model and collect the
# best score / best parameters each search found.
for model_name, spec in grid_param.items():
    clf = GridSearchCV(
        estimator=spec["model"],
        param_grid=spec["param"],
        cv=5,
        return_train_score=False,
    )
    clf.fit(x, y)

    # One summary row per model.
    summary_row = {
        "model": model_name,
        "best_score": clf.best_score_,
        "best_param": clf.best_params_,
    }
    scores.append(summary_row)

pd.DataFrame(scores)

Unnamed: 0,model,best_score,best_param
0,decisiontree,0.904,"{'criterion': 'entropy', 'max_depth': 5}"
1,supportvectorclassifier,0.866,"{'C': 1, 'kernel': 'rbf'}"


In [None]:
# cv_results_ holds the per-candidate timings, parameters and scores of a
# fitted search. `clf` is rebound on every pass of the loop above, so this
# shows only the search for the last model in grid_param.

clf.cv_results_

{'mean_fit_time': array([0.01492577, 0.01185899, 0.02187057, 0.01004462, 0.02327232,
        0.01012969]),
 'std_fit_time': array([0.00042695, 0.00030439, 0.00456371, 0.00073963, 0.00123934,
        0.00102184]),
 'mean_score_time': array([0.0047699 , 0.00317612, 0.00499816, 0.00285873, 0.00447345,
        0.00284901]),
 'std_score_time': array([1.25933822e-04, 7.72824327e-05, 7.65893647e-04, 5.35667921e-05,
        1.09116659e-04, 5.42808684e-05]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value=999999),
 'param_kernel': masked_array(data=['rbf', 'sigmoid', 'rbf', 'sigmoid', 'rbf', 'sigmoid'],
              mask=[False, False, False, False, False, False],
        fill_value=np.str_('?'),
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'sigmoid'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'sigmoid'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel'

In [None]:
# best_index_ is the position within cv_results_ of the best-scoring parameter
# combination for the search currently bound to `clf`. It is not an index
# over the different models in grid_param.

clf.best_index_

np.int64(0)

In [None]:
# best_estimator_ is the estimator configured with the best parameters found
# by the search currently bound to `clf`; it does not compare across the
# different models in grid_param.

clf.best_estimator_

In [None]:
# Best mean cross-validated score found by the search currently bound to `clf`.
clf.best_score_

np.float64(0.866)

In [None]:
# Parameter combination that produced the best score for the search currently
# bound to `clf`.
clf.best_params_

{'C': 1, 'kernel': 'rbf'}

In [None]:
scores = []
best_model = None
best_score = 0

# Repeat the grid search for every candidate and keep only the overall winner.
for model_name, spec in grid_param.items():
    clf = GridSearchCV(
        estimator=spec["model"],
        param_grid=spec["param"],
        cv=5,
        return_train_score=False,
    )
    clf.fit(x, y)

    # Strict comparison: on a tie the earlier model stays selected.
    improved = clf.best_score_ > best_score
    if improved:
        best_score = clf.best_score_
        best_model = clf.best_estimator_

print(best_model, best_score)

DecisionTreeClassifier(criterion='entropy', max_depth=5) 0.9019999999999999


# RandomizedSearchCV

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Candidate models and the discrete hyper-parameter values to sample from.
# Each entry pairs an unfitted estimator ("model") with its search space ("param").
grid_params = {
    "decisiontreeclassifier": {
        # Seeded so the tree's randomised split selection is repeatable.
        "model": DecisionTreeClassifier(random_state=42),
        "param": {
            "criterion": ["gini", "entropy"],
            "max_depth": [3, 5, 7, 8],
        },
    },
    "supportvectorclassifier": {
        "model": SVC(),
        "param": {
            "C": [12, 15, 16, 20],
            "kernel": ["rbf", "linear"],
        },
    },
}

list1 = []

# Randomised search: only n_iter=5 of the possible combinations are tried per
# model, each scored with 5-fold cross-validation.
for val in grid_params.values():
    random_cv = RandomizedSearchCV(
        val["model"],
        val["param"],
        n_iter=5,
        cv=5,
        return_train_score=False,
        # Fix the sampler seed; otherwise a different subset of candidates is
        # drawn on every run and the reported best parameters change.
        random_state=42,
    )
    random_cv.fit(x, y)
    list1.append({
        "best model": random_cv.best_estimator_,
        "best_param": random_cv.best_params_,
        "best_score": random_cv.best_score_,
    })

pd.DataFrame(list1)

Unnamed: 0,best model,best_param,best_score
0,"DecisionTreeClassifier(criterion='entropy', ma...","{'max_depth': 7, 'criterion': 'entropy'}",0.882
1,"SVC(C=12, kernel='linear')","{'kernel': 'linear', 'C': 12}",0.86


In [None]:
# Tabular view of every sampled candidate. `random_cv` is rebound on each pass
# of the loop above, so this covers only the last model's search.
pd.DataFrame(random_cv.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.020226,0.001491,0.004723,0.000602,rbf,12,"{'kernel': 'rbf', 'C': 12}",0.845,0.835,0.865,0.825,0.89,0.852,0.023152,3
1,0.021558,0.000726,0.004513,0.00022,rbf,15,"{'kernel': 'rbf', 'C': 15}",0.845,0.82,0.865,0.81,0.885,0.845,0.027749,4
2,0.094725,0.011805,0.002595,0.000328,linear,16,"{'kernel': 'linear', 'C': 16}",0.87,0.865,0.845,0.835,0.885,0.86,0.017889,1
3,0.036283,0.001694,0.007658,0.000395,rbf,16,"{'kernel': 'rbf', 'C': 16}",0.845,0.82,0.855,0.81,0.885,0.843,0.026571,5
4,0.125887,0.006597,0.004303,0.00098,linear,15,"{'kernel': 'linear', 'C': 15}",0.87,0.865,0.845,0.835,0.885,0.86,0.017889,1


In [None]:
# Position within cv_results_ of the best-scoring sampled candidate.
random_cv.best_index_

np.int64(0)

In [None]:
# Estimator configured with the best parameters found by the search currently
# bound to `random_cv`.
random_cv.best_estimator_

In [None]:
# Sampled parameter combination that produced the best score for the search
# currently bound to `random_cv`.
random_cv.best_params_

{'kernel': 'linear', 'C': 20}

In [None]:
# Raw dict form of the per-candidate results (timings, sampled parameters,
# per-split and aggregate test scores) for the search bound to `random_cv`.
random_cv.cv_results_

{'mean_fit_time': array([0.02031827, 0.07018933, 0.02121043, 0.02199507, 0.09034038]),
 'std_fit_time': array([0.00136177, 0.00375739, 0.000933  , 0.001351  , 0.00880633]),
 'mean_score_time': array([0.00447431, 0.00245867, 0.00449367, 0.00449166, 0.00246553]),
 'std_score_time': array([8.78210038e-05, 6.25723372e-05, 1.31177668e-04, 1.15811427e-04,
        2.30891623e-05]),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'rbf', 'linear'],
              mask=[False, False, False, False, False],
        fill_value=np.str_('?'),
             dtype=object),
 'param_C': masked_array(data=[12, 12, 15, 16, 16],
              mask=[False, False, False, False, False],
        fill_value=999999),
 'params': [{'kernel': 'rbf', 'C': 12},
  {'kernel': 'linear', 'C': 12},
  {'kernel': 'rbf', 'C': 15},
  {'kernel': 'rbf', 'C': 16},
  {'kernel': 'linear', 'C': 16}],
 'split0_test_score': array([0.845, 0.87 , 0.845, 0.845, 0.87 ]),
 'split1_test_score': array([0.835, 0.865, 0.82 , 0.82 , 0