# GridSearchCV

In [5]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer

In [2]:
cancer = load_breast_cancer()

In [4]:
X, y = cancer.data, cancer.target

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
model = LogisticRegression(max_iter=1000, random_state=42)

In [8]:
param_grid = {
    'C' : [0.001, 0.01, 0.1, 1, 10, 100], # default 1
    'penalty' : ['l1', 'l2'],
    'solver' : ['liblinear', 'saga']
}

In [9]:
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=2
)

grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits




0,1,2
,estimator,LogisticRegre...ndom_state=42)
,param_grid,"{'C': [0.001, 0.01, ...], 'penalty': ['l1', 'l2'], 'solver': ['liblinear', 'saga']}"
,scoring,'accuracy'
,n_jobs,-1
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,100
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'liblinear'
,max_iter,1000


In [11]:
grid_search.best_params_, grid_search.best_score_

({'C': 100, 'penalty': 'l1', 'solver': 'liblinear'},
 np.float64(0.9670329670329672))

In [12]:
best_model = grid_search.best_estimator_
best_model.predict(X_test)

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 0, 0])

In [35]:
best_model

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,100
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'liblinear'
,max_iter,1000


In [16]:
best_model.score(X_test, y_test) # 하이퍼파라미터 튜닝 한 모델 점수 98점

0.9824561403508771

In [15]:
model2 = LogisticRegression() # 하이퍼파라미터 튜닝 안한 모델
model2.fit(X_train, y_train)
model2.score(X_test, y_test) # 95점

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.956140350877193

[CV] END ..............C=0.001, penalty=l1, solver=liblinear; total time=   0.0s
[CV] END ..............C=0.001, penalty=l2, solver=liblinear; total time=   0.0s
[CV] END ..............C=0.001, penalty=l2, solver=liblinear; total time=   0.0s
[CV] END ...................C=0.001, penalty=l2, solver=saga; total time=   0.1s
[CV] END ...............C=0.01, penalty=l1, solver=liblinear; total time=   0.0s
[CV] END ....................C=0.01, penalty=l1, solver=saga; total time=   0.1s
[CV] END ................C=0.1, penalty=l1, solver=liblinear; total time=   0.2s
[CV] END ..................C=1, penalty=l2, solver=liblinear; total time=   0.0s
[CV] END ..................C=1, penalty=l2, solver=liblinear; total time=   0.0s
[CV] END ..................C=1, penalty=l2, solver=liblinear; total time=   0.0s
[CV] END .......................C=1, penalty=l2, solver=saga; total time=   0.1s
[CV] END .................C=10, penalty=l1, solver=liblinear; total time=   0.3s
[CV] END ...................

In [17]:
import pandas as pd

In [20]:
results = pd.DataFrame(grid_search.cv_results_)

# RandomSearchCV

In [22]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint

In [29]:
pram_distributions = {
    'C' : uniform(0.001, 100),
    'penalty' : ['l1', 'l2'],
    'solver' : ['liblinear', 'saga']
}

In [30]:
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=pram_distributions,
    n_iter=50, # 조합 개수
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
    verbose=2
)

In [31]:
random_search.fit(X_train, y_train)

Fitting 5 folds for each of 50 candidates, totalling 250 fits




0,1,2
,estimator,LogisticRegre...ndom_state=42)
,param_distributions,"{'C': <scipy.stats....t 0x11d15a9f0>, 'penalty': ['l1', 'l2'], 'solver': ['liblinear', 'saga']}"
,n_iter,50
,scoring,'accuracy'
,n_jobs,-1
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,random_state,42

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,np.float64(73.20039418114051)
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'liblinear'
,max_iter,1000


In [32]:
random_search.best_params_, random_search.best_score_

({'C': np.float64(73.20039418114051), 'penalty': 'l1', 'solver': 'liblinear'},
 np.float64(0.9692307692307693))

In [33]:
random_best_model = random_search.best_estimator_

In [34]:
random_best_model.score(X_test, y_test)

0.9824561403508771

[CV] END .C=37.455011884736244, penalty=l1, solver=liblinear; total time=   0.2s
[CV] END ..C=15.60286404424365, penalty=l1, solver=liblinear; total time=   0.2s
[CV] END .......C=83.24526408004218, penalty=l2, solver=saga; total time=   0.1s
[CV] END .C=18.183496720710064, penalty=l1, solver=liblinear; total time=   0.2s
[CV] END ......C=61.186289472237945, penalty=l2, solver=saga; total time=   0.1s
[CV] END .......C=45.60799842170359, penalty=l1, solver=saga; total time=   0.1s
[CV] END ......C=19.968378215835976, penalty=l1, solver=saga; total time=   0.1s
[CV] END ..C=60.75548519014384, penalty=l1, solver=liblinear; total time=   0.3s
[CV] END .......C=30.46237691733707, penalty=l1, solver=saga; total time=   0.1s
[CV] END ........C=68.4243026512157, penalty=l1, solver=saga; total time=   0.1s
[CV] END ......C=25.878998160001693, penalty=l2, solver=saga; total time=   0.1s
[CV] END ......C=31.172107608941097, penalty=l2, solver=saga; total time=   0.1s
[CV] END .......C=93.9508941