<a href="https://colab.research.google.com/github/pythonpdnp/MS_kofia/blob/main/09_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hyperparameter Tuning

### SVM 적용 Review

In [None]:
from sklearn.datasets import load_iris

# Load the iris dataset once, then pull the feature matrix (X) and the
# label vector (y) off the returned object in a single assignment.
iris = load_iris()
X, y = iris.data, iris.target

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=999,
)

In [None]:
from sklearn.svm import SVC

# Baseline: an SVC left at its defaults (C=1, kernel='rbf'), fitted on the
# training split. `fit` returns the estimator itself, so it can be chained.
clf = SVC().fit(X_train, y_train)

# Accuracy on the held-out test split (displayed as the cell output).
clf.score(X_test, y_test)

0.9333333333333333

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of a default SVC on the full (X, y) data;
# the cell displays one score per fold.
cross_val_score(estimator=SVC(), X=X, y=y, cv=5)

array([0.96666667, 0.96666667, 0.96666667, 0.93333333, 1.        ])

### GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV

# Search space: every (C, kernel) combination listed here is tried.
param_grid = {'C': [0.1, 1, 10, 100], 'kernel': ['linear', 'rbf', 'poly']}

# Exhaustive 5-fold grid search over the SVC, ranked by accuracy and run on
# the training split only. `fit` returns the fitted search object.
clf = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
).fit(X_train, y_train)

# Winning parameter combination and its mean cross-validated score.
(clf.best_params_, clf.best_score_)

({'C': 1, 'kernel': 'linear'}, 0.975)

#### 모델/하이퍼파라미터 동시 검증

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Candidate models and the hyperparameter grid to search for each one.
# Each entry maps a display name to:
#   'model'  - an unfitted estimator instance handed to the grid search
#   'params' - the parameter grid explored for that estimator
model_params = {
    'svm': {
        'model': SVC(),
        'params' : {
            'C': [1,10,20],
            'kernel': ['rbf','linear']
        }
    },
    'random_forest': {
        # Random forests are stochastic: without a fixed seed the reported
        # best score / best params change from run to run. The seed matches
        # the one used for the train/test split (random_state=999).
        'model': RandomForestClassifier(random_state=999),
        'params' : {
            'n_estimators': [1,5,10]
        }
    },
    'logistic_regression' : {
        # max_iter is raised to 1000 — presumably so the solver converges
        # on this data without warnings.
        'model': LogisticRegression(max_iter=1000),
        'params': {
            'C': [1,5,10]
        }
    }
}

In [None]:
import pandas as pd

# One summary record per candidate model: its name, the best mean
# cross-validated score and the parameter combination that achieved it.
scores = []

for model_name, mp in model_params.items():
    # 5-fold grid search over this model's own grid, on the training split.
    # No `scoring` is passed, so the search uses its default scoring.
    clf = GridSearchCV(estimator=mp['model'], param_grid=mp['params'], cv=5)
    clf.fit(X_train, y_train)
    scores.append(
        {'model': model_name, 'best_score': clf.best_score_, 'best_params': clf.best_params_}
    )

# Tabulate the records with a fixed column order for side-by-side comparison.
df = pd.DataFrame(
    scores,
    columns=['model', 'best_score', 'best_params'],
)
df

Unnamed: 0,model,best_score,best_params
0,svm,0.975,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.958333,{'n_estimators': 1}
2,logistic_regression,0.975,{'C': 5}
