<font size=6><b>Lec11. Optuna</b></font>
* https://optuna.org/
* 하이퍼파라미터 튜닝

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ------------------------------------------------------ 훈련데이터
from sklearn.datasets import load_iris
# --------------------------------------------------------------------------
from sklearn.model_selection import train_test_split
# -------------------------------------------------------------------------- 정규화
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
# -------------------------------------------------------------------------- 인코딩
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# -------------------------------------------------------------------------- 모델
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
# -------------------------------------------------------------------------- 평가
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score 
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.metrics import roc_curve, roc_auc_score
# -------------------------------------------------------------------------- 교차검증
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.model_selection import GridSearchCV
# --------------------------------------------------------------------------

import warnings
warnings.filterwarnings(action="ignore")

In [2]:
# -------------------- Chart settings (Korean text rendering) -----------
plt.rcParams.update({
    'font.family': 'Malgun Gothic',   # font used so Korean labels render
    'axes.unicode_minus': False,      # use the ASCII hyphen for minus signs
})

# -------------------- Chart settings (optional size overrides) -----------
# plt.rcParams['figure.figsize'] = (4, 2)
# plt.rcParams['font.size'] = 7

# Basic Tutorial

<ol>
<li>Wrap model training with an <code>objective</code> function and return the score metric</li>
<li><code>Suggest hyperparameters</code> using a <code>trial</code> object</li>
<li>Create a <code>study</code> object and execute the optimization</li>
</ol>

* optuna.trial.Trial.suggest_categorical() : 리스트 범위 내에서 값을 선택한다.
* optuna.trial.Trial.suggest_int() : 범위 내에서 정수형 값을 선택한다.
* optuna.trial.Trial.suggest_float() : 범위 내에서 소수형 값을 선택한다.
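* optuna.trial.Trial.suggest_uniform() : 범위 내에서 균일분포 값을 선택한다. (Optuna 3.0부터 deprecated — `suggest_float()` 사용 권장)
* optuna.trial.Trial.suggest_discrete_uniform() : 범위 내에서 이산 균일분포 값을 선택한다. (Optuna 3.0부터 deprecated — `suggest_float(..., step=q)` 사용 권장)
* optuna.trial.Trial.suggest_loguniform() : 범위 내에서 로그 스케일 균일분포 값을 선택한다. (Optuna 3.0부터 deprecated — `suggest_float(..., log=True)` 사용 권장)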

In [12]:
import optuna

import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm
import logging 

# Only let CRITICAL-level Optuna log records through, hiding per-trial messages.
optuna.logging.set_verbosity(logging.CRITICAL)

# Iris dataset: `x` holds the feature matrix and `y` the class labels that the
# objective function reads as globals.
iris = sklearn.datasets.load_iris()
x = iris.data
y = iris.target


def my_objective(trial, seed=42):
    """Optuna objective: mean 3-fold CV accuracy of a sampled classifier.

    The trial first chooses between an SVC and a random forest, then samples
    the one hyperparameter that belongs to the chosen model. The model is
    scored on the module-level ``x`` / ``y`` arrays.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample hyperparameters.
    seed : int, default 42
        ``random_state`` for the random forest, so that identical
        hyperparameters always yield the same score. Optuna calls the
        objective with ``trial`` only, so the default is what gets used
        inside ``study.optimize``.

    Returns
    -------
    float
        Mean accuracy over the 3 cross-validation folds (to be maximized).
    """
    classifier_name = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
    if classifier_name == "SVC":
        # Regularization strength searched on a log scale.
        svc_c = trial.suggest_float("svc_c", 1e-10, 1e10, log=True)
        model = sklearn.svm.SVC(C=svc_c, gamma="auto")
    else:
        rf_max_depth = trial.suggest_int("rf_max_depth", 2, 32, log=True)
        # Fixed random_state: without it the forest's score is itself random,
        # which makes trials with the same parameters incomparable.
        model = sklearn.ensemble.RandomForestClassifier(
            max_depth=rf_max_depth, n_estimators=10, random_state=seed
        )

    # Use the module imported in this cell rather than a from-import
    # made in a different cell.
    scores = sklearn.model_selection.cross_val_score(
        model, x, y, cv=3, scoring="accuracy"
    )
    return scores.mean()

In [13]:
#--------------------------- Run the search
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# on Restart & Run All. TPESampler is also what create_study uses by default
# for a single-objective study, so the search algorithm itself is unchanged.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(my_objective, n_trials=10)

print(study.best_trial)

FrozenTrial(number=0, state=TrialState.COMPLETE, values=[0.96], datetime_start=datetime.datetime(2023, 9, 6, 11, 8, 54, 975978), datetime_complete=datetime.datetime(2023, 9, 6, 11, 8, 55, 79695), params={'classifier': 'RandomForest', 'rf_max_depth': 12}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'classifier': CategoricalDistribution(choices=('SVC', 'RandomForest')), 'rf_max_depth': IntDistribution(high=32, log=True, low=2, step=1)}, trial_id=0, value=None)


In [14]:
# Summary of the finished study: trial count, the best trial's full record,
# its objective value, and the hyperparameters that produced it.
print(
    f"Number of finished trials: {len(study.trials)}       \n"
    f"Best trial               : {study.best_trial}        \n"
    f"Best score               : {study.best_trial.value}  \n"
    f"Best hyper-param         : {study.best_trial.params} "
)

Number of finished trials: 10       
Best trial               : FrozenTrial(number=0, state=TrialState.COMPLETE, values=[0.96], datetime_start=datetime.datetime(2023, 9, 6, 11, 8, 54, 975978), datetime_complete=datetime.datetime(2023, 9, 6, 11, 8, 55, 79695), params={'classifier': 'RandomForest', 'rf_max_depth': 12}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'classifier': CategoricalDistribution(choices=('SVC', 'RandomForest')), 'rf_max_depth': IntDistribution(high=32, log=True, low=2, step=1)}, trial_id=0, value=None)        
Best score               : 0.96  
Best hyper-param         : {'classifier': 'RandomForest', 'rf_max_depth': 12} 
