In [1]:
pip install hyperopt

Note: you may need to restart the kernel to use updated packages.


In [2]:
from hyperopt import hp

# Search space: x ranges over -10..10 and y over -15..15, both quantised to a step of 1.
search_space = {
    'x': hp.quniform('x', -10, 10, 1),
    'y': hp.quniform('y', -15, 15, 1),
}

In [3]:
from hyperopt import STATUS_OK

def objective_func(search_space):
    """Objective function to be minimised by fmin().

    Args:
        search_space: Dict holding the sampled input values under the keys
            'x' and 'y'.

    Returns:
        The value of x**2 - 20*y for the sampled inputs.
    """
    x_val, y_val = search_space['x'], search_space['y']
    return x_val ** 2 - 20 * y_val

In [4]:
from hyperopt import fmin, tpe, Trials

# Trials object that records the inputs and result of every evaluation.
trial_val = Trials()

# Look for the inputs that minimise the objective using only 5 evaluations (max_evals=5).
best_01 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=5,
    trials=trial_val,
)
print('best:', best_01)

100%|████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 1000.12trial/s, best loss: -199.0]
best: {'x': 9.0, 'y': 14.0}


In [5]:
# Start from an empty Trials object so this run's history is recorded on its own.
trial_val = Trials()

# Repeat the search with max_evals raised to 20.
best_02 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=20,
    trials=trial_val,
)
print('best:', best_02)

100%|██████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 1052.63trial/s, best loss: -216.0]
best: {'x': 2.0, 'y': 11.0}


In [6]:
# The Trials object passed to fmin() keeps the objective function's return values
# in its `results` attribute, as a list.
# Each entry is a dict of the form {'loss': <returned value>, 'status': <status string>}.
print(trial_val.results)

[{'loss': -155.0, 'status': 'ok'}, {'loss': 209.0, 'status': 'ok'}, {'loss': 156.0, 'status': 'ok'}, {'loss': 304.0, 'status': 'ok'}, {'loss': -164.0, 'status': 'ok'}, {'loss': 344.0, 'status': 'ok'}, {'loss': 176.0, 'status': 'ok'}, {'loss': 344.0, 'status': 'ok'}, {'loss': 100.0, 'status': 'ok'}, {'loss': -80.0, 'status': 'ok'}, {'loss': 109.0, 'status': 'ok'}, {'loss': -156.0, 'status': 'ok'}, {'loss': -216.0, 'status': 'ok'}, {'loss': 209.0, 'status': 'ok'}, {'loss': -139.0, 'status': 'ok'}, {'loss': 116.0, 'status': 'ok'}, {'loss': 229.0, 'status': 'ok'}, {'loss': -171.0, 'status': 'ok'}, {'loss': 121.0, 'status': 'ok'}, {'loss': 60.0, 'status': 'ok'}]


In [7]:
# The `vals` attribute of the Trials object maps each input variable name to the
# list of values that were tried, one entry per evaluation.
print(trial_val.vals)

{'x': [-5.0, -3.0, 6.0, -2.0, -4.0, 8.0, -4.0, -8.0, 10.0, 10.0, 7.0, 8.0, 2.0, -7.0, 1.0, -4.0, -3.0, 3.0, -9.0, 0.0], 'y': [9.0, -10.0, -6.0, -15.0, 9.0, -14.0, -8.0, -14.0, -0.0, 9.0, -3.0, 11.0, 11.0, -8.0, 7.0, -5.0, -11.0, 9.0, -2.0, -3.0]}


In [8]:
import pandas as pd

# Pull just the 'loss' entry out of each per-trial result dict.
losses = [trial_result['loss'] for trial_result in trial_val.results]

# One row per trial: the sampled x and y next to the loss they produced.
result_df = pd.DataFrame({
    'x': trial_val.vals['x'],
    'y': trial_val.vals['y'],
    'losses': losses,
})
result_df

Unnamed: 0,x,y,losses
0,-5.0,9.0,-155.0
1,-3.0,-10.0,209.0
2,6.0,-6.0,156.0
3,-2.0,-15.0,304.0
4,-4.0,9.0,-164.0
5,8.0,-14.0,344.0
6,-4.0,-8.0,176.0
7,-8.0,-14.0,344.0
8,10.0,-0.0,100.0
9,10.0,9.0,-80.0


### HyperOpt를 이용한 XGBoost 하이퍼 파라미터 최적화

In [9]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

dataset = load_breast_cancer()

# Feature matrix as a DataFrame, with the label appended as the final column.
cancer_df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
cancer_df['target'] = dataset.target

# Every column except the last is a feature; the last column is the label.
X_features, y_label = cancer_df.iloc[:, :-1], cancer_df.iloc[:, -1]

In [10]:
# Hold out 20% of the full data as the test set; the remaining 80% is the training set.
X_train, X_test, y_train, y_test = train_test_split(
    X_features, y_label, test_size=0.2, random_state=156
)

# Split the training set once more: 90% to fit on, 10% kept for validation.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=156
)

In [11]:
from hyperopt import hp

# max_depth is searched from 5 to 20 in steps of 1, min_child_weight from 1 to 2 in steps of 1.
# learning_rate is sampled between 0.01 and 0.2 and colsample_bytree between 0.5 and 1;
# both use hp.uniform, i.e. a uniform (not normal) distribution.
xgb_search_space = {'max_depth': hp.quniform('max_depth', 5, 20, 1), 
                    'min_child_weight': hp.quniform('min_child_weight', 1, 2, 1),
                    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
                    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
                   }

In [12]:
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from hyperopt import STATUS_OK

def objective_func(search_space):
    """Objective for fmin(): negated mean cross-validated accuracy of an XGBClassifier.

    fmin() minimises its objective, while a higher accuracy is better, so the
    mean accuracy is multiplied by -1 before being returned as the loss.

    Args:
        search_space: Dict of sampled hyperparameter values with the keys
            'max_depth', 'min_child_weight', 'learning_rate' and
            'colsample_bytree'.

    Returns:
        Dict with 'loss' (-1 * mean accuracy over the 3 CV folds) and
        'status' (STATUS_OK).

    Note:
        Reads X_train and y_train from the notebook's global scope.
    """
    # The sampled values arrive as floats, so the integer-valued
    # hyperparameters are cast to int before being handed to XGBoost.
    sampled_params = {
        'max_depth': int(search_space['max_depth']),
        'min_child_weight': int(search_space['min_child_weight']),
        'learning_rate': search_space['learning_rate'],
        'colsample_bytree': search_space['colsample_bytree'],
    }

    # n_estimators is kept at 100 to shorten the run time of each trial.
    model = XGBClassifier(n_estimators=100, eval_metric='logloss', **sampled_params)

    # cross_val_score yields one accuracy value per fold (cv=3).
    fold_scores = cross_val_score(model, X_train, y_train, scoring='accuracy', cv=3)

    return {'loss': -1 * np.mean(fold_scores), 'status': STATUS_OK}


  from pandas import MultiIndex, Int64Index


In [13]:
from hyperopt import fmin, tpe, Trials

# Run the hyperparameter search over xgb_search_space, recording every trial in trial_val.
trial_val = Trials()
best = fmin(fn=objective_func,
            space=xgb_search_space,
            algo=tpe.suggest,
            max_evals=50, # maximum number of evaluations to run
            trials=trial_val)
print('best:', best)


  0%|                                                                           | 0/50 [00:00<?, ?trial/s, best loss=?]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



  2%|▉                                               | 1/50 [00:00<00:08,  5.91trial/s, best loss: -0.9560822586266992]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



  4%|█▉                                              | 2/50 [00:00<00:07,  6.76trial/s, best loss: -0.9560822586266992]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



  6%|██▉                                             | 3/50 [00:00<00:07,  5.94trial/s, best loss: -0.9560822586266992]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



  8%|███▊                                            | 4/50 [00:00<00:08,  5.69trial/s, best loss: -0.9560822586266992]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 10%|████▊                                           | 5/50 [00:00<00:07,  5.78trial/s, best loss: -0.9560822586266992]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 12%|█████▊                                          | 6/50 [00:00<00:07,  6.24trial/s, best loss: -0.9560822586266992]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 14%|██████▋                                         | 7/50 [00:01<00:06,  6.21trial/s, best loss: -0.9560822586266992]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 16%|███████▋                                        | 8/50 [00:01<00:06,  6.55trial/s, best loss: -0.9670616939700244]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 18%|████████▋                                       | 9/50 [00:01<00:06,  6.68trial/s, best loss: -0.9670616939700244]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 20%|█████████▍                                     | 10/50 [00:01<00:06,  6.45trial/s, best loss: -0.9670616939700244]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 22%|██████████▎                                    | 11/50 [00:01<00:05,  6.78trial/s, best loss: -0.9670616939700244]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 24%|███████████▎                                   | 12/50 [00:01<00:06,  6.31trial/s, best loss: -0.9670616939700244]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 26%|████████████▏                                  | 13/50 [00:02<00:05,  6.32trial/s, best loss: -0.9670616939700244]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 28%|█████████████▏                                 | 14/50 [00:02<00:05,  6.12trial/s, best loss: -0.9670616939700244]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 30%|██████████████                                 | 15/50 [00:02<00:06,  5.79trial/s, best loss: -0.9692837225514116]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 32%|███████████████                                | 16/50 [00:02<00:05,  5.92trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 36%|████████████████▉                              | 18/50 [00:02<00:05,  6.38trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 38%|█████████████████▊                             | 19/50 [00:03<00:04,  6.27trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 42%|███████████████████▋                           | 21/50 [00:03<00:04,  6.57trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 44%|████████████████████▋                          | 22/50 [00:03<00:04,  6.50trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 46%|█████████████████████▌                         | 23/50 [00:03<00:04,  6.71trial/s, best loss: -0.9692837225514116]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 48%|██████████████████████▌                        | 24/50 [00:03<00:03,  6.61trial/s, best loss: -0.9692837225514116]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 52%|████████████████████████▍                      | 26/50 [00:04<00:03,  6.70trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 54%|█████████████████████████▍                     | 27/50 [00:04<00:03,  6.71trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 58%|███████████████████████████▎                   | 29/50 [00:04<00:03,  6.76trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 60%|████████████████████████████▏                  | 30/50 [00:04<00:03,  6.59trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 64%|██████████████████████████████                 | 32/50 [00:05<00:02,  6.31trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 66%|███████████████████████████████                | 33/50 [00:05<00:02,  6.45trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 68%|███████████████████████████████▉               | 34/50 [00:05<00:02,  6.39trial/s, best loss: -0.9692837225514116]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 72%|█████████████████████████████████▊             | 36/50 [00:05<00:02,  6.21trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 74%|██████████████████████████████████▊            | 37/50 [00:05<00:02,  6.38trial/s, best loss: -0.9692837225514116]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 76%|███████████████████████████████████▋           | 38/50 [00:05<00:01,  6.05trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 80%|█████████████████████████████████████▌         | 40/50 [00:06<00:01,  5.97trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 82%|██████████████████████████████████████▌        | 41/50 [00:06<00:01,  6.20trial/s, best loss: -0.9692837225514116]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




 86%|████████████████████████████████████████▍      | 43/50 [00:06<00:01,  6.43trial/s, best loss: -0.9692837225514116]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 88%|█████████████████████████████████████████▎     | 44/50 [00:06<00:00,  6.05trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 90%|██████████████████████████████████████████▎    | 45/50 [00:07<00:00,  6.27trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 92%|███████████████████████████████████████████▏   | 46/50 [00:07<00:00,  6.24trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 96%|█████████████████████████████████████████████  | 48/50 [00:07<00:00,  6.41trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 98%|██████████████████████████████████████████████ | 49/50 [00:07<00:00,  6.25trial/s, best loss: -0.9692837225514116]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




100%|███████████████████████████████████████████████| 50/50 [00:07<00:00,  6.32trial/s, best loss: -0.9692837225514116]
best: {'colsample_bytree': 0.5164338672950186, 'learning_rate': 0.11616673497280676, 'max_depth': 7.0, 'min_child_weight': 1.0}


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



In [14]:
# Show the best values found: the float parameters rounded to 5 decimal places,
# the integer-valued parameters cast from float to int.
print('colsample_bytree:{0}, learning_rate:{1}, max_depth:{2}, min_child_weight:{3}'.format(
    round(best['colsample_bytree'], 5), round(best['learning_rate'], 5),
    int(best['max_depth']), int(best['min_child_weight'])))

colsample_bytree:0.51643, learning_rate:0.11617, max_depth:7, min_child_weight:1


In [15]:
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score, roc_auc_score

def get_clf_eval(y_test, pred=None, pred_proba=None):
    """Print the confusion matrix and summary metrics for classifier predictions.

    Args:
        y_test: True labels.
        pred: Predicted class labels. The None default is kept for backward
            compatibility, but the label-based metrics need a real value.
        pred_proba: Optional scores passed to roc_auc_score. When omitted
            (None), ROC-AUC is left out of the report instead of raising.

    Returns:
        None. The report is written to stdout.
    """
    confusion = confusion_matrix(y_test, pred)
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    f1 = f1_score(y_test, pred)

    # The template is kept on one logical line: a backslash continuation inside
    # a string literal would embed the next line's indentation in the output.
    summary = '정확도: {0:.4f}, 정밀도: {1:.4f}, 재현율: {2:.4f}, F1: {3:.4f}'.format(
        accuracy, precision, recall, f1)

    # ROC-AUC is only computed when scores were supplied. It is evaluated
    # before anything is printed so a failure leaves no partial report.
    if pred_proba is not None:
        summary += ', AUC:{0:.4f}'.format(roc_auc_score(y_test, pred_proba))

    print('오차 행렬')
    print(confusion)
    print(summary)

In [16]:
# Build the final model with n_estimators=400 and the hyperparameters stored in `best`;
# the float values are rounded to 5 decimals and the integer-valued ones cast to int.
xgb_wrapper = XGBClassifier(n_estimators=400,
                            learning_rate=round(best['learning_rate'], 5),
                            max_depth=int(best['max_depth']),
                            min_child_weight=int(best['min_child_weight']),
                            colsample_bytree=round(best['colsample_bytree'], 5)
                           )

# Evaluation sets: the training split first, then the validation split.
evals = [(X_tr, y_tr), (X_val, y_val)]
# Fit on the training split with logloss monitoring and early_stopping_rounds=50.
# NOTE(review): newer xgboost releases appear to expect early_stopping_rounds and
# eval_metric on the XGBClassifier constructor rather than on fit() - confirm
# against the installed xgboost version before upgrading.
xgb_wrapper.fit(X_tr, y_tr, early_stopping_rounds=50, eval_metric='logloss',
                eval_set=evals, verbose=True)

# Predict on the held-out test set: class labels, plus column 1 of predict_proba
# (presumably the positive-class probability - verify against the label encoding).
preds = xgb_wrapper.predict(X_test)
pred_proba = xgb_wrapper.predict_proba(X_test)[:, 1]

# Print the confusion matrix and summary metrics for the test-set predictions.
get_clf_eval(y_test, preds, pred_proba)


[0]	validation_0-logloss:0.59563	validation_1-logloss:0.62408
[1]	validation_0-logloss:0.51751	validation_1-logloss:0.56720
[2]	validation_0-logloss:0.45294	validation_1-logloss:0.51702
[3]	validation_0-logloss:0.39912	validation_1-logloss:0.47785
[4]	validation_0-logloss:0.35368	validation_1-logloss:0.44141
[5]	validation_0-logloss:0.31492	validation_1-logloss:0.41174
[6]	validation_0-logloss:0.28197	validation_1-logloss:0.38558
[7]	validation_0-logloss:0.25315	validation_1-logloss:0.36909
[8]	validation_0-logloss:0.22648	validation_1-logloss:0.34847
[9]	validation_0-logloss:0.20388	validation_1-logloss:0.33189
[10]	validation_0-logloss:0.18376	validation_1-logloss:0.31765
[11]	validation_0-logloss:0.16733	validation_1-logloss:0.31030
[12]	validation_0-logloss:0.15246	validation_1-logloss:0.30269
[13]	validation_0-logloss:0.13899	validation_1-logloss:0.28949
[14]	validation_0-logloss:0.12758	validation_1-logloss:0.28244
[15]	validation_0-logloss:0.11728	validation_1-logloss:0.27387
[1

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


[61]	validation_0-logloss:0.01441	validation_1-logloss:0.24414
[62]	validation_0-logloss:0.01416	validation_1-logloss:0.24620
[63]	validation_0-logloss:0.01395	validation_1-logloss:0.24583
[64]	validation_0-logloss:0.01363	validation_1-logloss:0.24771
[65]	validation_0-logloss:0.01325	validation_1-logloss:0.24371
[66]	validation_0-logloss:0.01302	validation_1-logloss:0.24425
[67]	validation_0-logloss:0.01279	validation_1-logloss:0.24570
[68]	validation_0-logloss:0.01262	validation_1-logloss:0.24565
[69]	validation_0-logloss:0.01241	validation_1-logloss:0.24483
[70]	validation_0-logloss:0.01226	validation_1-logloss:0.24449
[71]	validation_0-logloss:0.01210	validation_1-logloss:0.24369
[72]	validation_0-logloss:0.01193	validation_1-logloss:0.24317
[73]	validation_0-logloss:0.01178	validation_1-logloss:0.24351
[74]	validation_0-logloss:0.01158	validation_1-logloss:0.24153
[75]	validation_0-logloss:0.01147	validation_1-logloss:0.24100
[76]	validation_0-logloss:0.01132	validation_1-logloss: