## [作業重點]
了解如何使用 Sklearn 中的 hyper-parameter search 找出最佳的超參數

### 作業
請使用不同的資料集，並使用 hyper-parameter search 的方式，看能不能找出最佳的超參數組合

In [13]:
from sklearn.model_selection import train_test_split,KFold,GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import metrics,datasets
import numpy as np

# Single seed shared by the train/test split and the estimator so that
# Restart & Run All reproduces the same search result.
RANDOM_STATE = 1

# Load the iris data set and hold out 25% of the samples for the final evaluation.
iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=.25, random_state=RANDOM_STATE
)

# Candidate values for each hyper-parameter; printed so the grid is visible in the output.
n_estimators = np.arange(2, 10, 2)
print(n_estimators)
max_depth = np.arange(5, 50, 10)
print(max_depth)
# Build 0.01 .. 0.09 from an integer range: np.arange with a float step can
# produce an unexpected number of elements because of rounding error.
learning_rate = np.arange(1, 10) / 100
print(learning_rate)
min_samples_split = np.arange(10, 50, 20)
print(min_samples_split)

# Base estimator; GridSearchCV overrides the four searched parameters per candidate.
clf = GradientBoostingClassifier(random_state=RANDOM_STATE)

param_grid = {
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'learning_rate': learning_rate,
}
# cv is spelled out so the number of folds does not depend on the library default.
grid_search = GridSearchCV(clf, param_grid, scoring='accuracy', cv=5, n_jobs=-1, verbose=1)

# Exhaustively evaluate every combination in param_grid with cross-validation on the training split.
grid_search_result = grid_search.fit(x_train, y_train)

[2 4 6 8]
[ 5 15 25 35 45]
[0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09]
[10 30]
Fitting 5 folds for each of 360 candidates, totalling 1800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 200 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 1400 tasks      | elapsed:    8.0s
[Parallel(n_jobs=-1)]: Done 1800 out of 1800 | elapsed:   10.2s finished


In [14]:
# Report the best cross-validated score and the parameter combination that achieved it.
print(
    f"best_score:{grid_search_result.best_score_}",
    f"best_params:{grid_search_result.best_params_}",
    sep="\n",
)

best_score:0.9466403162055336
best_params:{'learning_rate': 0.02, 'max_depth': 5, 'min_samples_split': 10, 'n_estimators': 6}


In [15]:
# Baseline: fit the un-tuned `clf` on the training split and score it on the held-out test split.

clf.fit(x_train, y_train)
predict_y = clf.predict(x_test)

# The value computed here is classification accuracy, so the label says so
# (it was previously printed as "mean_squared_error").
# accuracy_score expects (y_true, y_pred).
print(f"accuracy_score:{metrics.accuracy_score(y_test, predict_y)}")

mean_squared_error:0.9736842105263158


In [16]:
# Tuned model: rebuild the classifier from the best parameter combination found by the grid search,
# fit it on the training split and score it on the held-out test split.

# Unpacking best_params_ keeps this cell in sync with whatever keys the search grid contains,
# instead of copying each parameter by hand.
best_clf = GradientBoostingClassifier(**grid_search_result.best_params_)
best_clf.fit(x_train, y_train)
predict_y_fixed = best_clf.predict(x_test)

# accuracy_score expects (y_true, y_pred).
print(f"accuracy_score:{metrics.accuracy_score(y_test, predict_y_fixed)}")

accuracy_score:0.9736842105263158
