### 作業
請使用不同的資料集，並使用 hyper-parameter search 的方式，看能不能找出最佳的超參數組合

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score
from sklearn.ensemble import GradientBoostingClassifier
from scipy.stats import uniform, randint

In [2]:
cancer = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2, random_state=4)

gb_classifier = GradientBoostingClassifier()
gb_classifier.fit(x_train, y_train)

gb_pred = gb_classifier.predict(x_test)
acc = accuracy_score(y_test, gb_pred)
print("Accuracy:", acc)

Accuracy: 0.9385964912280702


### GridSearch超參數

In [3]:
# 定義要調整的超參數網格
param_grid = {
     'n_estimators': [50, 100, 150, 200],
     'learning_rate': [0.01, 0.1, 0.2],
     # 增加更多要調整的超參數及其候選值
}

In [4]:
# 初始化 GradientBoostingClassifier
gb_classifier = GradientBoostingClassifier()

# 使用 GridSearchCV 進行超參數搜索
grid_search = GridSearchCV(gb_classifier, param_grid, cv=5)
grid_search.fit(x_train, y_train)

GridSearchCV(cv=5, estimator=GradientBoostingClassifier(),
             param_grid={'learning_rate': [0.01, 0.1, 0.2],
                         'n_estimators': [50, 100, 150, 200]})

In [5]:
# 取得最佳參數和最佳模型
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# 使用最佳模型進行預測
gb_pred = best_model.predict(x_test)
acc = accuracy_score(y_test, gb_pred)
print("Best Parameters:", best_params)
print("Accuracy:", acc)

Best Parameters: {'learning_rate': 0.1, 'n_estimators': 150}
Accuracy: 0.9473684210526315


### RandomizedSearch超參數

In [8]:
# 定義要調整的超參數分佈
param_dist = {
     'n_estimators': randint(50, 200), # 隨機選取50到200之間的整數
     'learning_rate': uniform(0.01, 0.2), # 在0.01到0.2之間的均勻分佈中隨機選擇
     # 增加更多要調整的超參數及其分佈
}

# 初始化 GradientBoostingClassifier
gb_classifier = GradientBoostingClassifier()

# 使用 RandomizedSearchCV 進行超參數搜索
random_search = RandomizedSearchCV(gb_classifier, param_distributions=param_dist, n_iter=10, cv=5)
random_search.fit(x_train, y_train)

# 取得最佳參數和最佳模型
best_params = random_search.best_params_
best_model = random_search.best_estimator_

# 使用最佳模型進行預測
gb_pred = best_model.predict(x_test)
acc = accuracy_score(y_test, gb_pred)
print("Best Parameters:", best_params)
print("Accuracy:", acc)

Best Parameters: {'learning_rate': 0.14905108535608255, 'n_estimators': 173}
Accuracy: 0.9473684210526315
