# 🙆🏻 模型微调

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True, as_frame=True)
y[0:15] = 2  # 手动将2分类问题修改成3分类问题对应下面的`Dist=k_categorical(3)`
X_cls_train, X_cls_test, Y_cls_train, Y_cls_test = train_test_split(X, y, test_size=0.2)

## 壹丨分阶段预测

NGBoost 的 Staged Prediction 是一种用于获取模型在不同训练阶段的预测结果的功能。这种方法允许用户在训练过程中查看模型在每个阶段的性能

In [2]:
from ngboost import NGBClassifier
from ngboost.distns import k_categorical, Bernoulli
from ngboost.scores import LogScore

ngb_cls = NGBClassifier(Dist=k_categorical(3), Score=LogScore, n_estimators=500, verbose=False).fit(X_cls_train, Y_cls_train)

In [None]:
ngb_cls.staged_predict(X_cls_train)

## 贰丨提前结束

将一个整数`early_stopping_rounds`和一个验证集(`X_val`, `Y_val`)传递给`fit()`，则会在验证损失+`early_stopping_rounds`后停止算法

验证集数据权重可以通过`val_sample_weight`参数传给`fit()`

In [4]:
from ngboost import NGBRegressor

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, Y = fetch_california_housing(return_X_y=True, as_frame=True)
X_reg_train, X_reg_test, Y_reg_train, Y_reg_test = train_test_split(X, Y, test_size=0.2)

_ = NGBRegressor().fit(X_reg_train, Y_reg_train, X_val=X_reg_test, Y_val=Y_reg_test, early_stopping_rounds=2)

[iter 0] loss=1.5641 val_loss=1.5444 scale=1.0000 norm=1.1066
[iter 100] loss=1.1366 val_loss=1.1271 scale=2.0000 norm=1.5541
[iter 200] loss=0.9167 val_loss=0.9139 scale=1.0000 norm=0.7047
[iter 300] loss=0.7755 val_loss=0.7840 scale=1.0000 norm=0.6812
[iter 400] loss=0.7027 val_loss=0.7235 scale=1.0000 norm=0.6786


## 叁丨使用sklearn模型选择

In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from ngboost.distns import Exponential,Normal

b1 = DecisionTreeRegressor(criterion='friedman_mse', max_depth=2)
b2 = DecisionTreeRegressor(criterion='friedman_mse', max_depth=4)

param_grid = {
    'minibatch_frac': [1.0, 0.5],
    'Base': [b1, b2]
}

ngb = NGBRegressor(Dist=Normal, verbose=False)

grid_search = GridSearchCV(ngb, param_grid=param_grid, cv=5)
grid_search.fit(X_reg_train, Y_reg_train)
print(grid_search.best_params_)

{'Base': DecisionTreeRegressor(criterion='friedman_mse', max_depth=2), 'minibatch_frac': 1.0}
