## 深入理解xgboost十

### 基于历史预测值继续训练

In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

import xgboost as xgb

In [2]:
# Load the breast-cancer dataset and pull out the feature matrix and the
# target vector in one step.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

In [3]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [4]:
# Wrap both splits in xgboost's DMatrix container, labels attached.
xgb_train = xgb.DMatrix(data=X_train, label=y_train)
xgb_test = xgb.DMatrix(data=X_test, label=y_test)

In [5]:
# Booster configuration shared by both training runs below.
params = dict(
    objective="binary:logistic",
    booster="gbtree",
    eta=0.05,
    max_depth=6,
)
# Number of boosting rounds used for each call to xgb.train.
num_round = 60
# Datasets evaluated after every boosting round, paired with the label
# printed in the training log.
watch_list = [
    (xgb_train, "training"),
    (xgb_test, "testing"),
]

In [6]:
# First training run: fit `num_round` trees from scratch and report the
# metric on every dataset in `watch_list` after each round.
model = xgb.train(
    params=params,
    dtrain=xgb_train,
    num_boost_round=num_round,
    evals=watch_list,
)

[0]	training-logloss:0.64845	testing-logloss:0.65376
[1]	training-logloss:0.60802	testing-logloss:0.61785
[2]	training-logloss:0.57111	testing-logloss:0.58557
[3]	training-logloss:0.53729	testing-logloss:0.55530
[4]	training-logloss:0.50619	testing-logloss:0.52757
[5]	training-logloss:0.47751	testing-logloss:0.50274
[6]	training-logloss:0.45100	testing-logloss:0.47952
[7]	training-logloss:0.42644	testing-logloss:0.45837
[8]	training-logloss:0.40363	testing-logloss:0.43853
[9]	training-logloss:0.38283	testing-logloss:0.42101
[10]	training-logloss:0.36302	testing-logloss:0.40390
[11]	training-logloss:0.34478	testing-logloss:0.38779
[12]	training-logloss:0.32709	testing-logloss:0.37103
[13]	training-logloss:0.31055	testing-logloss:0.35520
[14]	training-logloss:0.29524	testing-logloss:0.34180
[15]	training-logloss:0.28069	testing-logloss:0.32786
[16]	training-logloss:0.26721	testing-logloss:0.31616
[17]	training-logloss:0.25441	testing-logloss:0.30479
[18]	training-logloss:0.24254	testing-

In [7]:
# Score both splits with the first model. output_margin=True requests the
# raw, untransformed margin rather than a probability, which is the form
# needed when the values are fed back in as a base margin.
pred_train, pred_test = (
    model.predict(dmat, output_margin=True) for dmat in (xgb_train, xgb_test)
)

In [8]:
# Use the previous predictions as the starting point for further training.
# The values supplied here must be the raw margins, i.e. the scores before
# the objective's transformation is applied.
xgb_train.set_base_margin(margin=pred_train)
xgb_test.set_base_margin(margin=pred_test)

In [9]:
# Second training run: same parameters and round count, but `xgb_train` and
# `xgb_test` now carry the earlier predictions as base margins, so boosting
# continues from the first model's scores. `watch_list` holds those same
# DMatrix objects, so the logged metrics start from the earlier scores too.
# NOTE: this rebinds `model`; the first booster is no longer referenced by
# that name.
model = xgb.train(
    params=params,
    dtrain=xgb_train,
    num_boost_round=num_round,
    evals=watch_list,
)

[0]	training-logloss:0.04919	testing-logloss:0.14802
[1]	training-logloss:0.04787	testing-logloss:0.14712
[2]	training-logloss:0.04650	testing-logloss:0.14574
[3]	training-logloss:0.04518	testing-logloss:0.14551
[4]	training-logloss:0.04400	testing-logloss:0.14524
[5]	training-logloss:0.04282	testing-logloss:0.14432
[6]	training-logloss:0.04172	testing-logloss:0.14330
[7]	training-logloss:0.04062	testing-logloss:0.14226
[8]	training-logloss:0.03958	testing-logloss:0.14197
[9]	training-logloss:0.03857	testing-logloss:0.14150
[10]	training-logloss:0.03766	testing-logloss:0.14099
[11]	training-logloss:0.03674	testing-logloss:0.14082
[12]	training-logloss:0.03585	testing-logloss:0.14034
[13]	training-logloss:0.03501	testing-logloss:0.14032
[14]	training-logloss:0.03419	testing-logloss:0.14024
[15]	training-logloss:0.03345	testing-logloss:0.13989
[16]	training-logloss:0.03270	testing-logloss:0.13924
[17]	training-logloss:0.03198	testing-logloss:0.13945
[18]	training-logloss:0.03130	testing-