## 深入理解xgboost十五

### 交叉验证

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn import metrics

In [2]:
# 加载数据集
iris = datasets.load_breast_cancer()

# 模型配置
clf = svm.SVC(kernel="linear", C=1)

In [3]:
# 交叉验证
scores = cross_val_score(estimator=clf, X=iris.data, y=iris.target, cv=5, scoring="f1_macro")
scores

array([0.94222973, 0.92382225, 0.97158288, 0.91474865, 0.95327876])

In [4]:
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

In [5]:
X = iris.data
y = iris.target

In [6]:
kf = KFold(n_splits=5, shuffle=True)
for train_idx, test_idx in kf.split(X):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    model = svm.SVC(kernel="linear", C=1)
    model.fit(X=X_train, y=y_train)
    y_pred = model.predict(X=X_test)
    print(f1_score(y_true=y_test, y_pred=y_pred, average="macro"))

0.9526381387619443
0.9407894736842105
0.9605263157894737
0.9364396654719236
0.9633354964308889


In [7]:
from sklearn import metrics

In [8]:
kf = KFold(n_splits=5, shuffle=True)
for train_idx, test_idx in kf.split(X):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    model = svm.SVC(kernel="linear", C=1)
    model.fit(X=X_train, y=y_train)
    y_pred = model.predict(X=X_test)

### 准确率

In [9]:
accuracy_score = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy_score

0.9469026548672567

### ACU值

In [10]:
auc = metrics.roc_auc_score(y_test, y_pred)
auc

0.9411764705882353

### 分类报告

In [11]:
classification_report = metrics.classification_report(y_true=y_test, y_pred=y_pred)
print(classification_report)

              precision    recall  f1-score   support

           0       1.00      0.88      0.94        51
           1       0.91      1.00      0.95        62

    accuracy                           0.95       113
   macro avg       0.96      0.94      0.95       113
weighted avg       0.95      0.95      0.95       113



### f1值

In [12]:
f1_score = metrics.f1_score(y_true=y_test, y_pred=y_pred)
f1_score

0.9538461538461539

### top K准确率

In [17]:
top_k_accuracy_score = metrics.top_k_accuracy_score(y_true=y_test, y_score=y_pred, k=1)
top_k_accuracy_score

0.9469026548672567