# 模型评估方法

In [20]:
import numpy as np
import os
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt
# Global plot styling: larger fonts for axis labels and tick labels.
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})
import warnings
# Silence every warning raised by the libraries used below.
warnings.filterwarnings('ignore')
# Seed NumPy's global RNG so the shuffle further down is reproducible.
np.random.seed(42)

# 数据集读取
* 这里使用的是 sklearn 自带的 digits 手写数字数据集（`load_digits`）：共 1797 张 8×8 的灰度图像，每张展平为 64 维特征；它并不是 (28, 28, 1) 的原版 MNIST，下文只是沿用 `mnist` 作为变量名。

In [21]:
from sklearn.datasets import load_digits
# Despite the variable name, this is scikit-learn's bundled digits dataset
# loaded through load_digits(), not the original MNIST files.
mnist = load_digits()
# Display the whole returned object; per the output below it exposes
# 'data', 'target', 'frame' and 'feature_names' (pixel_<row>_<col>) entries.
mnist

{'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ..., 10.,  0.,  0.],
        [ 0.,  0.,  0., ..., 16.,  9.,  0.],
        ...,
        [ 0.,  0.,  1., ...,  6.,  0.,  0.],
        [ 0.,  0.,  2., ..., 12.,  0.,  0.],
        [ 0.,  0., 10., ..., 12.,  1.,  0.]]),
 'target': array([0, 1, 2, ..., 8, 9, 8]),
 'frame': None,
 'feature_names': ['pixel_0_0',
  'pixel_0_1',
  'pixel_0_2',
  'pixel_0_3',
  'pixel_0_4',
  'pixel_0_5',
  'pixel_0_6',
  'pixel_0_7',
  'pixel_1_0',
  'pixel_1_1',
  'pixel_1_2',
  'pixel_1_3',
  'pixel_1_4',
  'pixel_1_5',
  'pixel_1_6',
  'pixel_1_7',
  'pixel_2_0',
  'pixel_2_1',
  'pixel_2_2',
  'pixel_2_3',
  'pixel_2_4',
  'pixel_2_5',
  'pixel_2_6',
  'pixel_2_7',
  'pixel_3_0',
  'pixel_3_1',
  'pixel_3_2',
  'pixel_3_3',
  'pixel_3_4',
  'pixel_3_5',
  'pixel_3_6',
  'pixel_3_7',
  'pixel_4_0',
  'pixel_4_1',
  'pixel_4_2',
  'pixel_4_3',
  'pixel_4_4',
  'pixel_4_5',
  'pixel_4_6',
  'pixel_4_7',
  'pixel_5_0',
  'pixel_5_1',
 

In [22]:
# Feature matrix (one flattened image per row) and the matching digit labels.
X = mnist['data']
y = mnist['target']
# Show (n_samples, n_features).
X.shape

(1797, 64)

In [23]:
# Number of leading samples used for training; everything after it is the test set.
N_TRAIN = 1000
X_train, X_test, y_train, y_test = X[:N_TRAIN], X[N_TRAIN:], y[:N_TRAIN], y[N_TRAIN:]
# Shuffle the training set so the original sample order cannot influence training.
# The permutation length is taken from the data itself, so it always matches
# the split above instead of relying on a second hard-coded 1000.
shuffle_index = np.random.permutation(len(X_train))
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

# 交叉验证
应该使用训练集中的一部分进行交叉验证，不能使用测试集交叉验证。

In [24]:
# Binary targets for a "5 vs. not-5" detector: True where the digit is 5.
y_train_5, y_test_5 = (y_train == 5), (y_test == 5)
# Peek at the first ten training targets.
y_train_5[:10]

array([ True, False, False, False,  True,  True, False, False, False,
       False])

In [25]:
from sklearn.linear_model import SGDClassifier
# Linear classifier trained with stochastic gradient descent.
# random_state pins the run; max_iter caps the number of passes over the data at 5.
sgd_clf = SGDClassifier(random_state=42, max_iter=5)
# Fit the binary "is it a 5?" detector on the shuffled training set.
sgd_clf.fit(X=X_train, y=y_train_5)

In [26]:
# Predict a single sample (index 1000, the first one outside the training
# slice); reshape it into a 1-row 2-D array as predict() expects.
sgd_clf.predict(X[1000].reshape(1, -1))

array([False])

In [27]:
# Ground-truth digit label of sample 1000, the sample passed to predict() above.
y[1000]

1

* 使用sklearn内置方法进行交叉验证

In [28]:
from sklearn.model_selection import cross_val_score
# cv: number of folds the training data is split into for cross-validation.
# scoring: metric computed on each held-out fold.
# - "accuracy": fraction of correct predictions, commonly used for classification.
cross_val_score(
    estimator=sgd_clf,
    X=X_train,
    y=y_train_5,
    scoring="accuracy",
    cv=3,
)

array([0.98802395, 0.99099099, 0.98798799])

自己进行交叉验证

In [33]:
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

# Hand-rolled 3-fold cross-validation on the training set.
# The folds are stratified on y_train_5 and shuffled with a fixed seed.
skflods =StratifiedKFold(n_splits=3, random_state=42, shuffle=True)
for train_index, test_index in skflods.split(X_train, y_train_5):
    # Work on an unfitted copy so each fold starts from scratch and the
    # already-fitted sgd_clf is left untouched.
    clone_clf = clone(sgd_clf)
    X_train_folds = X_train[train_index]
    y_train_folds = y_train_5[train_index]
    X_test_folds = X_train[test_index]
    y_test_folds = y_train_5[test_index]
    clone_clf.fit(X_train_folds, y_train_folds)
    y_pred = clone_clf.predict(X_test_folds)
    # Vectorised count of correct predictions; the builtin sum() would walk
    # the NumPy boolean array one element at a time in Python.
    n_correct = np.sum(y_pred == y_test_folds)
    # Accuracy on this held-out fold.
    print(n_correct/len(y_pred))

0.9760479041916168
0.987987987987988
0.990990990990991


confusion matrix - 混淆矩阵：

指标：
* recall：召回率，即所有实际正例中被正确找出的比例
* TP(True positive)：真正例，实际为正例且被预测为正例（分类正确）
* FP(False positive)：假正例，实际为负例却被预测为正例（分类错误）
* FN(False negative)：假负例，实际为正例却被预测为负例（分类错误）
* TN(True negative)：真负例，实际为负例且被预测为负例（分类正确）


In [34]:
from sklearn.model_selection import cross_val_predict
# Out-of-fold predictions: each training sample is predicted by a model
# fitted on the other folds (3-fold split).
y_train_pred = cross_val_predict(estimator=sgd_clf, X=X_train, y=y_train_5, cv=3)

In [37]:
from sklearn.metrics import confusion_matrix

# Rows are the actual classes, columns the predicted classes.
confusion_matrix(y_true=y_train_5, y_pred=y_train_pred)

array([[898,   2],
       [  9,  91]])

[[TN, FP],
[FN, TP]]
- TN：898个数据被正确地分为非5类别
- FP：2个被错误地分为5类别
- FN：9 个被错误地分为非 5类别
- TP：91 个被正确的分为5类别

一个完美的分类器应该只有 TP 和 TN，也就是混淆矩阵只有主对角线上的元素非零。

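$$\text{precision} = \frac{TP}{TP + FP}$$

$$\text{recall} = \frac{TP}{TP + FN}$$

将 precision 和 recall 结合到一起，就得到 F1 score 指标。它是两者的调和平均值，会给较低的那个值更多权重，因此只有 precision 和 recall 都高时 F1 才会高。

$$F_1 = \frac{2 \cdot \text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}} = \frac{TP}{TP + \frac{FN + FP}{2}}$$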

In [41]:
from sklearn.metrics import precision_score, recall_score, f1_score
# Precision of the out-of-fold predictions for the "5" class.
precision_score(y_true=y_train_5, y_pred=y_train_pred)

0.978494623655914

In [39]:
# Recall of the out-of-fold predictions for the "5" class.
recall_score(y_true=y_train_5, y_pred=y_train_pred)

0.91

In [42]:
# F1 score (harmonic mean of precision and recall) for the "5" class.
f1_score(y_true=y_train_5, y_pred=y_train_pred)

0.9430051813471503

# 设置阈值
使用 decision_function 得到每个样本的决策分数，再与自己设定的阈值比较，从而决定最终的预测类别。

In [44]:
# Raw decision score of sample 1000 instead of a hard True/False prediction.
y_scores = sgd_clf.decision_function([X[1000]])
# Custom decision threshold applied to the score.
t = 5000
y_pred = (y_scores > t)  # The threshold decides the final predicted class.
# Only the last expression of a cell is displayed, so the score is shown here
# at the end; a bare `y_scores` in the middle of the cell prints nothing.
y_scores

array([-8418.1408434])

# ROC 曲线
ROC（receiver operating characteristic，受试者工作特征）曲线：二元分类常用的评估方法。TODO：这部分还需要再[看看](https://www.bilibili.com/video/BV1pM411r7Pq?p=27&spm_id_from=pageDriver&vd_source=3c3402cdc224f85ec24a26ce2328caab)