# 实现混淆矩阵，精准率和召回率

In [2]:
import numpy as np
from sklearn import datasets

In [14]:
digits = datasets.load_digits()
X = digits.data
y = digits.target.copy()

In [18]:
y[digits.target == 9] = 1
y[digits.target != 9] = 0

In [20]:
from sklearn.model_selection import  train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=666)

In [21]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression()
log_reg.fit(X_train,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [22]:
log_reg.score(X_test,y_test)

0.9755555555555555

In [23]:
y_log_predict = log_reg.predict(X_test)

In [24]:
def TN(y_true,y_predict):
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))

In [25]:
TN(y_test,y_log_predict)

403

In [27]:
def FP(y_true,y_predict):
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))

FP(y_test,y_log_predict)

2

In [28]:
def FN(y_true,y_predict):
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))

FN(y_test,y_log_predict)

9

In [29]:
def TP(y_true,y_predict):
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))

TP(y_test,y_log_predict)

36

In [30]:
def confusion_matric(y_true,y_predict):
    return np.array([
        [TN(y_test,y_log_predict),FP(y_test,y_log_predict)],
        [FN(y_test,y_log_predict),TP(y_test,y_log_predict)]
    ])

In [31]:
confusion_matric(y_test,y_log_predict)

array([[403,   2],
       [  9,  36]])

In [32]:
def precision_socre(y_true,y_predict):
    tp = TP(y_true,y_predict)
    fp = FP(y_true,y_predict)
    try:
        return tp / (tp + fp)
    except:
        return 0.0

In [34]:
precision_socre(y_test,y_log_predict)

0.9473684210526315

In [35]:
def recall_socre(y_true,y_predict):
    tp = TP(y_true,y_predict)
    fn = TN(y_true,y_predict)
    try:
        return tp / (tp + fn)
    except:
        return 0.0

In [36]:
recall_socre(y_test,y_log_predict)

0.08200455580865604

### scikit-learn中的混淆矩阵，精准率和召回率

In [37]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test,y_log_predict)

array([[403,   2],
       [  9,  36]], dtype=int64)

In [38]:
from sklearn.metrics import precision_score
precision_socre(y_test,y_log_predict)

0.9473684210526315

In [39]:
from sklearn.metrics import recall_score
recall_socre(y_test,y_log_predict)

0.08200455580865604