In [5]:
import numpy as np

from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset that ships with scikit-learn.
data = load_breast_cancer()

# Pull the feature matrix (X) and the class labels (y) out of the loaded object.
X, y = data.data, data.target

In [6]:
from sklearn.model_selection import ShuffleSplit
# One random 80/20 train/test partition; the fixed random_state keeps it reproducible.
ss = ShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=0)

# split() yields (train indices, test indices) pairs; with n_splits=1 only the
# first pair is needed.
train_index, test_index = next(ss.split(X, y))

# Index features and labels with the same index arrays so rows stay aligned.
X_train, y_train = X[train_index], y[train_index]
X_test, y_test = X[test_index], y[test_index]

In [7]:
from sklearn import linear_model
# Logistic-regression classifier created with the library's default settings.
clf = linear_model.LogisticRegression()

In [8]:
# Train on the training split, then report the accuracy on the held-out split.
# fit() returns the estimator itself, so the two calls can be chained.
clf.fit(X_train, y_train).score(X_test, y_test)

0.956140350877193

In [9]:
# Predicted class labels for the held-out test samples; the metric cells that
# follow compare these against y_test.
y_pred = clf.predict(X_test)

In [11]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [12]:
# Fraction of test samples whose predicted label matches the true label
# (the same value clf.score(X_test, y_test) reported).
accuracy_score(y_test, y_pred)

0.956140350877193

In [13]:
# Confusion matrix: row i = true class i, column j = predicted class j,
# so the diagonal holds the correctly classified counts.
cmat = confusion_matrix(y_test, y_pred)
cmat

array([[46,  1],
       [ 4, 63]], dtype=int64)

In [17]:
# Accuracy derived by hand from the confusion matrix.
# The three values are wrapped in one parenthesised tuple so the cell displays
# all of them; written as bare lines ending in a comma, the first two would be
# evaluated and thrown away.
(
    cmat.sum(),                          # total number of test samples
    cmat.diagonal().sum(),               # correctly classified samples (sum of the diagonal)
    cmat.diagonal().sum() / cmat.sum(),  # accuracy = correct / total, so it can never exceed 1
)

(114, 109, 0.956140350877193)

In [18]:
# Class 0 is treated as the "positive" class here.
# Rows of cmat are true labels and columns are predictions, so:
#   row 0 = (TP, FN): true class 0 predicted as 0 / predicted as 1
#   row 1 = (FP, TN): true class 1 predicted as 0 / predicted as 1
(TP, FN), (FP, TN) = cmat
TP, TN, FP, FN

(46, 63, 4, 1)

In [19]:
from sklearn.metrics import classification_report

In [20]:
# Per-class precision, recall, F1 and support, printed with four decimal places.
print(classification_report(y_test, y_pred, digits=4))

             precision    recall  f1-score   support

          0     0.9200    0.9787    0.9485        47
          1     0.9844    0.9403    0.9618        67

avg / total     0.9578    0.9561    0.9563       114



In [24]:
# Recall of class 0 (also called sensitivity or true positive rate, TPR):
# of all samples that truly belong to class 0 (the malignant cases in this
# example), the share the classifier caught without missing them.
# With (TP, TN, FP, FN) = (46, 63, 4, 1) this is 46 / (46 + 1).
recall_0 = TP / (FN + TP)
recall_0

0.9787234042553191

In [25]:
# Precision of class 0: of all samples the classifier labelled as class 0
# (judged malignant in this example), the share that really are class 0.
# With (TP, TN, FP, FN) = (46, 63, 4, 1) this is 46 / (46 + 4).
precision_0 = TP / (FP + TP)
precision_0

0.92

In [31]:
# False positive rate (FPR) = 1 - specificity: the share of samples that
# should have been judged negative but were wrongly flagged as positive.
FP / (TN + FP)

0.05970149253731343

In [32]:
# Recall of class 1 (the negative class in this example): of all samples that
# truly belong to class 1, the share that were predicted as class 1.
# With (TP, TN, FP, FN) = (46, 63, 4, 1) this is 63 / (63 + 4).
recall_1 = TN / (FP + TN)
recall_1

0.9402985074626866

In [33]:
# Precision of class 1 (the negative class in this example): of all samples
# predicted as class 1, the share that really are class 1.
# With (TP, TN, FP, FN) = (46, 63, 4, 1) this is 63 / (63 + 1).
precision_1 = TN / (FN + TN)
precision_1

0.984375

In [34]:
# F1 score of class 0: the harmonic mean of recall_0 and precision_0,
# i.e. 2 / (1/recall_0 + 1/precision_0).
f1_0 = (2 * recall_0 * precision_0) / (recall_0 + precision_0)
f1_0

0.9484536082474226

In [35]:
# F1 score of class 1: the harmonic mean of recall_1 and precision_1,
# i.e. 2 / (1/recall_1 + 1/precision_1).
f1_1 = (2 * recall_1 * precision_1) / (recall_1 + precision_1)
f1_1

0.9618320610687023

In [37]:
# The F1 score can also be computed with the following scikit-learn function
from sklearn.metrics import f1_score

In [40]:
f1_score(y_test, y_pred, pos_label=0) # F1 score with class 0 (the positive class in this example) as the target

0.9484536082474226

In [42]:
f1_score(y_test, y_pred, pos_label=1) # F1 score with class 1 (the negative class in this example) as the target

0.9618320610687023

In [44]:
# The per-class numbers shown in the classification report can also be obtained
# as arrays (precision, recall, F-score, support) with the following function.

from sklearn.metrics import precision_recall_fscore_support
precision_recall_fscore_support(y_test, y_pred, beta=1)

(array([0.92    , 0.984375]),
 array([0.9787234 , 0.94029851]),
 array([0.94845361, 0.96183206]),
 array([47, 67], dtype=int64))