In [1]:
import numpy as np

from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset shipped with scikit-learn, then pull the
# feature matrix (X) and the class-label vector (y) out of the container.
data = load_breast_cancer()
X, y = data.data, data.target

In [2]:
from sklearn.model_selection import ShuffleSplit
# One random 80/20 hold-out split; random_state is fixed so the same indices
# are drawn on every run.
ss = ShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=0)

# With n_splits=1 the generator yields a single (train, test) index pair.
train_index, test_index = next(ss.split(X, y))

# Index features and labels with the same index arrays so rows stay aligned.
X_train, y_train = X[train_index], y[train_index]
X_test, y_test = X[test_index], y[test_index]

In [3]:
from sklearn import linear_model
# Logistic-regression classifier with every constructor argument left at the
# library default.
# NOTE(review): the recorded repr shows solver='warn' / multi_class='warn',
# which suggests the defaults differ between scikit-learn versions; consider
# pinning `solver` explicitly so the recorded scores stay reproducible —
# confirm which solver produced them.
clf = linear_model.LogisticRegression()

In [4]:
# Fit the classifier on the training split only; the test split stays unseen.
clf.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [5]:
# Score on the held-out test split. The value equals the accuracy_score
# computed from explicit predictions later in the notebook.
clf.score(X_test, y_test)

0.956140350877193

In [6]:
# Predicted class labels for the test split; every metric below compares
# these against y_test.
y_pred = clf.predict(X_test)

In [7]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


In [8]:
# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_test, y_pred)

0.956140350877193

In [9]:
# 2x2 confusion matrix. Following the scikit-learn convention, rows are the
# true classes and columns are the predicted classes, ordered class 0, class 1
# (the row sums 47 and 67 match the per-class support in the report below).
cmat = confusion_matrix(y_test, y_pred)
cmat

array([[46,  1],
       [ 4, 63]], dtype=int64)

In [10]:
# Accuracy recomputed from the confusion matrix: correctly classified samples
# sit on the diagonal, so accuracy = (sum of diagonal) / (sum of all entries).
n_total = cmat.sum()               # every test sample (114 for this split)
n_correct = cmat.diagonal().sum()  # correctly classified samples (109 here)

# Last expression of the cell, so this ratio is what gets displayed.
n_correct / n_total

0.956140350877193

In [11]:
# Class 0 is treated as the "positive" class in this notebook.
# Rows of cmat are true labels and columns are predictions, so
#   row 0 (true class 0) = (TP, FN)
#   row 1 (true class 1) = (FP, TN)
(TP, FN), (FP, TN) = cmat
TP, TN, FP, FN

(46, 63, 4, 1)

In [12]:
from sklearn.metrics import classification_report

In [13]:
# Per-class precision / recall / F1 / support plus the averaged rows, printed
# with 4 decimals. The following cells reproduce these numbers by hand.
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

           0     0.9200    0.9787    0.9485        47
           1     0.9844    0.9403    0.9618        67

   micro avg     0.9561    0.9561    0.9561       114
   macro avg     0.9522    0.9595    0.9551       114
weighted avg     0.9578    0.9561    0.9563       114



In [14]:
# Recall of class 0: the share of true class-0 samples predicted as class 0.
recall_0 = TP / (TP + FN)

# = 46/(46+1) class 0 recall,
#             sensitivity,
#             True positive rate (TPR)

recall_0

0.9787234042553191

In [15]:
# Precision of class 0: the share of class-0 predictions that are correct.
precision_0 = TP / (TP + FP)
# = 46/(46+4) class 0 precision

precision_0

0.92

In [16]:
# Recall of class 1 = 63 / (63 + 4).
# Because class 0 plays the role of the positive class, the recall of class 1
# is also the specificity (true negative rate), so both names are bound to
# the same value.
specificity = recall_1 = TN / (FP + TN)
recall_1

0.9402985074626866

In [17]:
# Share of true class-1 (negative) samples that were predicted as class 0.
FP / (FP + TN)
# False positive rate (FPR) = 1 - specificity

0.05970149253731343

In [18]:
# Precision of class 1: the share of class-1 predictions that are correct.
precision_1 = TN / (TN + FN)
# = 63/(63+1) class 1 precision

precision_1

0.984375

In [19]:
# F1 score of class 0: the harmonic mean of its recall and precision,
# i.e. 2 / (1/recall_0 + 1/precision_0).
f1_0 = (
    2 * recall_0 * precision_0
    / (recall_0 + precision_0)
)
f1_0

0.9484536082474226

In [20]:
# F1 score of class 1: the harmonic mean of its recall and precision,
# i.e. 2 / (1/recall_1 + 1/precision_1).
f1_1 = (
    2 * recall_1 * precision_1
    / (recall_1 + precision_1)
)
f1_1

0.9618320610687023

In [21]:
from sklearn.metrics import f1_score

# Library F1 with each class in turn treated as the positive label; the
# displayed tuple is (F1 of class 0, F1 of class 1).
tuple(f1_score(y_test, y_pred, pos_label=label) for label in (0, 1))

(0.9484536082474226, 0.9618320610687023)

In [22]:
from sklearn.metrics import fbeta_score
# F-beta with beta=1 weights precision and recall equally, so it gives the
# same (class 0, class 1) pair as the F1 scores.
tuple(fbeta_score(y_test, y_pred, beta=1, pos_label=label) for label in (0, 1))

(0.9484536082474226, 0.9618320610687023)

In [23]:
from sklearn.metrics import precision_recall_fscore_support
# All per-class metrics in one call. The result is a 4-tuple of arrays
# (precision, recall, F-score, support), each indexed by class label.
precision_recall_fscore_support(y_test, y_pred, beta=1)

(array([0.92    , 0.984375]),
 array([0.9787234 , 0.94029851]),
 array([0.94845361, 0.96183206]),
 array([47, 67], dtype=int64))

In [24]:
# Macro-averaged precision: the unweighted mean of the per-class precisions
# (the "macro avg" row of the classification report).
(precision_0 + precision_1) / 2

0.9521875

In [25]:
# Weighted-average precision: per-class precisions weighted by class support
# (the "weighted avg" row of the classification report).
# Support is the number of true samples per class, i.e. the row sums of the
# confusion matrix, so it is taken from cmat instead of being typed in by
# hand; the result stays correct if the train/test split changes.
support_0, support_1 = cmat.sum(axis=1)
(precision_0 * support_0 + precision_1 * support_1) / (support_0 + support_1)

0.9578344298245615

10-class problem (digits dataset)

In [26]:
from sklearn.datasets import load_digits
# Switch to the 10-class digits dataset. `data`, `X` and `y` are rebound here
# and no longer refer to the breast-cancer data used above.
data = load_digits()
X, y = data.data, data.target

In [27]:
from sklearn.model_selection import ShuffleSplit
# Same 80/20 hold-out scheme as for the binary problem, with a fixed seed.
ss = ShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=0)

# n_splits=1, so take the single (train, test) index pair from the generator.
train_index, test_index = next(ss.split(X, y))

# Rebinds the train/test variables to the digits data.
X_train, y_train = X[train_index], y[train_index]
X_test, y_test = X[test_index], y[test_index]

In [28]:
# Refit the same estimator object on the digits training split. The recorded
# repr shows warm_start=False, so this should be a fresh fit rather than a
# continuation of the breast-cancer model.
clf.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [29]:
# Score on the held-out digits test split.
clf.score(X_test, y_test)

0.95

In [30]:
# Predicted digit labels for the test split; rebinds y_pred from the binary
# problem.
y_pred = clf.predict(X_test)

In [31]:
# Accuracy from the explicit predictions; same value as clf.score above.
accuracy_score(y_test, y_pred)

0.95

In [32]:
# 10x10 confusion matrix (rows: true digit, columns: predicted digit);
# off-diagonal entries are the misclassified samples.
confusion_matrix(y_test, y_pred)

array([[27,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0, 31,  0,  0,  0,  0,  1,  0,  3,  0],
       [ 0,  0, 34,  2,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 29,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 30,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0, 39,  0,  0,  0,  1],
       [ 0,  1,  0,  0,  0,  0, 43,  0,  0,  0],
       [ 0,  1,  0,  0,  1,  0,  0, 37,  0,  0],
       [ 0,  2,  1,  0,  0,  0,  0,  0, 35,  1],
       [ 0,  0,  0,  1,  0,  1,  0,  0,  2, 37]], dtype=int64)

In [33]:
# Per-digit precision / recall / F1 / support and the averaged rows, printed
# with 4 decimals.
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        27
           1     0.8857    0.8857    0.8857        35
           2     0.9714    0.9444    0.9577        36
           3     0.9062    1.0000    0.9508        29
           4     0.9677    1.0000    0.9836        30
           5     0.9750    0.9750    0.9750        40
           6     0.9773    0.9773    0.9773        44
           7     1.0000    0.9487    0.9737        39
           8     0.8750    0.8974    0.8861        39
           9     0.9487    0.9024    0.9250        41

   micro avg     0.9500    0.9500    0.9500       360
   macro avg     0.9507    0.9531    0.9515       360
weighted avg     0.9509    0.9500    0.9500       360

