# Metrics untuk Binary Classification

In [3]:
# Turn the 10-class digits data into an imbalanced binary problem:
# the positive class is "the digit is a 9", every other digit is negative.
from sklearn.datasets import load_digits

digits = load_digits()
y = (digits.target == 9)

In [4]:
# Hold out a test set; the fixed random_state makes the split repeatable.
# NOTE(review): the split is not stratified, so the share of 9s in the train
# and test parts may differ slightly — consider stratify=y for imbalanced labels.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    y,
    random_state=30,
)

In [5]:
from sklearn.dummy import DummyClassifier

# Baseline that ignores the features and always predicts the most frequent
# training label; its accuracy is the bar any real model has to beat.
dummy_majority = DummyClassifier(strategy='most_frequent')
dummy_majority.fit(X_train, y_train)
pred_most_frequent = dummy_majority.predict(X_test)

# score() reports plain accuracy on the held-out data.
dummy_accuracy = dummy_majority.score(X_test, y_test)
print("Test score: {:.2f}".format(dummy_accuracy))

Test score: 0.92


In [6]:
from sklearn.tree import DecisionTreeClassifier

# A deliberately shallow tree (at most two levels of splits).
# NOTE(review): no random_state is passed. scikit-learn documents that
# features are randomly permuted at each split, so tied splits could make the
# fitted tree vary between runs — consider fixing a seed; TODO confirm whether
# that matters for this data.
tree = DecisionTreeClassifier(max_depth=2)
tree.fit(X_train, y_train)
pred_tree = tree.predict(X_test)

# score() reports plain accuracy on the held-out data.
tree_accuracy = tree.score(X_test, y_test)
print("Test score: {:.2f}".format(tree_accuracy))

Test score: 0.92


In [7]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with C=0.1. C is the inverse regularization strength,
# so a smaller value means stronger regularization.
logreg = LogisticRegression(C=0.1)
logreg.fit(X_train, y_train)
pred_logreg = logreg.predict(X_test)

# score() reports plain accuracy on the held-out data.
logreg_accuracy = logreg.score(X_test, y_test)
print("Test score: {:.2f}".format(logreg_accuracy))

Test score: 0.97


In [9]:
# Confusion matrix of the logistic regression predictions on the test set.
from sklearn.metrics import confusion_matrix

# Compute the matrix once and reuse it for the individual counts instead of
# calling confusion_matrix() a second time with the same arguments.
confusion = confusion_matrix(y_test, pred_logreg)

# For binary labels the 2x2 matrix is laid out as [[tn, fp], [fn, tp]]
# (rows = true class, columns = predicted class), so ravel() yields the
# four counts in exactly this order.
tn, fp, fn, tp = confusion.ravel()
print("Confusion matrix:\n{}".format(confusion))

Confusion matrix:
[[408   5]
 [  7  30]]


In [11]:
# Show each cell of the binary confusion matrix under its conventional name:
# true negatives, false positives, false negatives, true positives.
for template, count in (
    ("tn: {}", tn),
    ("fp: {}", fp),
    ("fn: {}", fn),
    ("tp: {}", tp),
):
    print(template.format(count))

tn: 408
fp: 5
fn: 7
tp: 30


In [15]:
# Print the confusion matrix of every model on the same test labels so they
# can be compared side by side.
for heading, predictions in (
    ("\nDummy model:", pred_most_frequent),
    ("\nDecision tree:", pred_tree),
    ("\nLogistic Regression:", pred_logreg),
):
    print(heading)
    print(confusion_matrix(y_test, predictions))


Dummy model:
[[413   0]
 [ 37   0]]

Decision tree:
[[399  14]
 [ 21  16]]

Logistic Regression:
[[408   5]
 [  7  30]]


In [17]:
from sklearn.metrics import f1_score

# F1 score of the positive class ("is a 9") for each model, all evaluated
# against the same test labels.
for template, predictions in (
    ("f1 score dummy: {:.2f}", pred_most_frequent),
    ("f1 score decision tree: {:.2f}", pred_tree),
    ("f1 score logistic regression: {:.2f}", pred_logreg),
):
    print(template.format(f1_score(y_test, predictions)))

f1 score dummy: 0.00
f1 score decision tree: 0.48
f1 score logistic regression: 0.83


In [20]:
from sklearn.metrics import classification_report

# Per-class precision, recall and F1 for the majority-class baseline.
# target_names gives display names for the labels False and True, in that order.
report_dummy = classification_report(
    y_test,
    pred_most_frequent,
    target_names=["not nine", "nine"],
)
print(report_dummy)

             precision    recall  f1-score   support

   not nine       0.92      1.00      0.96       413
       nine       0.00      0.00      0.00        37

avg / total       0.84      0.92      0.88       450



In [21]:
# Per-class precision, recall and F1 for the shallow decision tree.
report_tree = classification_report(
    y_test,
    pred_tree,
    target_names=["not nine", "nine"],
)
print(report_tree)

             precision    recall  f1-score   support

   not nine       0.95      0.97      0.96       413
       nine       0.53      0.43      0.48        37

avg / total       0.92      0.92      0.92       450



In [22]:
# Per-class precision, recall and F1 for the logistic regression model.
report_logreg = classification_report(
    y_test,
    pred_logreg,
    target_names=["not nine", "nine"],
)
print(report_logreg)

             precision    recall  f1-score   support

   not nine       0.98      0.99      0.99       413
       nine       0.86      0.81      0.83        37

avg / total       0.97      0.97      0.97       450



# Metrics untuk Multiclass Classification

In [24]:
from sklearn.metrics import accuracy_score

# Re-split using the full 10-class digit labels instead of the binary
# "is a 9" target.
# NOTE: this rebinds X_train / X_test / y_train / y_test, so the binary
# classification cells must be re-run from the top of the notebook (not after
# this cell) to reproduce their results.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    random_state=30,
)

# Multiclass logistic regression with default settings.
lr = LogisticRegression()
lr.fit(X_train, y_train)
pred = lr.predict(X_test)

print("Accuracy score: {:.3f}".format(accuracy_score(y_test, pred)))
# Row i / column j counts the test samples of true digit i predicted as digit j.
print("Confusion matrix:\n{}".format(confusion_matrix(y_test, pred)))

Accuracy score: 0.951
Confusion matrix:
[[43  0  0  0  0  0  0  1  0  0]
 [ 0 44  0  1  0  0  1  0  4  1]
 [ 0  0 48  1  0  0  0  0  0  0]
 [ 0  0  0 34  0  0  0  0  2  1]
 [ 0  0  0  0 49  0  0  0  0  1]
 [ 0  1  0  0  0 45  0  0  0  1]
 [ 0  0  0  0  0  0 53  0  1  0]
 [ 0  0  0  0  0  0  0 41  0  0]
 [ 0  2  0  1  0  0  0  0 37  0]
 [ 0  0  0  1  0  0  0  0  2 34]]


In [25]:
# Per-digit precision, recall and F1 for the multiclass model.
multiclass_report = classification_report(y_test, pred)
print(multiclass_report)

             precision    recall  f1-score   support

          0       1.00      0.98      0.99        44
          1       0.94      0.86      0.90        51
          2       1.00      0.98      0.99        49
          3       0.89      0.92      0.91        37
          4       1.00      0.98      0.99        50
          5       1.00      0.96      0.98        47
          6       0.98      0.98      0.98        54
          7       0.98      1.00      0.99        41
          8       0.80      0.93      0.86        40
          9       0.89      0.92      0.91        37

avg / total       0.95      0.95      0.95       450



In [27]:
# F1 score over all digit classes, averaged in two different ways:
#   micro - pool the TP, FP and FN counts of all classes, then compute a
#           single F1 from those totals
#   macro - compute F1 for every class separately, then take their
#           unweighted mean (each class counts equally, whatever its size)
for template, averaging in (
    ("Micro average f1 score: {:.3f}", "micro"),
    ("Macro average f1 score: {:.3f}", "macro"),
):
    print(template.format(f1_score(y_test, pred, average=averaging)))

Micro average f1 score: 0.951
Macro average f1 score: 0.949


# Regression Metrics

In [30]:
from sklearn.metrics import r2_score

# Predictions that are close to, but not exactly, the true values.
y_true = [0.5, 2.1, 3.5]
y_pred = [0.7, 1.9, 3.1]

r2 = r2_score(y_true, y_pred)
print("r2 score: {:.3f}".format(r2))

r2 score: 0.947


In [31]:
# Predictions identical to the true values: the best possible R^2.
y_true = [1, 2, 3]
y_pred = [1, 2, 3]

r2 = r2_score(y_true, y_pred)
print("r2 score: {:.3f}".format(r2))

r2 score: 1.000


In [32]:
# Predictions that fit worse than always predicting the mean of y_true,
# which drives R^2 below zero.
y_true = [4, 2, 1]
y_pred = [3, 1, 4]

r2 = r2_score(y_true, y_pred)
print("r2 score: {:.3f}".format(r2))

r2 score: -1.357
