# 정확도(Accuracy)

- BaseEstimator : 하이퍼파라미터 튜닝에 필요한 두 메서드 get_params()와 set_params()를 얻을 수 있음 (단, 생성자 `__init__`에서 *args, **kwargs를 사용하면 안 됨)
- TransformerMixin : fit()과 transform()이 정의되어 있으면 fit_transform()을 사용할 수 있게 해주는 믹스인(Mixin) 클래스

In [None]:
from re import L
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

class DummyClassifier(BaseEstimator, TransformerMixin):
    """Rule-based baseline that predicts purely from the ``'Sex'`` column.

    No learning takes place: ``fit`` is a no-op and ``predict`` applies a
    fixed rule, which makes this useful only as a reference point when
    judging accuracy figures.

    NOTE(review): scikit-learn's conventional mixin for an estimator that
    exposes ``predict`` is ``ClassifierMixin``; the base classes are left
    unchanged here to keep the class interface as it was.
    """

    def fit(self, X, y=None):
        """Do nothing and return the estimator itself.

        Returning ``self`` follows the scikit-learn estimator convention and
        allows call chaining such as ``DummyClassifier().fit(X, y).predict(X)``.
        """
        return self

    def predict(self, x):
        """Predict 0 where ``x['Sex'] == 1`` and 1 everywhere else.

        Parameters
        ----------
        x : object supporting ``x['Sex']`` column access (e.g. a DataFrame)

        Returns
        -------
        numpy.ndarray
            Float array of shape ``(n_rows, 1)`` containing 0.0 or 1.0.
        """
        sex = np.asarray(x['Sex'])
        # Vectorized form of the row-by-row rule; the reshape keeps the
        # column-vector output shape.
        return np.where(sex == 1, 0.0, 1.0).reshape(-1, 1)


In [6]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

class Classifier(BaseEstimator, TransformerMixin):
    """Baseline estimator that predicts the negative class for every sample.

    It ignores the training data entirely, so it shows how high accuracy can
    look when a model never predicts the positive class.
    """

    def fit(self, X, y):
        """Do nothing and return the estimator itself.

        Returning ``self`` follows the scikit-learn estimator convention and
        allows call chaining such as ``Classifier().fit(X, y).predict(X)``.
        """
        return self

    def predict(self, X):
        """Return an all-``False`` prediction for each row of ``X``.

        Only ``len(X)`` is used; the feature values are never inspected.

        Returns
        -------
        numpy.ndarray
            Boolean array of shape ``(len(X), 1)`` filled with ``False``.
        """
        return np.zeros((len(X), 1), dtype=bool)
# Load the digits data and recast it as a binary task: is the digit a 7?
digits = load_digits()
y = (digits.target == 7).astype(int)

# Hold out 20% of the samples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    y,
    test_size=0.2,
    random_state=1214,
)

# Fit the baseline model and predict on the held-out samples.
model = Classifier()
model.fit(X_train, y_train)
pred = model.predict(X_test)

# Report plain accuracy of those predictions.
accuracy = accuracy_score(y_test, pred)
print(accuracy)


0.9138888888888889


# 오차행렬(Confusion Matrix)

In [7]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the held-out labels against the predictions
# (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=pred)

array([[329,   0],
       [ 31,   0]])

# 정밀도(Precision)과 재현율(Recall)

In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Precision and recall for the same predictions that were scored for accuracy.
print('정밀도 :', precision_score(y_true=y_test, y_pred=pred))
print('재현율 :', recall_score(y_true=y_test, y_pred=pred))

정밀도 : 0.0
재현율 : 0.0


  _warn_prf(average, modifier, msg_start, len(result))


## Threshold를 통해 Binary화 시키기 (Binarizer 활용)

In [9]:
from sklearn.preprocessing import Binarizer

# Small demo matrix for thresholding.
X = [
    [1, -1, 2],
    [2, 0, 0],
    [0, 1.1, 1.2],
]

# Values strictly greater than the threshold become 1, all others become 0.
binarizer = Binarizer(threshold=1)
binarizer.fit_transform(X)

array([[0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 1.]])

# 임곗값의 변경에 따른 정밀도-재현율 변화 곡선

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.metrics import precision_recall_curve
%matplotlib inline

def precision_recall_curve_plot(y_test, pred_proba_c1):
    """Plot precision and recall against the decision threshold.

    Parameters
    ----------
    y_test : array-like
        True binary labels.
    pred_proba_c1 : array-like
        Scores or probabilities for the positive class, passed straight to
        ``precision_recall_curve``.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # Arrays of thresholds and the precision / recall obtained at each one.
    precisions, recalls, thresholds = precision_recall_curve(y_test, pred_proba_c1)

    plt.figure(figsize=(12, 8))
    # precision_recall_curve returns one more precision/recall entry than
    # thresholds, so both are trimmed to the number of thresholds before
    # plotting them on a shared x axis.
    threshold_boundary = thresholds.shape[0]
    plt.plot(thresholds, precisions[:threshold_boundary], linestyle='--', label='precision')
    plt.plot(thresholds, recalls[:threshold_boundary], label='recall')

    # Place x ticks every 0.1 across the current axis range.
    start, end = plt.xlim()
    plt.xticks(np.round(np.arange(start, end, 0.1), 2))

    plt.xlabel('Threshold value')
    plt.ylabel('Precision and Recall Value')
    plt.legend()
    plt.grid()
    plt.show()




# NOTE(review): `predict_proba` is not defined in the visible part of this
# notebook. It presumably has to hold positive-class probabilities for X_test
# (e.g. from a fitted model's predict_proba output) before this call runs;
# confirm against the cell that defines it.
precision_recall_curve_plot(y_test, predict_proba)

# F1 Score

In [11]:
from sklearn.metrics import f1_score
# F1 score (harmonic mean of precision and recall) for the same predictions.
f1 = f1_score(y_true=y_test, y_pred=pred)
f1

0.0

# ROC-AUC Curve

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

def roc_curve_plot(y_test, pred_proba):
    """Plot the ROC curve together with the chance-level diagonal.

    Parameters
    ----------
    y_test : array-like
        True binary labels.
    pred_proba : array-like
        Scores or probabilities for the positive class, passed straight to
        ``roc_curve``.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # False-positive and true-positive rates at each threshold.
    fprs, tprs, thresholds = roc_curve(y_test, pred_proba)

    plt.figure(figsize=(12, 8))

    # The ROC curve itself, plus the diagonal a random classifier would follow.
    plt.plot(fprs, tprs, label='ROC')
    plt.plot([0, 1], [0, 1], 'k--', label='Random')

    # Place x ticks every 0.1, then clamp both axes to the unit square.
    start, end = plt.xlim()
    plt.xticks(np.round(np.arange(start, end, 0.1), 2))
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    # FPR = FP / (FP + TN) = 1 - specificity (sensitivity is the TPR).
    plt.xlabel('FPR(1 - Specificity)')
    plt.ylabel('TPR( Recall )')
    plt.legend()
    plt.show()

