손글씨 분류
==========
-------------------------------------

+ 의사결정나무, 랜덤포레스트, SVM, SGD, logistic regression 모델

-----------------------------

+ 평가에서는 정확도, 정밀도, 재현율, F1-Score, 혼동행렬

+ 정확도는 직관적인 평가 지표

+ F1-Score는 각 클래스별 성능 종합 평가, 혼동행렬은 어떤 클래스에서 실수를 많이 하는지 구체적으로 확인할 수 있습니다.

+ 정확도, F1-score, 혼동행렬을 조합해서 평가하는 것이 좋습니다.


In [55]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [56]:
digits = load_digits()
digits_data=digits.data
digits_label = digits.target

X_train, X_test, y_train, y_test = train_test_split(digits_data, digits_label, test_size=0.2, random_state=7)

In [57]:
#의사결정나무
from sklearn.tree import DecisionTreeClassifier 
decision_tree = DecisionTreeClassifier(random_state=32)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99        43
           1       0.81      0.81      0.81        42
           2       0.79      0.82      0.80        40
           3       0.79      0.91      0.85        34
           4       0.83      0.95      0.89        37
           5       0.90      0.96      0.93        28
           6       0.84      0.93      0.88        28
           7       0.96      0.82      0.89        33
           8       0.88      0.65      0.75        43
           9       0.78      0.78      0.78        32

    accuracy                           0.86       360
   macro avg       0.86      0.86      0.86       360
weighted avg       0.86      0.86      0.85       360



In [58]:
#랜덤포레스트
from sklearn.ensemble import RandomForestClassifier
random_forest = RandomForestClassifier(random_state=32) 
random_forest.fit(X_train, y_train) 
y_pred = random_forest.predict(X_test) 

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99        43
           1       0.93      1.00      0.97        42
           2       1.00      1.00      1.00        40
           3       1.00      1.00      1.00        34
           4       0.93      1.00      0.96        37
           5       0.90      0.96      0.93        28
           6       1.00      0.96      0.98        28
           7       0.94      0.97      0.96        33
           8       1.00      0.84      0.91        43
           9       0.94      0.94      0.94        32

    accuracy                           0.96       360
   macro avg       0.96      0.96      0.96       360
weighted avg       0.97      0.96      0.96       360



In [59]:
#SVM모델
from sklearn import svm 
svm_model = svm.SVC()
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        43
           1       0.95      1.00      0.98        42
           2       1.00      1.00      1.00        40
           3       1.00      1.00      1.00        34
           4       1.00      1.00      1.00        37
           5       0.93      1.00      0.97        28
           6       1.00      1.00      1.00        28
           7       1.00      1.00      1.00        33
           8       1.00      0.93      0.96        43
           9       1.00      0.97      0.98        32

    accuracy                           0.99       360
   macro avg       0.99      0.99      0.99       360
weighted avg       0.99      0.99      0.99       360



In [60]:
#SGD
from sklearn.linear_model import SGDClassifier 
sgd_model = SGDClassifier() 
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)
print(classification_report(y_test, y_pred))

print(sgd_model._estimator_type)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        43
           1       0.86      0.88      0.87        42
           2       0.95      1.00      0.98        40
           3       0.85      0.97      0.90        34
           4       0.97      1.00      0.99        37
           5       0.96      0.93      0.95        28
           6       1.00      0.93      0.96        28
           7       0.97      0.94      0.95        33
           8       0.97      0.86      0.91        43
           9       0.91      0.91      0.91        32

    accuracy                           0.94       360
   macro avg       0.94      0.94      0.94       360
weighted avg       0.94      0.94      0.94       360

classifier


In [63]:
#logistic regression
from sklearn.linear_model import LogisticRegression 
logistic_model = LogisticRegression(max_iter=200) 
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        43
           1       0.93      0.95      0.94        42
           2       0.98      1.00      0.99        40
           3       0.94      0.97      0.96        34
           4       0.97      0.97      0.97        37
           5       0.82      0.96      0.89        28
           6       0.96      0.96      0.96        28
           7       0.97      0.97      0.97        33
           8       0.92      0.79      0.85        43
           9       0.97      0.91      0.94        32

    accuracy                           0.95       360
   macro avg       0.95      0.95      0.95       360
weighted avg       0.95      0.95      0.95       360



In [62]:
#평가

from sklearn.metrics import confusion_matrix

fake_pred = [0] * len(y_pred)

print(classification_report(y_test, y_pred))

print(classification_report(y_test, fake_pred, zero_division=0))

accuracy_score(y_test, y_pred), accuracy_score(y_test, fake_pred)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        43
           1       0.95      0.95      0.95        42
           2       0.98      1.00      0.99        40
           3       0.94      0.97      0.96        34
           4       0.97      1.00      0.99        37
           5       0.82      0.96      0.89        28
           6       1.00      0.96      0.98        28
           7       0.97      0.97      0.97        33
           8       0.92      0.81      0.86        43
           9       0.97      0.91      0.94        32

    accuracy                           0.95       360
   macro avg       0.95      0.95      0.95       360
weighted avg       0.95      0.95      0.95       360

              precision    recall  f1-score   support

           0       0.12      1.00      0.21        43
           1       0.00      0.00      0.00        42
           2       0.00      0.00      0.00        40
           3       0.00 

(0.9527777777777777, 0.11944444444444445)