와인 분류
==========
-------------------------------------

+ 의사결정나무, 랜덤포레스트, SVM, SGD, logistic regression 모델

-----------------------------

+ 평가에서는 정확도, 정밀도, 재현율, F1-Score, 혼동행렬

+ 데이터가 균형 잡혀 있으면 정확도가 제일 좋은 평가입니다.

+ F1-Score와 재현율과 정밀도는 클래스 간 불균형이 있을때 중요, 혼동행렬은 어떤 클래스에서 혼동이 발생하는지 구체적으로 확인할 수 있습니다.

In [16]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [17]:
wine = load_wine()
wine_data=wine.data
wine_label = wine.target

X_train, X_test, y_train, y_test = train_test_split(wine_data, wine_label, test_size=0.2, random_state=7)

In [18]:
#의사결정나무
from sklearn.tree import DecisionTreeClassifier 
decision_tree = DecisionTreeClassifier(random_state=32)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.89      1.00      0.94        17
           2       1.00      0.83      0.91        12

    accuracy                           0.94        36
   macro avg       0.96      0.94      0.95        36
weighted avg       0.95      0.94      0.94        36



In [19]:
#랜덤포레스트
from sklearn.ensemble import RandomForestClassifier
random_forest = RandomForestClassifier(random_state=32) 
random_forest.fit(X_train, y_train) 
y_pred = random_forest.predict(X_test) 

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00        17
           2       1.00      1.00      1.00        12

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



In [20]:
#SVM모델
from sklearn import svm 
svm_model = svm.SVC()
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.86      0.86      0.86         7
           1       0.58      0.88      0.70        17
           2       0.33      0.08      0.13        12

    accuracy                           0.61        36
   macro avg       0.59      0.61      0.56        36
weighted avg       0.55      0.61      0.54        36



In [21]:
#SGD
from sklearn.linear_model import SGDClassifier 
sgd_model = SGDClassifier() 
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)
print(classification_report(y_test, y_pred))

print(sgd_model._estimator_type)

              precision    recall  f1-score   support

           0       1.00      0.29      0.44         7
           1       1.00      0.18      0.30        17
           2       0.39      1.00      0.56        12

    accuracy                           0.47        36
   macro avg       0.80      0.49      0.43        36
weighted avg       0.80      0.47      0.41        36

classifier


In [22]:
#logistic regression
from sklearn.linear_model import LogisticRegression 
logistic_model = LogisticRegression(max_iter=10000) 
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.94      1.00      0.97        17
           2       1.00      0.92      0.96        12

    accuracy                           0.97        36
   macro avg       0.98      0.97      0.98        36
weighted avg       0.97      0.97      0.97        36



In [25]:
from sklearn.metrics import confusion_matrix

fake_pred = [0] * len(y_pred)

print(classification_report(y_test, y_pred))

print(classification_report(y_test, fake_pred, zero_division=0))

accuracy_score(y_test, y_pred), accuracy_score(y_test, fake_pred)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.94      1.00      0.97        17
           2       1.00      0.92      0.96        12

    accuracy                           0.97        36
   macro avg       0.98      0.97      0.98        36
weighted avg       0.97      0.97      0.97        36

              precision    recall  f1-score   support

           0       0.19      1.00      0.33         7
           1       0.00      0.00      0.00        17
           2       0.00      0.00      0.00        12

    accuracy                           0.19        36
   macro avg       0.06      0.33      0.11        36
weighted avg       0.04      0.19      0.06        36



(0.9722222222222222, 0.19444444444444445)