# Decision Tree

In [1]:
# (1) 필요한 모듈 import
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# (2) 데이터 준비
wine = load_wine()
wine_data = wine.data
wine_label = wine.target

print(wine.target_names)
print(wine.DESCR)

# (3) train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(wine_data, 
                                                    wine_label, 
                                                    test_size=0.2, 
                                                    random_state=12)

# (4) 모델 학습 및 예측
decision_tree = DecisionTreeClassifier(random_state=32)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

print(classification_report(y_test, y_pred))

# (5) 정확도 확인
from sklearn.metrics import accuracy_score
accuracy=accuracy_score(y_test, y_pred)
print("accuracy:", accuracy)

['class_0' 'class_1' 'class_2']
.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
   

# Random Forest

In [2]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


# (2) 데이터 준비
wine = load_wine()
wine_data = wine.data
wine_label = wine.target

# (3) train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(wine_data, 
                                                    wine_label, 
                                                    test_size=0.2, 
                                                    random_state=10)

# (4) 모델 학습 및 예측
random_forest = RandomForestClassifier(random_state=32)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)

print(classification_report(y_test, y_pred))

# (5) 정확도 확인
from sklearn.metrics import accuracy_score
accuracy=accuracy_score(y_test, y_pred)
print("accuracy:", accuracy)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.89      0.94        18
           2       0.80      1.00      0.89         8

    accuracy                           0.94        36
   macro avg       0.93      0.96      0.94        36
weighted avg       0.96      0.94      0.95        36

accuracy: 0.9444444444444444


# Support Vector Machine (SVM)

In [3]:
# (4) 모델 학습 및 예측
from sklearn import svm
svm_model=svm.SVC()

svm_model.fit(X_train, y_train)
y_pred=svm_model.predict(X_test)

print(classification_report(y_test, y_pred))

# (5) 정확도 확인
from sklearn.metrics import accuracy_score
accuracy=accuracy_score(y_test, y_pred)
print("accuracy:", accuracy)

# random_state를 줄였더니 UndefinedMetricWarning 사라짐

              precision    recall  f1-score   support

           0       0.88      0.70      0.78        10
           1       0.81      0.72      0.76        18
           2       0.42      0.62      0.50         8

    accuracy                           0.69        36
   macro avg       0.70      0.68      0.68        36
weighted avg       0.74      0.69      0.71        36

accuracy: 0.6944444444444444


# Stochastic Gradient Descent Classifier (SGDClassifier)

In [4]:
from sklearn.linear_model import SGDClassifier
sgd_model=SGDClassifier()

sgd_model.fit(X_train, y_train)
y_pred=sgd_model.predict(X_test)

print(classification_report(y_test, y_pred))

from sklearn.metrics import accuracy_score
accuracy=accuracy_score(y_test, y_pred)
print("accuracy:", accuracy)

              precision    recall  f1-score   support

           0       0.88      0.70      0.78        10
           1       0.61      0.94      0.74        18
           2       0.00      0.00      0.00         8

    accuracy                           0.67        36
   macro avg       0.49      0.55      0.51        36
weighted avg       0.55      0.67      0.59        36

accuracy: 0.6666666666666666


  _warn_prf(average, modifier, msg_start, len(result))


# Logistic Regression

In [5]:
# (1) 필요한 모듈 import
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


# (2) 데이터 준비
wine = load_wine()
wine_data = wine.data
wine_label = wine.target

# (3) train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(wine_data, 
                                                    wine_label, 
                                                    test_size=0.2, 
                                                    random_state=10)

# (4) 모델 학습 및 예측
logistic_model = LogisticRegression(max_iter=5000)

logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)

print(classification_report(y_test, y_pred))

from sklearn.metrics import accuracy_score
accuracy=accuracy_score(y_test, y_pred)
print("accuracy:", accuracy)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.89      0.94        18
           2       0.80      1.00      0.89         8

    accuracy                           0.94        36
   macro avg       0.93      0.96      0.94        36
weighted avg       0.96      0.94      0.95        36

accuracy: 0.9444444444444444
