# 분류 (Classification)

In [1]:
# Load the iris dataset that ships with scikit-learn; later cells read
# its `.data` (features) and `.target` (class labels) attributes.
from sklearn.datasets import load_iris
iris = load_iris()

In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The split is stratified on the
# class labels and seeded so that it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    stratify=iris.target,
    random_state=123,
)

## Logistic Regression

In [3]:
# Create the model.
# verbose=1 makes the subsequent fit() call emit progress messages.
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(verbose=1)

In [4]:
# Train the model on the training split.
lr.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s finished


LogisticRegression(verbose=1)

In [5]:
# Predict class labels for the held-out test split.
pred_lr = lr.predict(X_test)

In [6]:
# Evaluate (accuracy): compare the predictions against the true test labels.
from sklearn.metrics import accuracy_score
accuracy_score(y_test, pred_lr)

0.9666666666666667

In [7]:
# Inspect the result - fitted weights (coefficients).
lr.coef_

array([[-0.43191848,  0.90792849, -2.4058193 , -1.03745967],
       [ 0.31928925, -0.17824221, -0.15730273, -0.74451807],
       [ 0.11262923, -0.72968627,  2.56312204,  1.78197775]])

In [8]:
# Inspect the result - fitted bias (intercept) terms.
lr.intercept_

array([  9.57853892,   2.48471737, -12.06325629])

```
setosa = -0.4319 * sepal_length + \
          0.9079 * sepal_width + \
          -2.4058 * petal_length + \
          -1.0375 * petal_width + \
          9.5785
```

## 결정 트리 (Decision Tree)

In [9]:
from sklearn.tree import DecisionTreeClassifier

# Seed the tree: scikit-learn permutes the candidate features at every split,
# so without a fixed random_state ties can be broken differently on each run
# and both the accuracy and the feature importances may change.
# 123 matches the seed used for the train/test split.
dtc = DecisionTreeClassifier(random_state=123)
dtc.fit(X_train, y_train)

# Predict on the held-out split and report the accuracy.
pred_dt = dtc.predict(X_test)
accuracy_score(y_test, pred_dt)

0.8333333333333334

In [10]:
# Inspect the result: feature importances of the fitted decision tree.
dtc.feature_importances_

array([0.0075    , 0.03      , 0.93383357, 0.02866643])

## 서포트 벡터 머신 (Support Vector Machine)

In [11]:
from sklearn.svm import SVC

# Build a support vector classifier with default settings and train it
# (fit() returns the estimator itself, so the calls can be chained).
svc = SVC().fit(X_train, y_train)

# Predict on the held-out split and report the accuracy.
pred_svc = svc.predict(X_test)
accuracy_score(y_test, pred_svc)

0.9

## 랜덤 포레스트(Random Forest)

In [12]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest: bootstrap sampling and per-split feature sampling are
# random, so an unseeded model gives a different accuracy on every run.
# 123 matches the seed used for the train/test split.
rf = RandomForestClassifier(random_state=123)
rf.fit(X_train, y_train)

# Predict on the held-out split and report the accuracy.
pred_rf = rf.predict(X_test)
accuracy_score(y_test, pred_rf)

0.9333333333333333

## K 최근접 이웃 (K-Nearest Neighbors)

In [13]:
from sklearn.neighbors import KNeighborsClassifier

# Build a k-nearest-neighbours classifier with default settings and train it
# (fit() returns the estimator itself, so the calls can be chained).
knn = KNeighborsClassifier().fit(X_train, y_train)

# Predict on the held-out split and report the accuracy.
pred_knn = knn.predict(X_test)
accuracy_score(y_test, pred_knn)

0.9333333333333333

In [15]:
# Inspect the hyperparameters of the KNN model.
knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

## 결과 비교

In [14]:
import pandas as pd

# Put the true test labels next to each model's predictions, one column per
# model, so disagreements can be spotted row by row.
df = pd.DataFrame(
    dict(
        y_test=y_test,
        lr=pred_lr,
        dt=pred_dt,
        svc=pred_svc,
        rf=pred_rf,
        knn=pred_knn,
    )
)

df

Unnamed: 0,y_test,lr,dt,svc,rf,knn
0,1,1,1,1,1,1
1,0,0,0,0,0,0
2,2,2,2,2,2,2
3,2,2,2,2,2,2
4,0,0,0,0,0,0
5,0,0,0,0,0,0
6,2,2,2,2,2,2
7,1,1,2,2,2,2
8,2,2,1,1,2,2
9,0,0,0,0,0,0
