# 분류 (Classification)


In [1]:
from sklearn.datasets import load_iris
iris = load_iris()

In [2]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=2021
)

In [3]:
## 로지스틱 회귀(Logistic Regression)

In [4]:
# 모델 생성
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

In [5]:
# 모델 학습 
lr.fit(X_train, y_train)

LogisticRegression()

In [6]:
# 모델로 예측
pred_lr = lr.predict(X_test)

In [8]:
# 평가
from sklearn.metrics import accuracy_score
acc_lr = accuracy_score(y_test, pred_lr)
acc_lr

1.0

In [9]:
# 모델의 하이퍼 파라미터
lr.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [10]:
# 결과 확인 - weight 
lr.coef_

array([[-0.35256838,  0.9416541 , -2.35260876, -1.03252527],
       [ 0.47778197, -0.32798694, -0.13606798, -0.91562912],
       [-0.12521358, -0.61366717,  2.48867674,  1.9481544 ]])

In [11]:
# 결과 확인 - bias
lr.intercept_

array([  8.77223449,   2.11080096, -10.88303545])

In [12]:
X_test.shape

(30, 4)

In [13]:
X_test[:3, :]

array([[4.4, 3.2, 1.3, 0.2],
       [4.6, 3.6, 1. , 0.2],
       [5.5, 2.4, 3.8, 1.1]])

In [14]:
# Y = X x weight.T + bias 
lr.predict_proba(X_test)[:3]

array([[9.84720678e-01, 1.52792692e-02, 5.27592338e-08],
       [9.94362204e-01, 5.63778854e-03, 7.00370373e-09],
       [3.13586688e-02, 9.48883820e-01, 1.97575111e-02]])

In [15]:
pred_lr[:3]

array([0, 0, 1])

## 결정 트리 (Decision Tree)

In [22]:
from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()

In [23]:
dtc.fit(X_train, y_train)

DecisionTreeClassifier()

In [24]:
pred_dt = dtc.predict(X_test)

In [25]:
acc_dt = accuracy_score(y_test, pred_dt)
acc_dt

1.0

In [26]:
dtc.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': None,
 'splitter': 'best'}

In [27]:
dtc.feature_importances_

array([0.04961594, 0.01254181, 0.01548201, 0.92236025])

### 서포트 백터 머신 (Support Vector Machine)

In [30]:
from sklearn.svm import SVC
svc = SVC(random_state=2021)
svc.fit(X_train, y_train)
pred_sv = svc.predict(X_test)
acc_sv = accuracy_score(y_test, pred_sv)
acc_sv

1.0

In [31]:
svc.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': 2021,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

### 랜덤 포레스트

In [34]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()
rfc = RandomForestClassifier(random_state=2021)
rfc.fit(X_train, y_train)
pred_rfc = rfc.predict(X_test)
acc_rfc = accuracy_score(y_test, pred_sv)
acc_rfc

1.0

In [35]:
rfc.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 2021,
 'verbose': 0,
 'warm_start': False}

### K 최근접 이웃 (Nearest Neighbor)

In [37]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
pred_knn = knn.predict(X_test)
acc_knn = accuracy_score(y_test, pred_sv)
acc_knn

1.0

In [38]:
knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

### 결과비교

In [41]:
import pandas as pd 

df = pd.DataFrame({'y_test': y_test, 'lr' : pred_lr, 'dt': pred_dt, 'svc': pred_sv, 'rf' : pred_rfc, 'knn' : pred_knn})
df

Unnamed: 0,y_test,lr,dt,svc,rf,knn
0,0,0,0,0,0,0
1,0,0,0,0,0,0
2,1,1,1,1,1,1
3,0,0,0,0,0,0
4,0,0,0,0,0,0
5,0,0,0,0,0,0
6,0,0,0,0,0,0
7,0,0,0,0,0,0
8,0,0,0,0,0,0
9,0,0,0,0,0,0
