# OvR / OvO 모듈 활용

## 1. 모듈 로딩 & 데이터 준비

In [1]:
# 모듈 로딩
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd

In [2]:
# Load the fish dataset from a CSV file addressed relative to the notebook.
data_file = "../data/fish.csv"
fishDF = pd.read_csv(data_file)
# Display the first two rows as a quick sanity check of the parsed columns.
fishDF.head(2) 

Unnamed: 0,Species,Weight,Length,Diagonal,Height,Width
0,Bream,242.0,25.4,30.0,11.52,4.02
1,Bream,290.0,26.3,31.2,12.48,4.3056


## 2. 데이터셋 준비

### 2-1. 피처 / 타겟 분리

In [3]:
# The first column is the label; every remaining column is a feature.
# (Per the recorded head() output: Species first, then the measurements.)
target_col, *feature_cols = fishDF.columns
featureDF = fishDF[feature_cols]
targetDF = fishDF[target_col]

In [4]:
# Display both shapes; the recorded output is ((159,), (159, 5)), i.e. a
# one-dimensional target and a five-column feature table.
targetDF.shape , featureDF.shape

((159,), (159, 5))

### 2-2. 학습용/테스트용 데이터셋 준비

In [5]:
from sklearn.model_selection import train_test_split

# Stratify on the label so the class proportions are kept in both subsets;
# the fixed seed makes the split reproducible. test_size is left at its
# default (the recorded output shows 119 training rows and 40 test rows).
(X_train, X_test, y_train, y_test) = train_test_split(
    featureDF,
    targetDF,
    stratify=targetDF,
    random_state=11,
)

In [6]:
# Report the shapes of both subsets, one per line.
print(
    f"[Train Dataset] {X_train.shape}, {y_train.shape}",
    f"[Test Dataset] {X_test.shape}, {y_test.shape}",
    sep="\n",
)

[Train Dataset] (119, 5), (119,)
[Test Dataset] (40, 5), (40,)


## 3. 학습 진행 

In [7]:
# Base estimator that is handed to both the OvO and the OvR wrapper below.
# NOTE(review): per the scikit-learn docs, liblinear handles multiclass
# targets only via one-vs-rest — confirm for the installed version.
model = LogisticRegression(solver="liblinear")

### 3-1. OvO 기반 학습 진행

In [8]:
# Wrap the base estimator in a one-vs-one scheme and fit it on the training
# split.
ovoModel = OneVsOneClassifier(model)
ovoModel.fit(X_train, y_train)

In [9]:
# 모델 파라미터 확인
print(f"classes_ : {ovoModel.classes_}")
print(f"feature_names_in_ : {ovoModel.feature_names_in_}")
print(f"coef_ : {len(ovoModel.estimators_)}개")

classes_ : ['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
feature_names_in_ : ['Weight' 'Length' 'Diagonal' 'Height' 'Width']
coef_ : 21개


In [10]:
# Evaluate: accuracy of the OvO model on the training and the test split.
print(
    f"[Train Score] {ovoModel.score(X_train, y_train)}",
    f"[Test Score] {ovoModel.score(X_test, y_test)}",
    sep="\n",
)

[Train Score] 0.957983193277311
[Test Score] 0.925


In [11]:
# Predict the species for the first two rows of the test split.
ovoModel.predict(X_test[:2])

array(['Bream', 'Parkki'], dtype=object)

In [12]:
# Decision scores for the same two test rows; the recorded output has one
# column per class (7 columns), and the largest value in each row matches the
# predicted species.
ovoModel.decision_function(X_test[:2])

array([[ 6.32094951,  5.32872468,  2.32890163,  0.68506766,  3.322758  ,
        -0.33168462,  4.3140798 ],
       [ 4.26849104,  6.32543178,  2.3234672 ,  0.67951149,  5.319289  ,
         0.67104504,  1.85564622]])

### 3-2. OvR 기반 학습 진행

In [16]:
# Wrap the same base estimator in a one-vs-rest scheme and fit it on the
# training split.
ovrModel = OneVsRestClassifier(model)
ovrModel.fit(X_train, y_train)

In [17]:
# Evaluate: accuracy of the OvR model on the training and the test split.
print(
    f"[Train Score] {ovrModel.score(X_train, y_train)}",
    f"[Test Score] {ovrModel.score(X_test, y_test)}",
    sep="\n",
)

[Train Score] 0.9495798319327731
[Test Score] 0.975


In [15]:
# Baseline: fit the bare LogisticRegression directly on the multiclass
# target, without any wrapper, and score it on the training split.
# NOTE(review): the execution counter (In [15]) is lower than that of the OvR
# cells (In [16], In [17]), so this cell was run before them — confirm the
# intended order.
# NOTE(review): newer scikit-learn releases reportedly deprecate liblinear
# for direct multiclass fits — verify against the installed version.
model.fit(X_train, y_train)
model.score(X_train, y_train)

0.9495798319327731

In [18]:
# Test accuracy of the bare model; the recorded value (0.975) equals the
# test score of the OvR wrapper.
# Presumably this is because solver="liblinear" makes LogisticRegression use
# one-vs-rest for multiclass targets, rather than OvR being an unconditional
# default — confirm against the installed scikit-learn version.
model.score(X_test, y_test)

0.975