### OvR & OvO 모듈 활용

(1) 모듈 로딩 & 데이터 준비

In [1]:
## 모듈 로딩
from sklearn.multiclass import OneVsRestClassifier,OneVsOneClassifier
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np

In [2]:
# Load the fish dataset from CSV and preview its first five rows.
fishDF = pd.read_csv(filepath_or_buffer='../../DATA/fish.csv')
fishDF.head(n=5)

Unnamed: 0,Species,Weight,Length,Diagonal,Height,Width
0,Bream,242.0,25.4,30.0,11.52,4.02
1,Bream,290.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,26.5,31.1,12.3778,4.6961
3,Bream,363.0,29.0,33.5,12.73,4.4555
4,Bream,430.0,29.0,34.0,12.444,5.134


(2) 데이터셋 준비

(2-1) 피쳐/타겟 분리

In [3]:
# Positional split: the first column is the target label,
# every remaining column is an input feature.
targetSR = fishDF.iloc[:, 0]
featureDF = fishDF.iloc[:, 1:]
featureDF.shape, targetSR.shape

((159, 5), (159,))

(2-2) 학습용/테스트용 데이터셋 준비

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
# Stratified hold-out split on the target (default test fraction),
# seeded so the split is reproducible across runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    featureDF,
    targetSR,
    random_state=11,
    stratify=targetSR,
)
xtrain.shape, ytrain.shape, xtest.shape, ytest.shape

((119, 5), (119,), (40, 5), (40,))

(3) 학습 진행

In [6]:
# Base estimator that is handed to both the OvO and the OvR wrapper.
model = LogisticRegression(
    solver='liblinear',
)

(3-1) OvO 기반 학습 진행

In [7]:
# Wrap the base estimator in a one-vs-one scheme and fit it on the training split.
ovomodel = OneVsOneClassifier(estimator=model)
ovomodel.fit(X=xtrain, y=ytrain)

In [8]:
# Inspect what fitting produced, one item per output line:
#   - the target class labels
#   - the input feature names
#   - the number of pairwise estimators (7 classes -> 7 * 6 / 2 = 21)
print(
    ovomodel.classes_,
    ovomodel.feature_names_in_,
    len(ovomodel.estimators_),
    sep='\n',
)

['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
['Weight' 'Length' 'Diagonal' 'Height' 'Width']
21


In [9]:
# Evaluate the OvO model: score on the training split, then on the test split.
(
    ovomodel.score(xtrain, ytrain),
    ovomodel.score(xtest, ytest),
)

(0.957983193277311, 0.925)

In [10]:
# Predict the first two test rows and show the true labels alongside.
ovomodel.predict(xtest.iloc[:2]), ytest.iloc[:2]

(array(['Bream', 'Parkki'], dtype=object),
 1      Bream
 68    Parkki
 Name: Species, dtype: object)

In [11]:
# Decision scores of the OvO model for the first two test rows.
ovomodel.decision_function(xtest.iloc[:2])

array([[ 6.32094951,  5.32872468,  2.32890163,  0.68506766,  3.322758  ,
        -0.33168462,  4.3140798 ],
       [ 4.26849104,  6.32543178,  2.3234672 ,  0.67951149,  5.319289  ,
         0.67104504,  1.85564622]])

(3-2) OvR 기반 학습 진행

In [12]:
# Wrap the same base estimator in a one-vs-rest scheme and fit it on the training split.
ovrmodel = OneVsRestClassifier(estimator=model)
ovrmodel.fit(X=xtrain, y=ytrain)

In [13]:
# Inspect what fitting produced, one item per output line:
#   - the target class labels
#   - the input feature names
#   - the number of fitted estimators (7)
print(
    ovrmodel.classes_,
    ovrmodel.feature_names_in_,
    len(ovrmodel.estimators_),
    sep='\n',
)

['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
['Weight' 'Length' 'Diagonal' 'Height' 'Width']
7


In [14]:
# Evaluate the OvR model: score on the training split, then on the test split.
(
    ovrmodel.score(xtrain, ytrain),
    ovrmodel.score(xtest, ytest),
)

(0.9495798319327731, 0.975)

In [15]:
# Predict the first two test rows and show the true labels alongside.
ovrmodel.predict(xtest.iloc[:2]), ytest.iloc[:2]

(array(['Bream', 'Parkki'], dtype='<U9'),
 1      Bream
 68    Parkki
 Name: Species, dtype: object)

In [16]:
# Class probabilities for the first two test rows, rounded to three decimals.
ovrmodel.predict_proba(xtest.iloc[:2]).round(decimals=3)

array([[0.504, 0.311, 0.   , 0.   , 0.173, 0.   , 0.011],
       [0.158, 0.73 , 0.044, 0.   , 0.057, 0.   , 0.01 ]])