# Multiclass SVM을 직접 구현

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the iris dataset through seaborn and split it into features and target.
iris =  sns.load_dataset('iris') 
X= iris.iloc[:,:4] # Features: the first four columns
y = iris.iloc[:,-1] # Target: the last column (printed below as the `species` Series)
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


In [2]:
# Show the distinct class labels present in the target.
print(y.unique())

['setosa' 'versicolor' 'virginica']


In [3]:
# Hold out 20% of the rows as the test split; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=48)

In [4]:
def standardization(train, test):
    """Standardize both splits using statistics learned from ``train`` only.

    One StandardScaler is fitted on ``train`` and then applied to ``train``
    and ``test`` alike, so the test split never influences the fitted
    statistics.

    Args:
        train: Training feature matrix used to fit the scaler.
        test: Test feature matrix, transformed with the already-fitted scaler.

    Returns:
        The pair ``(scaled_train, scaled_test)``.
    """
    fitted_scaler = StandardScaler().fit(train)
    scaled_train = fitted_scaler.transform(train)
    scaled_test = fitted_scaler.transform(test)
    return scaled_train, scaled_test

# Replace the raw feature splits with their standardized versions
# (the scaler inside standardization() is fitted on the training split only).
X_train, X_test = standardization(X_train, X_test)

# one vs rest 방법

In [5]:
# One-hot encode the training target: one indicator column per class, so that
# each column can serve as the binary target of one one-vs-rest classifier.
y_train = pd.get_dummies(y_train)

In [6]:
# Build three independent RBF-kernel SVCs with identical hyperparameters,
# one per class of the one-vs-rest scheme.
SVM1, SVM2, SVM3 = (SVC(kernel='rbf', C=5, gamma=5) for _ in range(3))

In [7]:
# Fit each binary classifier on its own one-hot column:
# the classifier at position i learns "class i vs. the rest".
for column_idx, binary_svm in enumerate((SVM1, SVM2, SVM3)):
    binary_svm.fit(X_train, y_train.iloc[:, column_idx])

In [8]:
def one_rest_svm(classifier, data, label):
    """Predict class labels with a one-vs-rest ensemble of binary classifiers.

    Every classifier scores all samples through ``decision_function``; each
    sample is assigned the label of the classifier that returned the largest
    score for it.

    Args:
        classifier: Iterable of fitted binary classifiers exposing
            ``decision_function(data)``, one per class.
        data: Samples to classify, passed unchanged to each classifier.
        label: Class labels, in the same order as ``classifier``
            (``label[i]`` is the positive class of the i-th classifier).

    Returns:
        A list with one predicted label per sample.

    Raises:
        ValueError: If no classifier is given, or if the number of labels
            differs from the number of classifiers.
    """
    classifier = list(classifier)
    label = list(label)
    if not classifier:
        raise ValueError("at least one classifier is required")
    if len(label) != len(classifier):
        raise ValueError(
            "expected one label per classifier, got "
            f"{len(label)} labels for {len(classifier)} classifiers"
        )

    # One column of scores per classifier, one row per sample.
    scores = np.column_stack(
        [np.asarray(c.decision_function(data)) for c in classifier]
    )

    # Use the `label` argument (not a module-level global) so the function
    # works for any label set the caller supplies.
    return [label[winner] for winner in scores.argmax(axis=1)]

# Classifier order and label order must line up: SVM1/SVM2/SVM3 were fitted on
# one-hot columns 0/1/2 of y_train.
# NOTE(review): presumably those columns come out in the same order as this
# hand-written list — confirm against y_train.columns.
models = [SVM1,SVM2,SVM3]
labels = ['setosa' ,'versicolor' ,'virginica']

prediction = one_rest_svm(models, X_test, labels)

In [9]:
# Accuracy of the hand-built one-vs-rest predictions against the true test labels.
accuracy_score(y_test,prediction)

0.8666666666666667

# (추가로) sklearn에 있는 SVM과 비교

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Baseline: re-split with the same test_size and random_state as above and
# train a single multiclass SVC with the same hyperparameters.
X_trains, X_tests, y_trains, y_tests = train_test_split(X, y, test_size=0.2, random_state=48)

# NOTE(review): X is used unscaled here, whereas the one-vs-rest models above
# were trained on standardized features, so the two accuracies are not
# measured under identical preprocessing.
SVM4 = SVC(kernel ='rbf', C = 5, gamma = 5)
SVM4.fit(X_trains, y_trains)
y_pred = SVM4.predict(X_tests)

# Accuracy of the built-in multiclass SVC on the same test rows.
accuracy_score(y_tests,y_pred)

0.8666666666666667

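### sklearn library가 제공하는 classifier와 성능 차이가 크지 않으므로 Multiclass SVM을 잘 구현해 냈다고 말할 수 있습니다. 다만 비교에 사용한 SVC는 표준화하지 않은 원본 feature로 학습했고 내부적으로 one-vs-one 방식을 사용하므로, 두 결과는 완전히 동일한 조건에서 비교한 것은 아닙니다.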