# Support Vector Machines

## Imports

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [48]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.svm import LinearSVC, SVC, LinearSVR, SVR
from sklearn.datasets import load_iris, make_moons
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

# Classification 

##  LinearSVC

In [3]:
iris = load_iris()
X = iris["data"][:, (2, 3)] # petal length, petal width
y = (iris["target"] == 2).astype(np.float64) # Iris virgin

lin_svc = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LinearSVC(C=1, loss='hinge')),
])

lin_svc.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('model',
                 LinearSVC(C=1, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

In [4]:
lin_svc.predict([[5.5, 1.7]])

array([1.])

## Non-LinearSVM

In [5]:
X, y = make_moons(100, noise=0.15)

model = Pipeline([
    ('polynomial_features', PolynomialFeatures(degree=3)),
    ('feature_scaler', StandardScaler()),
    ('poly_svc', LinearSVC(C=10, loss='hinge'))
])

model.fit(X, y)



Pipeline(memory=None,
         steps=[('polynomial_features',
                 PolynomialFeatures(degree=3, include_bias=True,
                                    interaction_only=False, order='C')),
                ('feature_scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('poly_svc',
                 LinearSVC(C=10, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

## Kernel Tricks

In [6]:
# kernel trick 
svc_clf = Pipeline([
    ('scaling', StandardScaler()),
    ('model', SVC(kernel='poly', degree=3, C=5, coef0=1))
])

svc_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaling',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('model',
                 SVC(C=5, break_ties=False, cache_size=200, class_weight=None,
                     coef0=1, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='poly', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

In [7]:
# Gaussian RBF kernel

scv_clf = Pipeline([
    ('scaling', StandardScaler()),
    ('model', SVC(kernel='rbf', gamma=5, C=0.001))
])

scv_clf.fit(X,y)

Pipeline(memory=None,
         steps=[('scaling',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('model',
                 SVC(C=0.001, break_ties=False, cache_size=200,
                     class_weight=None, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma=5,
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

# Regression 

In [77]:
# using linearSVR
svc_reg = Pipeline([
    ('scaling', StandardScaler()),
]).fit_transform(X_train)
lin_model =  LinearSVC(loss='hinge', C=5).fit(svc_reg, y_train)

In [76]:
# using SVR and kernel trick 
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

svc_reg = Pipeline([
    ('scaling', StandardScaler()),
]).fit_transform(X_train)

svc_model = SVC(kernel='linear', degree=2, C=100).fit(X_train, y_train)

In [74]:
sgd = Pipeline([
    ('scaling', StandardScaler()),
]).fit_transform(X_train)
sgd_model = SGDClassifier(epsilon=0.1, alpha=100)
sgd_model.fit(sgd, y_train)

SGDClassifier(alpha=100, average=False, class_weight=None, early_stopping=False,
              epsilon=0.1, eta0=0.0, fit_intercept=True, l1_ratio=0.15,
              learning_rate='optimal', loss='hinge', max_iter=1000,
              n_iter_no_change=5, n_jobs=None, penalty='l2', power_t=0.5,
              random_state=None, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=0, warm_start=False)

In [81]:
print(f'SGD model : {sgd_model.coef_}, {sgd_model.intercept_}')
print(f'SVC model : {svc_model.coef_}, {model.intercept_}')
print(f'Lin model : {lin_model.coef_} , {lin_model.intercept_}')

SGD model : [[ 0.00563258 -0.00738261]], [0.33338486]
SVC model : [[ 0.7429451  -2.48691143]], [0.46062738]
Lin model : [[ 0.66384725 -1.21930045]] , [0.29598883]
