In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

In [2]:
# Load the bundled iris dataset (a dict-like object; its 'data' and 'target' entries are used below)
iris = datasets.load_iris()

In [3]:
# Keep only columns 2 and 3 of the feature matrix: petal length and petal width
X = iris.data[:, [2, 3]]

In [4]:
# Binary target: 1.0 for Iris virginica (class 2), 0.0 for every other species
y = np.asarray(iris.target == 2, dtype=np.float64)

In [5]:
# Linear SVM classifier; the inputs are standardized first because SVMs are
# sensitive to feature scales.
#   C    - regularization hyperparameter (smaller C = stronger regularization)
#   loss - loss function, one of {'hinge', 'squared_hinge'}
svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('linear_svc', LinearSVC(loss='hinge', C=1)),
])

In [6]:
# Fit the scaler and then the linear SVM on X, y; fit() returns the pipeline, which is what the cell displays
svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge'))])

In [7]:
# Classify a single sample with petal length 5.5 and petal width 1.7
svm_clf.predict([[5.5, 1.7]]) 
# Note: unlike logistic regression classifiers, SVM classifiers do not output a probability for each class

array([1.])

Linear SVM classifiers work well when the classes are (at least approximately) linearly separable.

### Nonlinear SVM Classification

In [8]:
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

In [9]:
# Two interleaving half-moons: a toy binary dataset that is not linearly separable.
# random_state is fixed so that the same samples are generated on every run
# (Restart Kernel -> Run All stays reproducible).
X, y = make_moons(n_samples = 100, noise = 0.15, random_state = 42)

In [10]:
# Nonlinear classification with a linear SVM: expand the inputs into
# polynomial features up to degree 3, standardize them, then fit LinearSVC.
polynomial_svm_clf = Pipeline([
    ('poly_features', PolynomialFeatures(degree=3)),
    ('scaler', StandardScaler()),
    ('svm_clf', LinearSVC(loss='hinge', C=10)),
])

In [11]:
# Train the polynomial-features pipeline on the current X, y (the make_moons data when run in order)
polynomial_svm_clf.fit(X, y)



Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('scaler', StandardScaler()),
                ('svm_clf', LinearSVC(C=10, loss='hinge'))])

### Polynomial Kernel

In [12]:
from sklearn.svm import SVC

In [13]:
# Same idea through the kernel trick: a degree-3 polynomial kernel gives the
# effect of polynomial features without explicitly adding them.
# coef0 controls how much the model is influenced by high-degree terms
# relative to low-degree ones.
poly_kernel_svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('svm_clf', SVC(C=5, kernel='poly', degree=3, coef0=1)),
])

In [14]:
# Train the polynomial-kernel pipeline on the current X, y (the make_moons data when run in order)
poly_kernel_svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('svm_clf', SVC(C=5, coef0=1, kernel='poly'))])

- If your model is overfitting, you might want to reduce the polynomial degree
- If your model is underfitting, you might want to increase the polynomial degree

### Similarity Features 

In [15]:
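# Another technique to tackle nonlinear problems: add features computed with a
# similarity function (e.g. the Gaussian RBF) that measures how much each
# instance resembles a particular landmark.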

### Gaussian RBF Kernel

In [16]:
# Gaussian RBF kernel SVM. gamma acts like a regularization hyperparameter,
# much as C does:
#   - model overfitting  -> reduce gamma
#   - model underfitting -> increase gamma
# NOTE(review): C=0.0001 is a very strong regularization setting - confirm it is intended.
rbf_kernel_svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('svm_clf', SVC(kernel='rbf', C=0.0001, gamma=5)),
])
rbf_kernel_svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('svm_clf', SVC(C=0.0001, gamma=5))])

### SVM Regression

In [18]:
from sklearn.svm import LinearSVR

In [19]:
# SVM regression needs a continuous target. When the notebook is run in order,
# X and y still hold the make_moons data, whose targets are 0/1 class labels;
# with epsilon = 1.5 every such label already lies inside the epsilon-insensitive
# tube of an all-zero model, so there would be nothing to fit.
# Use a small noisy linear dataset for this example instead (stored under
# separate names so that X and y are left untouched).
reg_rng = np.random.default_rng(42)
X_reg = 2 * reg_rng.random((50, 1))
y_reg = 4 + 3 * X_reg[:, 0] + reg_rng.standard_normal(50)

# epsilon is the half-width of the tube inside which errors are not penalized
svm_reg = LinearSVR(epsilon = 1.5)
svm_reg.fit(X_reg, y_reg)

LinearSVR(epsilon=1.5)

In [20]:
# Same as before: for a nonlinear problem, just switch to a kernelized model (e.g. SVR with a nonlinear kernel)

In [21]:
from sklearn.svm import SVR

In [22]:
# Kernelized SVM regression with a 2nd-degree polynomial kernel.
#   C       - regularization hyperparameter (larger C = less regularization)
#   epsilon - half-width of the tube inside which errors are not penalized
# NOTE(review): X, y are whatever the earlier cells left bound - the make_moons
# class labels when run in order; confirm a regression dataset was intended.
svm_poly_reg = SVR(
    kernel='poly',
    degree=2,
    C=100,
    epsilon=0.1,
)
svm_poly_reg.fit(X, y)

SVR(C=100, degree=2, kernel='poly')