# **Support Vector Machine application on the iris and wine datasets**
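In this study, SVMs with three different kernels (linear, polynomial, and Gaussian RBF) are trained for binary classification on the iris dataset, and SVM classifiers are then trained and tuned for multiclass classification on the wine dataset. Both datasets come from scikit-learn.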

In [25]:
import sklearn
import sys
import numpy as np
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from sklearn.preprocessing import PolynomialFeatures
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

# **Iris Dataset**

# **Linear SVM Classification**

In [2]:
# Load iris as pandas objects so the features can be selected by column name.
iris = datasets.load_iris(as_frame=True)

# Inputs: only the two petal measurements, converted to a plain NumPy array.
X = iris.data[["petal length (cm)", "petal width (cm)"]].values

# Binary target: True for class 2 (Iris virginica), False for every other sample.
# (The earlier `y = iris.target` assignment was overwritten immediately, so it is dropped.)
y = (iris.target == 2)

In [3]:
# Linear SVM classifier: the pipeline standardises the two petal features
# before they reach LinearSVC, and random_state pins the solver's randomness.
svm_classifier = make_pipeline(
    StandardScaler(),
    LinearSVC(C=1, random_state=42),
)

# Fit on the full iris feature matrix; the fitted pipeline is the cell's output.
svm_classifier.fit(X, y)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearsvc', LinearSVC(C=1, random_state=42))])

In [6]:
# Two unseen samples, each given as [petal length (cm), petal width (cm)].
X_new = [
    [5.4, 1.6],
    [4.9, 1.4],
]

# A True prediction means the sample is classified as Iris virginica.
svm_classifier.predict(X_new)

array([ True, False])

# **Polynomial Kernel**

In [7]:
# Polynomial-kernel SVM (degree 3) on standardised features.
poly_kernel_svm_classifier = make_pipeline(
    StandardScaler(),
    SVC(kernel="poly", degree=3, coef0=1, C=5),
)

# Fit on the same binary iris problem; the fitted pipeline is the cell's output.
poly_kernel_svm_classifier.fit(X, y)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(C=5, coef0=1, kernel='poly'))])

# **Gaussian RBF Kernel**

In [8]:
# Gaussian RBF-kernel SVM on standardised features, with a fixed gamma and C.
rbf_kernel_svm_clf = make_pipeline(
    StandardScaler(),
    SVC(kernel="rbf", gamma=0.1, C=100),
)

# Fit on the same binary iris problem; the fitted pipeline is the cell's output.
rbf_kernel_svm_clf.fit(X, y)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(C=100, gamma=0.1))])

# **Wine Dataset**

In [10]:
# Load the wine dataset as pandas objects (features in wine.data, labels in wine.target).
wine = load_wine(as_frame=True)

In [12]:
# Hold out a test set. No test_size is given, so scikit-learn's default split
# is used; random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    wine.data,
    wine.target,
    random_state=42,
)

In [13]:
# Preview the first rows of the training features (index values are the original row labels).
X_train.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
100,12.08,2.08,1.7,17.5,97.0,2.23,2.17,0.26,1.4,3.3,1.27,2.96,710.0
122,12.42,4.43,2.73,26.5,102.0,2.2,2.13,0.43,1.71,2.08,0.92,3.12,365.0
154,12.58,1.29,2.1,20.0,103.0,1.48,0.58,0.53,1.4,7.6,0.58,1.55,640.0
51,13.83,1.65,2.6,17.2,94.0,2.45,2.99,0.22,2.29,5.6,1.24,3.37,1265.0


In [14]:
# Preview the matching training labels; the target is an integer class id.
y_train.head()

2      0
100    1
122    1
154    2
51     0
Name: target, dtype: int64

# **Linear Classifier**

In [19]:
# Baseline: LinearSVC trained directly on the raw, unscaled wine features.
# The iteration cap is raised to 1,000,000 -- presumably so the solver can
# converge without feature scaling (TODO confirm).
linear_classifier = LinearSVC(
    max_iter=1_000_000,
    random_state=42,
)
linear_classifier.fit(X_train, y_train)



LinearSVC(max_iter=1000000, random_state=42)

In [20]:
# Mean cross-validated score of the current linear_classifier on the training split.
cross_val_score(
    linear_classifier,
    X_train,
    y_train,
).mean()



0.90997150997151

In [21]:
# Same linear model, now with standardised inputs and LinearSVC's default
# iteration cap. NOTE: this rebinds linear_classifier, replacing the unscaled
# LinearSVC fitted earlier in the notebook.
linear_classifier = make_pipeline(
    StandardScaler(),
    LinearSVC(random_state=42),
)
linear_classifier.fit(X_train, y_train)



Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearsvc', LinearSVC(random_state=42))])

In [22]:
# Mean cross-validated score of the current linear_classifier on the training split.
cross_val_score(
    linear_classifier,
    X_train,
    y_train,
).mean()

0.9774928774928775

In [23]:
# Kernel SVM (SVC with its default kernel) on standardised wine features.
# NOTE: this rebinds svm_classifier, which earlier held the iris LinearSVC pipeline.
svm_classifier = make_pipeline(
    StandardScaler(),
    SVC(random_state=42),
)

# Mean cross-validated score on the training split, before any tuning.
cross_val_score(
    svm_classifier,
    X_train,
    y_train,
).mean()



0.9698005698005698

In [26]:
# Search space for the SVC step of the pipeline. The "svc__" prefix routes each
# parameter to the pipeline step named "svc".
parameter_distribution = {
    # Log-uniform over [0.001, 0.1].
    "svc__gamma": reciprocal(0.001, 0.1),
    # scipy's uniform takes (loc, scale), so this samples C from [1, 11], not [1, 10].
    "svc__C": uniform(1, 10),
}

# Randomised search: 100 sampled settings, each scored with 5-fold cross-validation.
rndm_search_cv = RandomizedSearchCV(
    svm_classifier,
    parameter_distribution,
    n_iter=100,
    cv=5,
    random_state=42,
)
rndm_search_cv.fit(X_train, y_train)

# Show the best pipeline found by the search.
rndm_search_cv.best_estimator_

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc',
                 SVC(C=9.925589984899778, gamma=0.011986281799901176,
                     random_state=42))])

In [27]:
# Mean cross-validated score of the best parameter setting found by the search.
rndm_search_cv.best_score_

0.9925925925925926

In [28]:
# Final check: score the tuned search object on the held-out test split.
rndm_search_cv.score(X_test, y_test)

0.9777777777777777