In [2]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split,KFold,cross_val_score,GridSearchCV
from sklearn.datasets import load_iris
from sklearn.svm import SVC

In [3]:
df = load_iris()
X = df.data
y = df.target
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.7,random_state=42)

In [5]:
scm = SVC()
'''
# Params
# kernel{‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’}, default=’rbf’
# degree (default=3) -- Degree of the polynomial kernel function (‘poly’). Ignored by all other kernels.
# C(Default 1)--> Regularization parameter. The strength of the regularization is inversely proportional to C. 
    Must be strictly positive. The penalty is a squared l2 penalty.
# gamma --> Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’ ( It is the Margin b/w SV and the sepreator)
        Large Value of parameter C => small margin  ---> Lower misclassification but tends to overfit
        Small Value of paramerter C => Large margin  --> High  misclassification so tends ti underfit 
        So value of C should be optimal to select best margin line
        Value of gamma is scale(default) or auto
        auto ---> 1/n_features
        scale ----> 
# cache_size --->Specify the size of the kernel cache (in MB) default --> 200
# tol ---> Tolerance for stopping criterion. 10e-3
'''

In [7]:
hyper_params = [{
    'kernel': ['linear','poly','rbf','sigmoid'],
    'C':[0.1,1,10,100],
    'gamma':['auto','scale']
}]
folds = KFold(n_splits=5,shuffle=True,random_state=42)
model_cv = GridSearchCV(estimator=scm,
                       param_grid = hyper_params,
                       scoring ='accuracy',
                       cv = folds,
                       verbose=1,
                       return_train_score=True,
                       n_jobs=40)
model_cv.fit(X_train,y_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


[Parallel(n_jobs=40)]: Using backend LokyBackend with 40 concurrent workers.
[Parallel(n_jobs=40)]: Done 160 out of 160 | elapsed:   18.9s finished


GridSearchCV(cv=KFold(n_splits=5, random_state=42, shuffle=True),
             error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=40,
             param_grid=[{'C': [0.1, 1, 10, 100], 'gamma': ['auto', 'scale'],
                          'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
             scoring='accuracy', verbose=1)

In [8]:
print(model_cv.best_score_)
print(model_cv.best_params_)
print(model_cv.best_estimator_)

0.9619047619047618
{'C': 0.1, 'gamma': 'auto', 'kernel': 'linear'}
SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)


In [9]:
svc= SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
svc.fit(X_train,y_train)

SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [12]:
# Attributes
print(svc.support_) # Indices of support vectors
print(svc.n_support_) # No. of support vectors for each class
print(svc.classes_) # Class Labels
print(svc.intercept_) # Constant in decision function
# print(svc.support_vectors_) # Support Vectors

[ 13  16  17  18  76   3   5   7  14  19  24  30  34  39  44  45  47  58
  64  65  67  71  73  75  77  78  84  90  95  96   1   2  15  25  27  28
  35  41  48  53  66  68  70  72  86  92  94  97  98 101]
[ 5 25 20]
[0 1 2]
[1.77293323 1.76200793 6.22330382]


In [17]:
print(svc.get_params())
print(svc.predict(X_test))
print(svc.score(X_test,y_test)) # Mean Score
print(svc.decision_function(X_test))  # Evaluates the Decision function for sample in X

{'C': 0.1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'auto', 'kernel': 'linear', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]
1.0
[[-0.24160148  2.23236629  1.08315299]
 [ 2.22873045  1.26282934 -0.2851256 ]
 [-0.2880953   1.17855429  2.27969812]
 [-0.23758087  2.22918307  1.07305789]
 [-0.24982195  2.23191245  1.13781134]
 [ 2.23012597  1.26392814 -0.28593407]
 [-0.18025781  2.24106666  0.80355122]
 [-0.26281666  1.20844553  2.22432793]
 [-0.24726729  2.23518864  1.10759695]
 [-0.20677233  2.241102    0.83498923]
 [-0.2582669   1.21080694  2.2107847 ]
 [ 2.23590229  1.26633854 -0.2882684 ]
 [ 2.24133254  1.2664131  -0.28949798]
 [ 2.23322958  1.26568764 -0.28740392]
 [ 2.23920618  1.26250441 -0.28734036]
 [-0.24162283  2.22326071  1.1258199