In [None]:
# Support Vector Machine

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVC

from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import GridSearchCV

## Get the Data

In [2]:
# Load scikit-learn's bundled wine dataset. The returned object is dict-like;
# its fields (data, target, feature_names, ...) are inspected in the next cells.
wine_data = load_wine()

In [3]:
# List the fields available on the dataset object.
wine_data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])

In [4]:
# Names of the feature columns; used below as the DataFrame column labels.
wine_data['feature_names']

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [5]:
# Wrap the raw feature matrix in a DataFrame with labelled columns so it can
# be inspected with pandas.
df_wine = pd.DataFrame(
    data=wine_data.data,
    columns=wine_data.feature_names,
)
# Summarize column dtypes and non-null counts (quick check for missing values).
df_wine.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   alcohol                       178 non-null    float64
 1   malic_acid                    178 non-null    float64
 2   ash                           178 non-null    float64
 3   alcalinity_of_ash             178 non-null    float64
 4   magnesium                     178 non-null    float64
 5   total_phenols                 178 non-null    float64
 6   flavanoids                    178 non-null    float64
 7   nonflavanoid_phenols          178 non-null    float64
 8   proanthocyanins               178 non-null    float64
 9   color_intensity               178 non-null    float64
 10  hue                           178 non-null    float64
 11  od280/od315_of_diluted_wines  178 non-null    float64
 12  proline                       178 non-null    float64
dtypes: float64(13)

In [6]:
# Human-readable names of the target classes.
print(wine_data['target_names'])

['class_0' 'class_1' 'class_2']


In [7]:
# Distinct integer labels that actually occur in the target vector.
set(wine_data['target'])

{np.int64(0), np.int64(1), np.int64(2)}

In [8]:
# Preview the first five rows of the feature table.
df_wine.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


## Train Test Split

In [9]:
# Hold out 30% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
# NOTE(review): no `stratify` argument is passed, so class proportions are not
# forced to match between the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    df_wine,
    wine_data['target'],
    test_size=0.30,
    random_state=101,
)

## Standardize the Variables

In [10]:
# Learn the scaling statistics from the training split only, so nothing about
# the test split influences the preprocessing.
scaler = StandardScaler()
scaled_X_train = scaler.fit_transform(X_train)
# Apply the training-set statistics unchanged to the held-out test split.
scaled_X_test = scaler.transform(X_test)

## SVM model

In [11]:
# Baseline RBF-kernel classifier with hand-picked hyperparameters; it is
# evaluated below before any tuning is attempted.
model = SVC(kernel='rbf', C=1.0, gamma=1.0)

In [12]:
# Train the baseline classifier on the standardized training features.
model.fit(scaled_X_train,y_train)

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,1.0
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [13]:
# Predict class labels for the held-out, standardized test features.
y_pred = model.predict(scaled_X_test)

## Evaluation

In [14]:
# Baseline confusion matrix: rows are the true classes, columns the predicted
# classes (each row sums to that class's support in the test set).
confusion_matrix(y_test, y_pred)

array([[ 8, 11,  0],
       [ 0, 22,  0],
       [ 0, 10,  3]])

In [15]:
# Per-class precision / recall / F1 for the baseline model. print() is used so
# the report's line breaks render as a table rather than as an escaped string.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.42      0.59        19
           1       0.51      1.00      0.68        22
           2       1.00      0.23      0.38        13

    accuracy                           0.61        54
   macro avg       0.84      0.55      0.55        54
weighted avg       0.80      0.61      0.57        54



# Grid Search - Look for Better Hyperparameters

In [16]:
# Candidate hyperparameters for the RBF-kernel SVM. C and gamma are each swept
# over five values spaced a factor of 10 apart, giving 5 x 5 = 25 combinations
# for the grid search to evaluate.
param_grid = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
    kernel=['rbf'],
)

In [17]:
# Exhaustive cross-validated search over param_grid (cv is left at its
# default). refit=True retrains the best combination on the whole training set
# so that `grid` itself can be used for prediction afterwards.
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, refit=True)

In [18]:
# Run the search on the standardized training data: every C/gamma combination
# in param_grid is scored by cross-validation, then the best one is refit.
# NOTE(review): the scaler was fit on the full training split before the CV
# folds were formed, so validation folds are not fully unseen by preprocessing.
grid.fit(scaled_X_train,y_train)

0,1,2
,estimator,SVC()
,param_grid,"{'C': [0.1, 1, ...], 'gamma': [1, 0.1, ...], 'kernel': ['rbf']}"
,scoring,
,n_jobs,
,refit,True
,cv,
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,C,10
,kernel,'rbf'
,degree,3
,gamma,0.01
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [19]:
# Hyperparameter combination that achieved the best cross-validation score.
grid.best_params_

{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}

In [20]:
# The SVC instance refit with the best parameters found by the search.
grid.best_estimator_

0,1,2
,C,10
,kernel,'rbf'
,degree,3
,gamma,0.01
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [21]:
# Predict on the held-out test features using the refit best estimator.
grid_predictions = grid.predict(scaled_X_test)

In [22]:
# Confusion matrix for the tuned model (rows: true classes, columns: predicted),
# for comparison against the baseline matrix above.
confusion_matrix(y_test,grid_predictions)

array([[19,  0,  0],
       [ 0, 20,  2],
       [ 0,  0, 13]])

In [23]:
# Per-class precision / recall / F1 for the tuned model on the test set.
print(classification_report(y_test,grid_predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.91      0.95        22
           2       0.87      1.00      0.93        13

    accuracy                           0.96        54
   macro avg       0.96      0.97      0.96        54
weighted avg       0.97      0.96      0.96        54

