## Using gridsearchcv & select the best hyperparameter for support vector machines

Grid search works good when you have a small number of hyperparameters and when each hyperparameter has about the same magnitude of impact on validation score. 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [2]:
#importing dataset
dataset = pd.read_csv("C:\\Users\\veena\\Desktop\\dataset's\\Advertising_data.csv")
dataset.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0


In [3]:
dataset.isnull().sum()


User ID            0
Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

so, now we are here solving this classification problem and use GridsearchCV for selecting best hyperparameter. And these hyperparameters will help in increase in accuracy of this model.



In [4]:
X = dataset.iloc[:,[2,3]].values #independent features
y = dataset.iloc[:, 4].values #dependent features

In [5]:
#splitting the dataset in to training & test data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25, random_state=5)

In [6]:
#feature scaling - as age and estimated salary are in different units, we are scaling it to same unit
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)


In [7]:
#fitting kernal SVM to the training set
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state= 0)
classifier.fit(X_train, y_train)

SVC(kernel='linear', random_state=0)

In [8]:
#predicting test set rules
y_pred = classifier.predict(X_test)

In [9]:
#making confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)


In [10]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)


In [11]:
accuracy #model accuracy before gridsearch

0.85

In [12]:
#applying Gridsearch to find best model and best parameters
from sklearn.model_selection import GridSearchCV
parameters = [{'C' : [1, 10, 100, 1000], 'kernel' : ['linear']},
              {'C' : [1, 10, 100, 1000], 'kernel' : ['rbf'], 'gamma' : [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]}]

grid_search = GridSearchCV(estimator = classifier,
                          param_grid = parameters,
                          scoring = 'accuracy',
                          cv = 10,
                          n_jobs = -1)

grid_search = grid_search.fit(X_train, y_train)

In [13]:
accuracy = grid_search.best_score_

In [14]:
accuracy #gridsearch accuracy

0.9100000000000001

In [15]:
grid_search.best_params_ #this is the best parameter which our accuracy is high

{'C': 10, 'gamma': 0.3, 'kernel': 'rbf'}

In [16]:
#now checking accuracy score with this best parameter we have got
classifier = SVC(C = 10, kernel = 'rbf', gamma = 0.3)
classifier.fit(X_train, y_train)

SVC(C=10, gamma=0.3)

In [17]:
#predicting test set rules
y_pred = classifier.predict(X_test)

In [18]:
#making confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)


In [19]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)


In [20]:
accuracy #model accuracy after gridsearch

0.94