In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn import svm

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.io as sio

# Gaussian Kernel Function, or Radial Boundry Function

In [2]:
def gaussian_kernel(x1, x2, sigma):
    return np.exp(- np.power(x1 - x2, 2).sum() / (2 * (sigma ** 2)))

Calculate a sample kernel value with the above RBF, when $sigma = 2$: 

In [3]:
x1 = np.array([1, 2, 1])
x2 = np.array([0, 4, -1])
sigma = 2

gaussian_kernel(x1, x2, sigma)

0.32465246735834974

# Load Dataset 2

In [4]:
mat = sio.loadmat('')
print(mat.keys())
data = pd.DataFrame(mat.get('X'), columns=['X1', 'X2'])
data['y'] = mat.get('y')

data.head(5)

FileNotFoundError: [Errno 2] No such file or directory: '.mat'

Print dataframe dimensions (should be 863x3 matrix):

In [None]:
data.shape

#  Visualizing Dataset 2

In [None]:
sns.set(context="notebook", style="white", palette=sns.diverging_palette(240, 10, n=2))
sns.lmplot('X1', 'X2', hue='y', data=data, 
           size=5, 
           fit_reg=False, 
           scatter_kws={"s": 20}
          )

As we can see, we must find a non-linear decision boundrary.

In [None]:
fig, ax = plt.subplots(figsize=(6,6))
ax.scatter(data['X1'], data['X2'], s=50, c=data['y'], marker = '+')
ax.set_title('Raw data')
ax.set_xlabel('X1')
ax.set_ylabel('X2')

# scikit-learn Gaussian Kernel

## Set up the kernel: 

In [None]:
svc = svm.SVC(C=100, kernel='rbf', gamma=10, probability=True)
svc

Score for fitting data with Gaussian Kernel is:

In [None]:
svc.fit(data[['X1', 'X2']], data['y'])
svc.score(data[['X1', 'X2']], data['y'])

## SVM decision boundry visualization using `predict_proba` from `svm.SVC` library. 

In [None]:
prediction_prob = svc.predict_proba(data[['X1', 'X2']])[:, 0]

In [None]:
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(data['X1'], data['X2'], s=60, c=prediction_prob, cmap='Reds')
ax.set_title('Fitting correctness probability')
ax.set_xlabel('X1')
ax.set_ylabel('X2')