# Support Vector Machines

Support Vector Machines is used to find a hyperplane in an dimensional space that distinctly classifies the data points. The objective is to find a plane that has the maximum margin, the max distance between data points of both classes. Maximizing the margin distance provides reinforcement so that future data points cana be classified with more confidence.

In [1]:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import os

In [2]:
flag = pd.read_csv("flags_with_headers_v_5.csv")
flag.head()

Unnamed: 0.1,Unnamed: 0,name,landmass,zone,area,population,language,religion,bars,stripes,...,sunstars,crescent,triangle,icon,animate,text,topleft,botright,area_sq_miles,pop_density
0,0,Afghanistan,5,1,648.0,16.0,10,1,0,3,...,1,0,0,1,0,0,black,green,250193.1,63.95
1,1,Albania,3,1,29.0,3.0,6,1,0,0,...,1,0,0,0,1,0,red,red,11196.9,267.93
2,2,Algeria,4,1,2388.0,20.0,8,1,2,0,...,1,1,0,0,0,0,green,white,922007.7,21.69
3,3,American_Samoa,6,3,0.2,0.032646,1,0,0,0,...,0,0,1,1,1,0,blue,red,77.2,422.88
4,4,Andorra,3,1,0.495,0.0416,6,0,3,0,...,0,0,0,0,0,0,blue,red,191.1,217.69


In [3]:
flag.religion.unique()

array([1, 0, 4, 2, 3], dtype=int64)

In [4]:
flag = pd.get_dummies(flag)
flag.head()

Unnamed: 0.1,Unnamed: 0,landmass,zone,area,population,language,religion,bars,stripes,colors,...,topleft_red,topleft_white,botright_black,botright_blue,botright_brown,botright_gold,botright_green,botright_orange,botright_red,botright_white
0,0,5,1,648.0,16.0,10,1,0,3,5,...,0,0,0,0,0,0,1,0,0,0
1,1,3,1,29.0,3.0,6,1,0,0,3,...,1,0,0,0,0,0,0,0,1,0
2,2,4,1,2388.0,20.0,8,1,2,0,3,...,0,0,0,0,0,0,0,0,0,1
3,3,6,3,0.2,0.032646,1,0,0,0,5,...,0,0,0,0,0,0,0,0,1,0
4,4,3,1,0.495,0.0416,6,0,3,0,3,...,0,0,0,0,0,0,0,0,1,0


In [5]:
# Assign X (data) and y (target)

X = flag.drop(columns=['religion', 'Unnamed: 0'], axis=1)
y = flag.religion

In [6]:
X

Unnamed: 0,landmass,zone,area,population,language,bars,stripes,colors,red,green,...,topleft_red,topleft_white,botright_black,botright_blue,botright_brown,botright_gold,botright_green,botright_orange,botright_red,botright_white
0,5,1,648.000,16.000000,10,0,3,5,1,1,...,0,0,0,0,0,0,1,0,0,0
1,3,1,29.000,3.000000,6,0,0,3,1,0,...,1,0,0,0,0,0,0,0,1,0
2,4,1,2388.000,20.000000,8,2,0,3,1,1,...,0,0,0,0,0,0,0,0,0,1
3,6,3,0.200,0.032646,1,0,0,5,1,0,...,0,0,0,0,0,0,0,0,1,0
4,3,1,0.495,0.041600,6,3,0,3,1,0,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,6,3,2.850,0.158130,1,0,0,3,1,0,...,0,0,0,0,0,0,0,0,1,0
189,3,1,256.000,22.000000,6,0,3,4,1,0,...,0,0,0,0,0,0,0,0,1,0
190,4,2,905.000,28.000000,10,0,0,4,1,1,...,0,0,0,0,0,0,1,0,0,0
191,4,2,753.000,6.000000,10,3,0,4,1,1,...,0,0,0,0,1,0,0,0,0,0


In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=35)

In [8]:
#Create a StandardScaler model and fit it to the training data
from sklearn.preprocessing import StandardScaler

X_scaler = StandardScaler().fit(X_train)

In [9]:
#Transform the training and testing data using the X_scaler and y_scaler models

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [10]:
# X_train.shape

In [11]:
# y_train.shape

In [12]:
# # Support vector machine linear classifier
# from sklearn.svm import SVC 
# model = SVC(kernel='linear')
# model.fit(X_train, y_train)

In [13]:
# Support vector machine linear classifier
from sklearn.svm import LinearSVC
model = LinearSVC(random_state=0, tol=1e-5)
model.fit(X_train, y_train.ravel()) 



LinearSVC(random_state=0, tol=1e-05)

In [14]:
# Model Accuracy
print('Test Acc: %.3f' % model.score(X_test, y_test))

Test Acc: 0.388


In [15]:
# Calculate classification report
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
print(classification_report(y_test, predictions, target_names =['Christian', 'Muslim', 'Buddhist', 'Hindu', 'Other']))

              precision    recall  f1-score   support

   Christian       0.57      0.59      0.58        27
      Muslim       0.20      0.33      0.25         9
    Buddhist       0.00      0.00      0.00         3
       Hindu       0.00      0.00      0.00         1
       Other       0.00      0.00      0.00         9

    accuracy                           0.39        49
   macro avg       0.15      0.19      0.17        49
weighted avg       0.35      0.39      0.37        49



  _warn_prf(average, modifier, msg_start, len(result))


Analysis: Better at predicting Other Christian than any other religion. 

# Prediction

In [17]:
#Show results of the prediction of testing values
model.fit(X_train, y_train)

X_test[1:2]



Unnamed: 0,landmass,zone,area,population,language,bars,stripes,colors,red,green,...,topleft_red,topleft_white,botright_black,botright_blue,botright_brown,botright_gold,botright_green,botright_orange,botright_red,botright_white
96,5,1,10.0,3.0,8,0,2,4,1,1,...,1,0,0,0,0,0,0,0,1,0


In [18]:
model.predict(X_test)

array([1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 3,
       3, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 3, 4, 3, 0, 0,
       0, 0, 0, 3, 1], dtype=int64)

In [19]:
y_test.values

array([0, 1, 0, 0, 4, 0, 4, 2, 0, 1, 1, 0, 4, 0, 0, 0, 1, 4, 1, 0, 0, 2,
       0, 4, 1, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 4, 4, 1, 0, 0, 3, 0, 0, 1,
       2, 0, 1, 0, 0], dtype=int64)