# Support Vector Machine

## Load Data

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from sklearn import svm
from sklearn.model_selection import train_test_split, cross_val_score
import matplotlib.pyplot as plt
import pickle

# Environment settings
data_path = 'Data/'

# Locations of the pickled objects saved earlier by the "preprocessing" step
train_pickle_path = data_path+'train_pp.obj'
test_pickle_path = data_path+'test_pp.obj'

# Deserialize the previously saved data. Both files are opened before either
# one is read, and both are closed again when the blocks exit.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only point
# these paths at files you produced yourself.
with open(train_pickle_path, 'rb') as train_pp:
    with open(test_pickle_path, 'rb') as test_pp:
        df_train = pickle.load(train_pp)
        df_test = pickle.load(test_pp)

## Data processing and model training

In [2]:
# Preprocessing: pull the 'Survived' column out of the training frame as the
# target vector and keep every remaining column as the feature matrix.
target_column = 'Survived'
dv_train_y = df_train[target_column].values
dv_train_X = df_train.drop(target_column, axis='columns').values

# df_test is converted as-is; no column is dropped from it here.
dv_test_X = df_test.values

In [3]:
# Prepare training set: hold out 30% of the labelled data for scoring.
# Note that X_test / y_test are carved out of dv_train_X / dv_train_y; they are
# not the same thing as dv_test_X.
X_train, X_test, y_train, y_test = train_test_split(
    dv_train_X,
    dv_train_y,
    test_size=0.3,
    stratify=dv_train_y,  # stratify the split on the target labels
    random_state=1,       # fixed seed so the split is repeatable
)

In [4]:
# Model training
svc_params = dict(
    kernel='linear',  # kernel type
    C=1.0,            # regularization parameter
)

# Build the classifier first, then fit it on the training split. fit() returns
# the estimator, so re-binding the name keeps the cell free of displayed output.
svc = svm.SVC(**svc_params)
svc = svc.fit(X_train, y_train)

## Score

In [5]:
# Test set score: evaluate the fitted classifier on the held-out split
testset_score = svc.score(X_test, y_test)

# Report the score rounded to two decimals
testset_message = "Accuracy with test set: {}".format(round(testset_score, 2))
print(testset_message)

Accuracy with test set: 0.84


In [6]:
# Cross-validation score over the full labelled data set
cv_iterations = 5  # value passed as cv= to cross_val_score
cv_score = cross_val_score(svc, dv_train_X, dv_train_y, cv=cv_iterations)

# Summarise the per-fold scores as their mean and twice their standard
# deviation, both rounded to two decimals.
cv_mean = round(cv_score.mean(), 2)
cv_spread = round(cv_score.std() * 2, 2)
print("Accuracy with cross-validation (split size = {}): {} (+/- {})"
      .format(cv_iterations, cv_mean, cv_spread))

Accuracy with cross-validation (split size = 5): 0.83 (+/- 0.05)
