# Preparation 

In [4]:
import math
# Two sample points in 3-dimensional space.
x = (5, 6, 7)
y = (8, 9, 9)
# Euclidean distance: square root of the summed squared per-axis differences.
distance = math.sqrt(sum((a - b) ** 2 for a, b in zip(x, y)))
print("Euclidean distance from x to y: ",distance)

Euclidean distance from x to y:  4.69041575982343


# SVM with Grid Search Method

In [1]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd 
import numpy as np

In [3]:
# Load the Titanic data from a CSV addressed by a relative path, i.e. the file is
# expected in the notebook's working directory.
# NOTE(review): the file name suggests the data was cleaned beforehand -- confirm
# where that cleaning happens.
df = pd.read_csv('titanic_clean1.csv')

In [4]:
# Features: every column except the label and 'Unnamed: 0'
# (presumably an index column left over from writing the CSV -- confirm).
x = df.drop(columns=['Survived', 'Unnamed: 0'])
# Target: the 'Survived' label column.
y = df['Survived']

In [18]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split
# reproducible. np.ravel hands the target over as a flat 1-D array.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    np.ravel(y),
    test_size=0.30,
    random_state=22,
)

In [19]:
# Baseline classifier: SVC with scikit-learn's default hyperparameters.
# The grid search later in this section reports C=1000, gamma=0.001 (rbf kernel)
# as the best setting.
model = SVC().fit(X_train, y_train)  # fit() returns the fitted estimator itself
model

SVC()

In [20]:
# Score the baseline on the held-out rows and print the per-class
# precision / recall / F1 summary.
predictions = model.predict(X_test)
report = classification_report(y_test, predictions)
print(report)

              precision    recall  f1-score   support

           0       0.58      0.94      0.72       219
           1       0.65      0.14      0.23       174

    accuracy                           0.59       393
   macro avg       0.61      0.54      0.47       393
weighted avg       0.61      0.59      0.50       393



In [8]:
from sklearn.model_selection import GridSearchCV

In [9]:
# Hyperparameter grid for the search: RBF kernel only, 5 gamma values x 5 C values.
space = {
    'kernel': ['rbf'],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'C': [0.1, 1, 10, 100, 1000],
}

In [10]:
# Exhaustive search over every combination in `space`, ranking candidates by
# cross-validated accuracy. n_jobs=-1 asks scikit-learn to use all available cores.
search = GridSearchCV(
    estimator=model,
    param_grid=space,
    scoring='accuracy',
    n_jobs=-1,
)

In [11]:
# Run the grid search. GridSearchCV.fit returns the fitted search object itself,
# so `result` and `search` refer to the same object afterwards.
# NOTE(review): the search is fit on the full x, y rather than X_train/y_train, so
# the held-out rows also take part in tuning. The score printed here is a
# cross-validation mean and is not directly comparable to the hold-out
# classification report of the baseline model.
result = search.fit(x, y)
# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: 0.7049896171507122
Best Hyperparameters: {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}


# Building a model on a synthetic dataset built from scratch

In [364]:
import numpy as np
# Fix NumPy's global seed so the synthetic data is identical on every run.
np.random.seed(0)

# Three integer feature rows of shape (1, 100), drawn in this order from
# [0, 100), [0, 200) and [0, 300).
d1, d2, d3 = (np.random.randint(high, size=(1, 100)) for high in (100, 200, 300))

# Binary target as a (100, 1) column. It is drawn independently of d1..d3, so the
# features carry no information about it.
tc = np.random.randint(2, size=(1, 100)).T

In [365]:
# Turn each (1, 100) row into a (100, 1) column so the arrays can be joined side by side.
d1t, d2t, d3t = (row.T for row in (d1, d2, d3))

In [366]:
# Inspect the raw values of the first feature row.
d1

array([[44, 47, 64, 67, 67,  9, 83, 21, 36, 87, 70, 88, 88, 12, 58, 65,
        39, 87, 46, 88, 81, 37, 25, 77, 72,  9, 20, 80, 69, 79, 47, 64,
        82, 99, 88, 49, 29, 19, 19, 14, 39, 32, 65,  9, 57, 32, 31, 74,
        23, 35, 75, 55, 28, 34,  0,  0, 36, 53,  5, 38, 17, 79,  4, 42,
        58, 31,  1, 65, 41, 57, 35, 11, 46, 82, 91,  0, 14, 99, 53, 12,
        42, 84, 75, 68,  6, 68, 47,  3, 76, 52, 78, 15, 20, 99, 58, 23,
        79, 13, 85, 48]])

In [367]:
# Shape check: d1 is a (1, 100) row, while d1t and tc are (100, 1) columns.
d1.shape, d1t.shape, tc.shape

((1, 100), (100, 1), (100, 1))

In [368]:
# Join the three (100, 1) columns side by side into a single (100, 3) feature matrix.
concat = np.hstack((d1t, d2t, d3t))

In [369]:
# Expect (100, 3): one row per sample, one column per feature array.
concat.shape

(100, 3)

In [370]:
# Wrap the feature matrix in a DataFrame with columns 'a', 'b', 'c'.
# NOTE(review): this rebinds `df`, which held the Titanic data earlier in the
# notebook; the Titanic frame is no longer reachable under that name afterwards.
df = pd.DataFrame(concat, columns=['a', 'b', 'c'])

In [371]:
# Preview the synthetic feature table.
df

Unnamed: 0,a,b,c
0,44,49,117
1,47,69,83
2,64,169,161
3,67,163,228
4,67,192,251
...,...,...,...
95,23,152,111
96,79,157,91
97,13,149,39
98,85,110,150


In [372]:
# Attach the binary target as column 'tc'. This modifies `df` in place, so any
# earlier display of the frame no longer reflects its current contents.
df['tc'] = tc

In [373]:
# Preview the table again, now including the 'tc' target column.
df

Unnamed: 0,a,b,c,tc
0,44,49,117,0
1,47,69,83,1
2,64,169,161,0
3,67,163,228,0
4,67,192,251,1
...,...,...,...,...
95,23,152,111,0
96,79,157,91,1
97,13,149,39,0
98,85,110,150,1


In [374]:
# Separate the synthetic table into its feature columns and the 'tc' target.
xt = df.drop(columns=['tc'])
yt = df['tc']

In [375]:
# Hold out 30% of the synthetic rows for testing; the fixed random_state keeps
# the split reproducible. np.ravel hands the target over as a flat 1-D array.
trainX, testX, trainY, testY = train_test_split(
    xt,
    np.ravel(yt),
    test_size=0.30,
    random_state=0,
)

In [376]:
# Baseline on the synthetic data: SVC with scikit-learn's default hyperparameters.
model = SVC().fit(trainX, trainY)  # fit() returns the fitted estimator itself
model

SVC()

In [377]:
# Score the synthetic-data baseline on its held-out rows and print the per-class
# precision / recall / F1 summary.
predictionS = model.predict(testX)
report = classification_report(testY, predictionS)
print(report)

              precision    recall  f1-score   support

           0       0.36      0.25      0.30        16
           1       0.37      0.50      0.42        14

    accuracy                           0.37        30
   macro avg       0.37      0.38      0.36        30
weighted avg       0.37      0.37      0.36        30



In [378]:
# Tune an SVC on the synthetic data with the same grid (`space`) and scoring as the
# Titanic search. A fresh GridSearchCV around a new default SVC is built here
# instead of re-fitting the `search` object from the Titanic section: that object
# wraps whatever estimator it was constructed with, which would silently couple
# this experiment to the earlier section's state.
# `search` and `result` are both rebound (fit returns the search object itself),
# so any later cell reading either name sees the synthetic-data fit.
search = GridSearchCV(SVC(), space, scoring='accuracy', n_jobs=-1)
result = search.fit(xt, yt)
# summarize result
# 'tc' was drawn at random, independently of the features, so a best score close
# to 0.5 (chance level for a binary target) is the expected outcome.
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: 0.5399999999999999
Best Hyperparameters: {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
