# Grid Search with SVM

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np
import matplotlib.axes as axes

from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score
)
from sklearn.preprocessing import StandardScaler

import torch
import random

In [2]:
# Seed every random number generator the notebook relies on so that a fresh
# "Restart & Run All" reproduces the same results.
seed = 42

np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
# Only touch the CUDA generator when a GPU is actually present.
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)

In [3]:
# Load the raw dataset; the file is comma-separated, which is pandas' default delimiter.
data = pd.read_csv('./data/dataset.csv')

In [4]:
# Drop incomplete rows, then shuffle so the positional split below is random.
# sort_index() first restores the order in which the rows were read, and
# random_state pins the permutation to the notebook seed, so re-running this
# cell always yields the same split instead of reshuffling an already
# shuffled frame.
data = data.dropna().sort_index().sample(frac=1, random_state=seed)

# Map every class label to an integer index; labels are sorted so the mapping
# does not depend on row order.
list_of_classes = sorted(data['Class'].unique())
classes_to_idx = {label: idx for idx, label in enumerate(list_of_classes)}
nb_classes = len(list_of_classes)
nb_rows = data.shape[0]

# Fraction of the rows assigned to the train and validation splits; whatever
# remains becomes the test split. val_split = 0.0 leaves the validation set empty.
train_split = 0.8
val_split = 0.0

# Compute each boundary once so adjacent slices share exactly the same edge.
train_end = int(nb_rows * train_split)
val_end = int(nb_rows * (train_split + val_split))

train_data = data.iloc[:train_end, :]
val_data = data.iloc[train_end:val_end, :]
test_data = data.iloc[val_end:, :]


In [5]:
# Turn each split into a feature matrix (every column except 'Class') ...
x_train, x_val, x_test = (
    split.drop(columns=['Class']).to_numpy()
    for split in (train_data, val_data, test_data)
)
# ... and a list of integer-encoded class labels.
y_train, y_val, y_test = (
    [classes_to_idx[label] for label in split['Class'].to_list()]
    for split in (train_data, val_data, test_data)
)

# The scaled variants stay None unless normalization is enabled below; later
# cells fall back to the raw matrices when they are None.
x_train_scaled = x_val_scaled = x_test_scaled = None

config = {
    'dataset': {
        'normalize': False,
        'normalize_type': 'scale',
    },
}

# Optional preprocessing applied before fitting a model. Normalization is
# switched off in this configuration, so nothing happens here by default.
if config['dataset']['normalize'] and config['dataset']['normalize_type'] == 'scale':
    # Standardize using statistics learned from the training split only.
    scaler = StandardScaler()
    x_train_scaled = scaler.fit_transform(x_train)
    x_test_scaled = scaler.transform(x_test)
    # The validation split may be empty (val_split = 0.0); skip it in that case.
    if len(x_val) > 0:
        x_val_scaled = scaler.transform(x_val)

# find peaks
from scipy.signal import find_peaks

def find_peaks_in_data(data, height=200, distance=20, min_len=40):
    """Extract (position, value) peak features from every row of a 2-D array.

    Each row is passed to ``scipy.signal.find_peaks``; every detected peak
    contributes one ``(index, value)`` pair. Rows with fewer peaks than the
    longest row (or than ``min_len``) are padded with ``(0, 0)`` pairs so the
    result is a regular array.

    Parameters
    ----------
    data : array-like, 2-D
        One signal per row.
    height : float, default 200
        Minimum peak height forwarded to ``find_peaks``.
    distance : int, default 20
        Minimum number of samples between peaks, forwarded to ``find_peaks``.
    min_len : int, default 40
        Minimum number of peak slots per row in the output.

    Returns
    -------
    numpy.ndarray of float, shape (n_rows, max_len, 2)
        ``[..., 0]`` holds peak indices and ``[..., 1]`` the signal values at
        those indices, where ``max_len = max(min_len, most peaks in any row)``.
    """
    data = np.asarray(data)

    # Peak indices for every row; find_peaks returns (indices, properties).
    peak_indices = [
        find_peaks(row, height=height, distance=distance)[0] for row in data
    ]

    # Pad to at least min_len slots, or more if some row has more peaks.
    max_len = max([min_len] + [len(idx) for idx in peak_indices])

    # Pre-allocating zeros gives the (0, 0) padding for free and keeps the
    # 3-D shape even when `data` has no rows. (np.asfarray, used previously,
    # no longer exists in NumPy 2.0.)
    features = np.zeros((len(peak_indices), max_len, 2), dtype=float)
    for i, idx in enumerate(peak_indices):
        features[i, :len(idx), 0] = idx
        features[i, :len(idx), 1] = data[i, idx]

    return features

# Optional peak-based features (currently disabled): uncommenting the calls
# below replaces the raw feature matrices with the output of find_peaks_in_data.
# x_train = find_peaks_in_data(x_train)
# #peaks_val = find_peaks_in_data(x_val)
# x_test = find_peaks_in_data(x_test)


# Sanity check: show the shape of the training feature matrix.
print(x_train.shape)


(2560, 800)


In [6]:

# Baseline: a single RBF-kernel support vector classifier with fixed
# hyperparameters, fitted as a reference point before the grid search.
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

clf = SVC(kernel='rbf', C=1, gamma='auto')

# Use the standardized features whenever the normalization step produced them
# (they are None otherwise), the same way the grid-search cells do, so every
# model in the notebook is trained and evaluated on the same inputs.
clf.fit(x_train if x_train_scaled is None else x_train_scaled, y_train)

y_pred = clf.predict(x_test if x_test_scaled is None else x_test_scaled)

In [7]:

# Exhaustive hyperparameter search for a support vector classifier.
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

clf = SVC()

# One sub-grid per kernel, so parameters that a kernel ignores are not
# expanded into duplicate candidates: 'gamma' is unused by the linear kernel
# and 'degree' is only used by the polynomial kernel. This searches 68
# distinct candidates instead of 144 (76 of which were repeats).
param_grid = [
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10, 100],  # Regularization parameter
    },
    {
        'kernel': ['rbf'],
        'C': [0.1, 1, 10, 100],
        'gamma': [0.01, 0.1, 1, 'auto'],  # Kernel coefficient
    },
    {
        'kernel': ['poly'],
        'C': [0.1, 1, 10, 100],
        'gamma': [0.01, 0.1, 1, 'auto'],
        'degree': [2, 3, 4],  # Degree of the polynomial kernel
    },
]

# 5-fold cross-validated grid search scored by accuracy, run on 4 workers.
# The standardized features are used when they exist, the raw ones otherwise.
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=4)
grid_search.fit(x_train if x_train_scaled is None else x_train_scaled, y_train)

# Report the best hyperparameter combination found
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Keep the winning model; with GridSearchCV's default refit=True it has
# already been refitted on the full training data passed to fit() above.
best_svm_model = grid_search.best_estimator_

Best Hyperparameters: {'C': 0.1, 'degree': 2, 'gamma': 0.01, 'kernel': 'poly'}


In [8]:
# best_svm_model comes from grid_search.best_estimator_, which GridSearchCV
# (default refit=True) has already refitted on the complete training set, so
# fitting it again on the same data would only repeat that work.
# Predict on the test split, using the standardized features when they exist.
y_pred = best_svm_model.predict(x_test if x_test_scaled is None else x_test_scaled)

In [9]:
# Score the test-set predictions: class-weighted F1 first, then plain accuracy.
_f1_score = f1_score(y_test, y_pred, average="weighted")
_accuracy_score = accuracy_score(y_test, y_pred.tolist())
print(_f1_score, _accuracy_score, sep="\n")

# Confusion matrix of true labels (rows) against predicted labels (columns);
# leaving it as the last expression makes the notebook display it.
conf_matrix = confusion_matrix(y_test, y_pred.tolist())
conf_matrix

0.9082281857522945
0.9079563182527302


array([[149,  12,  10,   1],
       [ 13, 138,   1,   2],
       [ 15,   2, 127,   0],
       [  2,   0,   1, 168]], dtype=int64)