In [None]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import accuracy_score

In [None]:
X = pd.read_csv("x_train.csv")
y = pd.read_csv("y_train.csv")

In [None]:
y = y['family']

In [None]:
y

0       BruteForce
1         Spoofing
2       BruteForce
3             DDoS
4            Recon
           ...    
5471         Mirai
5472      Spoofing
5473         Recon
5474         Mirai
5475           Web
Name: family, Length: 5476, dtype: object

In [None]:
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
              'degree': [1, 2, 3],
              'gamma': [0.001, 0.01, 0.1, 1, 10, 'scale', 'auto'],
              'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
              'max_iter': [20000]
              }

In [None]:
param_grid

{'C': [0.001, 0.01, 0.1, 1, 10, 100],
 'degree': [1, 2, 3],
 'gamma': [0.001, 0.01, 0.1, 1, 10, 'scale', 'auto'],
 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
 'max_iter': [20000]}

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f'The training dataset has {len(X_train)} records.')
print(f'The testing dataset has {len(X_test)} records.')

The training dataset has 4380 records.
The testing dataset has 1096 records.


In [None]:
base_SVC_model = SVC()
base_SVC_model.fit(X_train, y_train)

In [None]:
y_predict = base_SVC_model.predict(X_test)

In [None]:
# Get performance metrics
precision, recall, fscore, support = score(y_test, y_predict)
# Print result
print(f'The precision value for the baseline SVC model is {precision[1]:.4f}')
print(f'The recall value for the baseline SVC model is {recall[1]:.4f}')
print(f'The fscore value for the baseline SVC model is {fscore[1]:.4f}')
print("The fscore macro value for the baseline SVC model is:",f1_score(y_test, y_predict, average='macro'))
print("The fscore micro value for the baseline SVC model is:",f1_score(y_test, y_predict, average='micro'))
print("The fscore weighted value for the baseline SVC model is: ",f1_score(y_test, y_predict, average='weighted'))
print("The accuracy value for the baseline SVC model is:",accuracy_score(y_test,y_predict))

The precision value for the baseline SVC model is 0.3544
The recall value for the baseline SVC model is 0.6733
The fscore value for the baseline SVC model is 0.4644
The fscore macro value for the baseline SVC model is: 0.5922103125332732
The fscore micro value for the baseline SVC model is: 0.5875912408759124
The fscore weighted value for the baseline SVC model is:  0.585617861274322
The accuracy value for the baseline SVC model is: 0.5875912408759124


max_iter is set to 20000 because if not it takes too long to run

In [None]:
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
              'degree': [1, 2, 3],
              'gamma': [0.001, 0.01, 0.1, 1, 10, 'scale', 'auto'],
              'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
              'max_iter': [20000]
              }

In [None]:
scoring = ['f1_macro','f1_micro','f1_weighted','accuracy']
refit_metric = 'f1_macro'

In [None]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [None]:
grid_search = GridSearchCV(SVC(), param_grid,  cv=kfold, verbose=2, n_jobs=-1, scoring = scoring, refit= refit_metric)
grid_search.fit(X, y)

Fitting 5 folds for each of 504 candidates, totalling 2520 fits


In [None]:
print("Tuned Hyperparameters :", grid_search.best_params_)
print("Best parameters:", grid_search.best_params_)
print(f"Best '{refit_metric}' score:", grid_search.best_score_)
best_model = grid_search.best_estimator_

Tuned Hyperparameters : {'C': 100, 'degree': 1, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': 20000}
Best parameters: {'C': 100, 'degree': 1, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': 20000}
Best 'f1_macro' score: 0.6551787665893827


In [None]:
best_model

In [None]:
y_train_pred = best_model.predict(X)
train_accuracy = accuracy_score(y, y_train_pred)
print(f"Training Accuracy: {train_accuracy}")

Training Accuracy: 0.7218772826880935


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=3a3c41eb-c99a-4899-842d-6de17d10af25' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>