## Importing Required Libraries

In [2]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, make_scorer
from sklearn.model_selection import GridSearchCV
import joblib

## Loading the Dataset

In [6]:
# Read the training and test splits (in that order) from the local dataset folder.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("./dataset/train.csv", "./dataset/test.csv")
)

## Splitting Features and Target

In [9]:
# Every column except the last is used as a feature; the last column is the target.
X_train, y_train = train.iloc[:, :-1], train.iloc[:, -1]

In [11]:
# Same positional split as the training frame: features first, target in the last column.
X_test, y_test = test.iloc[:, :-1], test.iloc[:, -1]

## Grid Search for Hyperparameter Tuning

In [113]:
# Search space for the SVM: five candidate values (0.01 to 100, in powers of ten)
# for each of `gamma` and `C`, i.e. 25 combinations in total.
grid = dict(
    gamma = [0.01, 0.1, 1, 10, 100],
    C = [0.01, 0.1, 1, 10, 100],
)

In [122]:
# Custom scorer that averages accuracy and support-weighted recall.
def custom_scorer(y_test, y_hat):
    """Return the mean of accuracy and weighted recall for one set of predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels. Inside this function the name shadows the
        notebook-level ``y_test``; only the argument passed in is used.
    y_hat : array-like
        Predicted labels for the same samples.

    Returns
    -------
    float
        ``(accuracy + weighted_recall) / 2`` as a single scalar.
    """
    # Both metrics must be plain scalars so the average below is a scalar score.
    accuracy = accuracy_score(y_test, y_hat)
    recall = recall_score(y_test, y_hat, average = 'weighted')
    # NOTE(review): for single-label targets, support-weighted recall is
    # mathematically equal to accuracy, so this score is effectively accuracy.
    # Confirm whether a different `average` (e.g. 'binary' or 'macro') was intended.
    return (accuracy + recall) / 2

# Wrap the function so GridSearchCV can use it via its `scoring` argument.
scorer = make_scorer(custom_scorer)

In [127]:
# 10-fold cross-validated search over `grid` for an RBF-kernel SVM,
# with candidates scored by the custom `scorer`.
grid_search = GridSearchCV(
    estimator = SVC(kernel = 'rbf'),
    param_grid = grid,
    scoring = scorer,
    cv = 10,
)

## Training the Models

In [130]:
# Run the cross-validated grid search on the training features and target.
grid_search.fit(X_train, y_train)

In [131]:
# Display the hyperparameter combination selected by the search.
grid_search.best_params_

{'C': 1, 'gamma': 0.1}

## Evaluation on Test Data

In [135]:
# Keep the estimator chosen by the grid search for evaluation and export.
svm_model = grid_search.best_estimator_

In [136]:
# Predict on the held-out test features, then report each metric in turn.
# recall_score is called with its default averaging here.
y_hat = svm_model.predict(X_test)
for label, metric in (('Accuracy:', accuracy_score), ('Recall:', recall_score)):
    print(label, metric(y_test, y_hat))

Accuracy: 0.8947368421052632
Recall: 0.9345794392523364


## Saving Model to File

In [138]:
# Serialize the selected model to ./models/svm_model.joblib.
# NOTE(review): presumably the ./models directory must already exist — confirm.
joblib.dump(svm_model, './models/svm_model.joblib')

['./models/svm_model.joblib']