# import datasets

In [1]:
import pandas as pd

train_x_fs = pd.read_csv('/kaggle/input/trial2-lendingclub-datasets/train_x_fs.csv')
test_x_fs = pd.read_csv('/kaggle/input/trial2-lendingclub-datasets/test_x_fs.csv')
train_y = pd.read_csv('/kaggle/input/trial2-lendingclub-datasets/train_y.csv')
test_y = pd.read_csv('/kaggle/input/trial2-lendingclub-datasets/test_y.csv')

# Function to evaluate the model

In [2]:
# Following codes will create a function to test the performance of models using the testing dataset
def model_performance(model_name, model_type, test_x, test_y):
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    # Predict the y value using the model
    y_pred = model_name.predict(test_x)
    # Calculate evaluation metrics using metrics in sklearn
    accuracy = accuracy_score(test_y, y_pred)
    precision = precision_score(test_y, y_pred)
    recall = recall_score(test_y, y_pred)
    f1 = f1_score(test_y, y_pred)

    # Print the evaluation metrics
    print(f'-----{model_type}-----')
    print('Accuracy:', accuracy)
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1 Score:', f1)


# Train SVM model

inspired by https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC

In the selection of LinearSVV is used instead of SVC for the building of SVM model. LinearSVC is almost similar to the SVC with kernel parameter set at 'linear'. LinearSVC is based on liblinear while SVC is based on libsvm It is just that LinearSVC is different than SVC in terms of the default loss function used and the handling of the interecept regularisation. Thus, LinearSVC is found to be more flexible in using penalties and loss function, it can scale better when it comes to sample with large size.


In [3]:
#Import svm model
from sklearn import svm
#Create a svm Classifier
svm_model = svm.LinearSVC(verbose=0, max_iter=1000, random_state=123)
#Train the model using the training datasets
svm_model.fit(train_x_fs, train_y)

  y = column_or_1d(y, warn=True)


In [4]:
# Save the model
import pickle
with open('/kaggle/working/svm_model.pkl','wb') as file:
    pickle.dump(svm_model, file)

In [5]:
model_performance(svm_model,'SVM model',test_x_fs,test_y)

-----SVM model-----
Accuracy: 0.6441068097231375
Precision: 0.6431178791342502
Recall: 0.6475469176365762
F1 Score: 0.6453247990528429


# Fine tuned the SVM model by using random search

In [7]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn import svm

# Define the parameter grid for random search
svm_param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'loss': ['hinge', 'squared_hinge'],
    'penalty': ['l1', 'l2'],
    'max_iter': [1000, 2000, 3000]  
}

# Create the LinearSVC classifier
linear_svm = svm.LinearSVC(random_state=123)

# Create the RandomizedSearchCV object
random_search = RandomizedSearchCV(estimator=linear_svm, param_distributions=svm_param_grid, 
                                   n_iter=100, cv=5, scoring='accuracy', n_jobs=-1, random_state=123)

# Perform the random search
random_search.fit(train_x_fs, train_y)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

In [8]:
# Get the best combination of parameters for the LinearSVC
linearsvm_best_param = random_search.best_params_

In [9]:
# Export the random search results as a DataFrame
svm_random_search_results_df = pd.DataFrame(random_search.cv_results_)

# Save the results to a CSV file
svm_random_search_results_df.to_csv('svm_random_search_results.csv', index=False)

In [10]:
# Now, build the model using best param
svm = svm.LinearSVC(**linearsvm_best_param, random_state=123)
svm_finetuned_model = svm.fit(train_x_fs, train_y)

In [11]:
# Save the finetuned model
import pickle
with open('/kaggle/working/svm_finetuned_model.pkl','wb') as f:
    pickle.dump(svm_finetuned_model,f)

In [12]:
# show model performance
model_performance(svm_finetuned_model,'SVM Fine-Tuned model', test_x_fs, test_y)

-----SVM Fine-Tuned model-----
Accuracy: 0.6441532164502566
Precision: 0.6430663792626983
Recall: 0.6479367377624324
F1 Score: 0.6454923717059639
