# Evaluate the models' performance

## Import the datasets

In [4]:
import os
# Switch the working directory so the relative paths used by the later cells
# (CSV files, pickled models, exported results) are resolved against it.
# A raw string literal cannot end in a single backslash (r'C:\Users\' is a
# SyntaxError), so the trailing separator is written with escaped backslashes.
# NOTE(review): this absolute path is machine-specific — point it at the project folder before running.
new_dir = 'C:\\Users\\'
os.chdir(new_dir)

In [None]:
# Read every train/test split from CSV (paths are relative to the working directory)
import pandas as pd

# The right-hand side is consumed lazily, so the files are read in the listed
# order and each frame is bound to the name in the matching position.
train_x_fs, train_y, test_y, test_x_fs, train_x, test_x = map(
    pd.read_csv,
    (
        'train_x_fs.csv',
        'train_y.csv',
        'test_y.csv',
        'test_x_fs.csv',
        'train_x.csv',
        'test_x.csv',
    ),
)

In [6]:
# Show (rows, columns) for the splits used below; train and test row counts should pair up
tuple(frame.shape for frame in (train_x_fs, train_y, test_y, test_x_fs))

((430971, 35), (430971, 1), (107743, 1), (107743, 35))

## Load the trained models

The trained models will be loaded into the environment of the notebook for model evaluation and comparison.

In [None]:
import pickle


def _load_pickled_model(path):
    """Open the file at `path` in binary mode and return the unpickled object."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# NOTE: pickle.load can execute arbitrary code, so only load files produced by a trusted source.

# Fine-tuned logistic regression model
lr_finetuned_model = _load_pickled_model('LogisticRegression_model/lr_finetuned_model.pkl')

# Fine-tuned LightGBM model
lightgbm_finetuned_model = _load_pickled_model('LightGBM_model/lgbm_finetuned_model.pkl')

# Fine-tuned SVM model
svm_finetuned_model = _load_pickled_model('SVM_model/svm_finetuned_model.pkl')

## Function to evaluate the model

The following code defines a function that evaluates a model's performance on the test set.

In [17]:
# Helper that scores a fitted model on a held-out set, prints the metrics and returns them
def model_performance(model_name, model_type, test_x, test_y):
    """Predict on `test_x`, compare with `test_y`, then print and return four metrics.

    Parameters
    ----------
    model_name : object
        The model object itself (despite the parameter name); its
        ``predict`` method is called with ``test_x``.
    model_type : str
        Label used in the printed banner and stored under 'Model Type'.
    test_x :
        Inputs passed unchanged to ``model_name.predict``.
    test_y :
        True labels passed as the first argument of every scikit-learn metric.

    Returns
    -------
    dict
        Keys 'Model Type', 'Accuracy', 'Precision', 'Recall', 'F1 Score'.
        Each metric value is a string formatted with four decimal places.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    predictions = model_name.predict(test_x)

    # (label, scorer) pairs; every scorer is called with its default arguments
    scorers = (
        ('Accuracy', accuracy_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1 Score', f1_score),
    )

    # Compute every metric before anything is printed, storing each one as
    # fixed-point text with four digits after the decimal point
    report = {'Model Type': model_type}
    for label, scorer in scorers:
        report[label] = format(scorer(test_y, predictions), '.4f')

    # Banner followed by one "<label>: <value>" line per metric
    print(f'-----{model_type}-----')
    for label, _ in scorers:
        print(label + ':', report[label])

    return report


# Evaluate the models using the metrics in the scikit-learn package

In [19]:
# Print the test-set metrics of each fine-tuned model, with blank lines between reports
for fitted_model, label in (
    (lr_finetuned_model, 'Logistic Regression model'),
    (lightgbm_finetuned_model, 'LightGBM model'),
):
    model_performance(fitted_model, label, test_x_fs, test_y)
    print('\n')
# Left as the cell's last expression, so the returned metrics dict is displayed too
model_performance(svm_finetuned_model, 'Support vector Machine model', test_x_fs, test_y)

-----Logistic Regression model-----
Accuracy: 0.6439
Precision: 0.6426
Recall: 0.6485
F1 Score: 0.6455


-----LightGBM model-----
Accuracy: 0.6526
Precision: 0.6488
Recall: 0.6655
F1 Score: 0.6570


-----Support vector Machine model-----
Accuracy: 0.6442
Precision: 0.6431
Recall: 0.6479
F1 Score: 0.6455


{'Model Type': 'Support vector Machine model',
 'Accuracy': '0.6442',
 'Precision': '0.6431',
 'Recall': '0.6479',
 'F1 Score': '0.6455'}

In [20]:
# Collect one metrics dictionary per model.
# Despite its name, this variable is a list of dicts (one entry per model),
# filled in the order: logistic regression, LightGBM, SVM.
model_evaluation_dict = [
    model_performance(fitted_model, label, test_x_fs, test_y)
    for fitted_model, label in (
        (lr_finetuned_model, 'Logistic Regression'),
        (lightgbm_finetuned_model, 'LightGBM'),
        (svm_finetuned_model, 'Support Vector Machine'),
    )
]

-----Logistic Regression-----
Accuracy: 0.6439
Precision: 0.6426
Recall: 0.6485
F1 Score: 0.6455
-----LightGBM-----
Accuracy: 0.6526
Precision: 0.6488
Recall: 0.6655
F1 Score: 0.6570
-----Support Vector Machine-----
Accuracy: 0.6442
Precision: 0.6431
Recall: 0.6479
F1 Score: 0.6455


In [22]:
# Turn the list of per-model metric dicts into a table and write it to CSV without the index column
# NOTE(review): the filename looks like a typo of 'models_performance.csv'; it is kept
# unchanged here in case other code reads this exact path — confirm before renaming.
model_evaluation_result = pd.DataFrame(data=model_evaluation_dict)
model_evaluation_result.to_csv(path_or_buf='mdels_performance.csv', index=False)