### Import needed packages

In [1]:
import pickle

import numpy as np
import pandas as pd

from sklearn.metrics import (f1_score, precision_score, recall_score, roc_auc_score, accuracy_score, confusion_matrix)
from sklearn.calibration import CalibratedClassifierCV, calibration_curve

In [2]:
try:
    test_data_path = os.environ['TEST_SPLIT']
except:
    test_data_path = 'test.csv'
    
test_data = pd.read_csv(test_data_path, index_col=[0])

In [3]:
#Read the trained models
model = pickle.load(open('best_model.pkl', 'rb'))

#Read the preprocessing flow parameters
prep = pickle.load(open('prep_parameters.pkl', 'rb'))

#Import the prep method 
if prep['Imputation']:
    imputer = pickle.load(open('prep_parameters.pkl', 'rb'))

### Preprocess the holdout set

In [4]:
X = test_data.dropna()
y = np.round(pd.DataFrame(X['SeriousDlqin2yrs']))
X = X.drop('SeriousDlqin2yrs', axis=1)

In [5]:
pred_proba = model.predict(X)

In [6]:
recall = recall_score(y, pred_proba)
roc_auc = roc_auc_score(y, pred_proba)
f1 = f1_score(y, pred_proba)
precision = precision_score(y, pred_proba)
accuracy = accuracy_score(y, pred_proba)
print(f'Roc_auc: {roc_auc}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'Accuracy: {accuracy}')
print(f'F1 Score: {f1}')

Roc_auc: 0.9223712079895398
Precision: 0.6146234194612424
Recall: 0.8858954041204438
Accuracy: 0.9538150415391342
F1 Score: 0.7257383966244726


## Ouputs

In [7]:
import json

metrics = {
'metrics': [
    {
        'name': 'f1-score',
        'numberValue':  f1,
        'format': 'PERCENTAGE'
    },
    {
        'name': 'recall-score',
        'numberValue':  recall,
        'format': 'PERCENTAGE'       
    },
    {
        'name': 'roc-auc-score',
        'numberValue':  roc_auc,
        'format': 'PERCENTAGE'       
    },
    {
        'name': 'precision-score',
        'numberValue':  precision,
        'format': 'PERCENTAGE'       
    },
    {
        'name': 'wdata-imp-strategy',
        'numberValue': prep['Imputation'],
        'format': 'RAW'
    },
    {
        'name': 'wdata-aug-strategy',
        'numberValue': prep['Balancing'],
        'format': 'RAW'
    },
]
}

with open('mlpipeline-metrics.json', 'w') as f:
    json.dump(metrics, f)