In [5]:
"""
Python script for training a model version
"""
# Core Packages
import os
import json

# Third Party
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, cross_validate, cross_val_score
from sklearn.calibration import calibration_curve
from sklearn import metrics
import utils.credit as utils

# Bedrock
from bedrock_client.bedrock.analyzer.model_analyzer import ModelAnalyzer
from bedrock_client.bedrock.analyzer import ModelTypes
from bedrock_client.bedrock.api import BedrockApi
from bedrock_client.bedrock.metrics.service import ModelMonitoringService
import pickle
import logging

In [22]:
# Environmental params for Bedrock HCL
# -- data location and reproducibility --
DATA_DIR_LOCAL = "data/creditdata"   # directory joined with the train/test CSV names
SEED = 0                             # forwarded as `seed` to the training helper

# -- evaluation --
TH = 0.5                             # probability cut-off handed over as `best_th`

# -- per-model hyperparameters --
LR_REGULARIZER = 0.1                 # forwarded as `C` to the logistic-regression trainer
# NOTE(review): the two values below are only recorded in ENV_PARAMS in the
# visible notebook; the random-forest / CatBoost trainers are commented out.
RF_N_ESTIMATORS = CB_ITERATIONS = 100

# -- artefact locations --
OUTPUT_MODEL_PATH = "/artefact/model.pkl"
# NOTE(review): FEATURE_COLS_PATH is not referenced in the visible notebook.
FEATURE_COLS_PATH = "/artefact/feature_cols.pkl"

In [25]:
# Snapshot of the run configuration, keyed by constant name. It is handed to
# compute_log_notebook_metrics as `env_params` so it can be logged with the run.
ENV_PARAMS = dict(
    DATA_DIR_LOCAL=DATA_DIR_LOCAL,
    SEED=SEED,
    TH=TH,
    LR_REGULARIZER=LR_REGULARIZER,
    RF_N_ESTIMATORS=RF_N_ESTIMATORS,
    CB_ITERATIONS=CB_ITERATIONS,
)

In [8]:
# ---------------------------------
# Bedrock functions
# ---------------------------------

def compute_log_metrics(model, x_train,
                        x_test, y_test,
                        best_th=0.5,
                        model_name="tree_model",
                        model_type=ModelTypes.TREE):
    """Score ``model`` on the test split, log the results and run the analyzer.

    Args:
        model: Fitted classifier exposing ``predict_proba``. It is also indexed
            as ``model[1]`` for the analyzer -- presumably a two-step
            scikit-learn ``Pipeline`` (scaler, estimator); TODO confirm.
        x_train: Training features, given to the analyzer.
        x_test: Test features, used for prediction and given to the analyzer.
        y_test: Test labels; must support ``.astype(int).tolist()``.
        best_th: Probabilities strictly greater than this become class 1.
        model_name: Name forwarded to ``ModelAnalyzer``.
        model_type: ``ModelTypes`` member forwarded to ``ModelAnalyzer``.

    Returns:
        Whatever ``ModelAnalyzer.analyze()`` returns. The commented-out call
        site in this notebook unpacks it into four values (shap values, base
        shap values, global explainability, fairness metrics).
    """
    # Positive-class probability, then the thresholded hard prediction.
    test_prob = model.predict_proba(x_test)[:, 1]
    test_pred = np.where(test_prob > best_th, 1, 0)

    # Threshold-dependent scores use the hard predictions ...
    accuracy = metrics.accuracy_score(y_test, test_pred)
    precision = metrics.precision_score(y_test, test_pred)
    recall = metrics.recall_score(y_test, test_pred)
    f1 = metrics.f1_score(y_test, test_pred)
    # ... while the ranking scores use the raw probabilities.
    roc_auc = metrics.roc_auc_score(y_test, test_prob)
    avg_precision = metrics.average_precision_score(y_test, test_prob)

    report_lines = [
        "Evaluation",
        f"  Accuracy          = {accuracy:.4f}",
        f"  Precision         = {precision:.4f}",
        f"  Recall            = {recall:.4f}",
        f"  F1 score          = {f1:.4f}",
        f"  ROC AUC           = {roc_auc:.4f}",
        f"  Average precision = {avg_precision:.4f}",
    ]
    print("\n".join(report_lines))

    # --- Bedrock-native Integrations ---
    # Bedrock Logger: captures model metrics
    bedrock = BedrockApi(logging.getLogger(__name__))

    # Chart data: integer labels against predicted probabilities
    chart_labels = y_test.astype(int).tolist()
    chart_probs = test_prob.flatten().tolist()
    bedrock.log_chart_data(chart_labels, chart_probs)

    # Key-value pairs, logged in a fixed order
    logged_scores = (
        ("Accuracy", accuracy),
        ("Precision", precision),
        ("Recall", recall),
        ("F1 score", f1),
        ("ROC AUC", roc_auc),
        ("Avg precision", avg_precision),
    )
    for label, value in logged_scores:
        bedrock.log_metric(label, value)

    # Bedrock Model Analyzer: explainability inputs (optional)
    analyzer = ModelAnalyzer(model[1], model_name=model_name, model_type=model_type)
    analyzer = analyzer.train_features(x_train)
    analyzer = analyzer.test_features(x_test)

    # Bedrock Model Analyzer: fairness inputs (optional)
    # NOTE(review): CONFIG_FAI is not defined anywhere in the visible notebook;
    # it has to exist at module level before this function is called.
    fairness_step = analyzer.fairness_config(CONFIG_FAI)
    fairness_step = fairness_step.test_labels(y_test)
    fairness_step.test_inference(test_pred)

    return analyzer.analyze()

In [13]:
# Columns excluded from the features (e.g. the outcome of feature selection)
drop_cols = ['ID']

# --- Data ETL ---
# Each split is loaded as a (features, labels) pair:
#   x_<name> : features
#   y_<name> : labels
train_csv = os.path.join(DATA_DIR_LOCAL, 'creditdata_train_v2.csv')
x_train, y_train = utils.load_dataset(train_csv, drop_columns=drop_cols)
test_csv = os.path.join(DATA_DIR_LOCAL, 'creditdata_test_v2.csv')
x_test, y_test = utils.load_dataset(test_csv, drop_columns=drop_cols)


# --- Candidate Binary Classification Algos ---
# MODEL 1: LOGISTIC REGRESSION
# Use best parameters from a model selection and threshold tuning process
model = utils.train_log_reg_model(
    x_train,
    y_train,
    seed=SEED,
    C=LR_REGULARIZER,
    upsample=True,
    verbose=True,
)
model_name, model_type = "logreg_model", ModelTypes.LINEAR

# MODEL 2: RANDOM FOREST
# Uses default threshold of 0.5 and model parameters
# model = utils.train_rf_model(x_train, y_train, seed=SEED, upsample=True, verbose=True)
# model_name = "randomforest_model"
# model_type = ModelTypes.TREE

# MODEL 3: CATBOOST
# Uses default threshold of 0.5 and model parameters
# model = utils.train_catboost_model(x_train, y_train, seed=SEED, upsample=True, verbose=True)
# model_name = "catboost_model"
# model_type = ModelTypes.TREE


# --- Bedrock-native Integrations ---
# Bedrock Model Analyzer: generated values
# (
#     shap_values, 
#     base_shap_values, 
#     global_explainability, 
#     fairness_metrics,
# ) = compute_log_metrics(model=model, x_train=x_train, 
#                         x_test=x_test, y_test=y_test, 
#                         best_th=TH,
#                         model_name=model_name, model_type=model_type)

# IMPORTANT: Saving the Model Artefact to Bedrock
# with open(OUTPUT_MODEL_PATH, "wb") as model_file:
#     pickle.dump(model, model_file)

# Bedrock Model Monitoring: pre-requisite for monitoring concept drift
# Prepare the inference probabilities
# train_prob = model.predict_proba(x_train)[:, 1]
# train_pred = np.where(train_prob > TH, 1, 0)

# This step initialises the distribution from model training     
# ModelMonitoringService.export_text(
#     features=x_train.iteritems(),
#     inference=train_prob.tolist(),
# )
# --- End of Bedrock-native Integrations ---

upsampling...
scaling...
fitting...
C: 0.1
chaining pipeline...
done.


# Logging an Experiment over REST

In [15]:
import os
import bdrk
import logging
import sys
import pickle
from sklearn import metrics

# You can customize the logging logic here
_logger = logging.getLogger(bdrk.utils.vars.Constants.MAIN_LOG)
_logger.setLevel(logging.INFO)
# Attach the stdout handler only once so re-running the cell does not
# duplicate every log line.
if not _logger.handlers:
    _logger.addHandler(logging.StreamHandler(stream=sys.stdout))

# Visit https://bedrock.basis-ai.com/setting/token/ to get the personal access token.
# The token must be present in the environment. Its value is deliberately never
# left as the cell's last expression: Jupyter would render the secret into the
# saved notebook output. Only its presence is checked; a missing variable still
# raises KeyError, as a bare os.environ[...] lookup would.
if "BEDROCK_API_TOKEN" not in os.environ:
    raise KeyError(
        "BEDROCK_API_TOKEN is not set; export a Bedrock personal access token "
        "before running this notebook"
    )

'<REDACTED: BEDROCK_API_TOKEN value removed from the saved cell output; the exposed token should be revoked and rotated>'

In [16]:
# Bedrock identifiers for the run logged from this notebook. They are passed
# as project_id / pipeline_id / environment_id to compute_log_notebook_metrics
# and end up in the bdrk.init(...) and bdrk.start_run(...) calls.
PROJECT_ID = "victor-sandbox"
PIPELINE_ID = "rest-credit-scoring"
ENVIRONMENT_ID = "sandbox-aws-production"

In [None]:
# # Initialize the project
# bdrk.init(project_id=PROJECT_ID)

# with bdrk.start_run(pipeline_id=PIPELINE_ID, environment_id=ENVIRONMENT_ID):
#     # Log related parameters
#     bdrk.log_params({"LR": LR, "NUM_LEAVES": NUM_LEAVES, "N_ESTIMATORS": N_ESTIMATORS})
    
#     # Log a single metric
#     bdrk.log_metric(key="Feature Count", value=len(FEATURE_COLS))
    
#     # Log metrics for each training step
#     evals = clf.evals_result_["valid_0"]
#     for iteration in range(N_ESTIMATORS):
#         bdrk.log_metrics(metrics={
#             "Training binary_logloss": evals["binary_logloss"][iteration],
#             "Training roc_auc": evals["roc_auc"][iteration]
#         }, x=iteration)
        
    
#     # Log final metrics in batch
#     y_pred = (y_probs > 0.5).astype(int)
#     bdrk.log_metrics(metrics={
#         "Accuracy": metrics.accuracy_score(y_test, y_pred),
#         "Precision": metrics.precision_score(y_test, y_pred),
#         "Recall": metrics.recall_score(y_test, y_pred),
#         "F1 score": metrics.f1_score(y_test, y_pred),
#         "Roc auc": metrics.roc_auc_score(y_test, y_pred),
#         "Avg precision": metrics.average_precision_score(y_test, y_pred),
#     })
    
#     # Log the binary classifier from the predicted data
#     bdrk.log_binary_classifier_metrics(actual=y_test, probability=y_probs)

#     # Saving and log the model
#     with open(OUTPUT_MODEL, "wb") as model_file:
#         pickle.dump(clf, model_file)
#     bdrk.log_model(OUTPUT_MODEL)

In [31]:
def compute_log_notebook_metrics(model, x_train, 
                                x_test, y_test, 
                                best_th=0.5,
                                model_name="tree_model", 
                                model_type=ModelTypes.TREE,
                                project_id=None,
                                pipeline_id=None,
                                environment_id=None,
                                env_params=None):
    """Evaluate ``model`` on the test split and log the run through ``bdrk``.

    Inside a single ``bdrk`` run this logs the supplied parameters, the
    binary-classifier data (labels vs. probabilities), six scalar metrics,
    and the pickled model written to ``OUTPUT_MODEL_PATH``.

    Args:
        model: Fitted classifier exposing ``predict_proba``; it is pickled
            as-is.
        x_train: Unused here; kept so the signature mirrors
            ``compute_log_metrics``.
        x_test: Test features passed to ``model.predict_proba``.
        y_test: Test labels; must support ``.astype(int).tolist()``.
        best_th: Probabilities strictly greater than this become class 1.
        model_name: Unused here; kept for signature parity.
        model_type: Unused here; kept for signature parity.
        project_id: Project passed to ``bdrk.init``. Falls back to the
            module-level ``PROJECT_ID`` when ``None``.
        pipeline_id: Pipeline passed to ``bdrk.start_run``. Falls back to the
            module-level ``PIPELINE_ID`` when ``None``.
        environment_id: Environment passed to ``bdrk.start_run``. Falls back
            to the module-level ``ENVIRONMENT_ID`` when ``None``.
        env_params: Mapping of run parameters for ``bdrk.log_params``;
            nothing is logged when ``None``.

    Returns:
        None. All results are printed and/or sent to Bedrock.
    """
    # Honour the caller-supplied identifiers. Previously these arguments were
    # accepted but silently ignored in favour of the notebook globals.
    project_id = PROJECT_ID if project_id is None else project_id
    pipeline_id = PIPELINE_ID if pipeline_id is None else pipeline_id
    environment_id = ENVIRONMENT_ID if environment_id is None else environment_id

    bdrk.init(project_id=project_id)
    with bdrk.start_run(pipeline_id=pipeline_id, environment_id=environment_id):

        # The default env_params=None must not be forwarded to log_params.
        if env_params is not None:
            bdrk.log_params(env_params)

        # Positive-class probability, then the thresholded hard prediction.
        test_prob = model.predict_proba(x_test)[:, 1]
        test_pred = np.where(test_prob > best_th, 1, 0)

        # Threshold-dependent scores use test_pred; ranking scores
        # (ROC AUC, average precision) use the raw probabilities.
        acc = metrics.accuracy_score(y_test, test_pred)
        precision = metrics.precision_score(y_test, test_pred)
        recall = metrics.recall_score(y_test, test_pred)
        f1_score = metrics.f1_score(y_test, test_pred)
        roc_auc = metrics.roc_auc_score(y_test, test_prob)
        avg_prc = metrics.average_precision_score(y_test, test_prob)
        print("Evaluation\n"
              f"  Accuracy          = {acc:.4f}\n"
              f"  Precision         = {precision:.4f}\n"
              f"  Recall            = {recall:.4f}\n"
              f"  F1 score          = {f1_score:.4f}\n"
              f"  ROC AUC           = {roc_auc:.4f}\n"
              f"  Average precision = {avg_prc:.4f}")

        # --- Bedrock-native Integrations ---
        # Log the binary classifier data: integer labels vs. probabilities
        bdrk.log_binary_classifier_metrics(y_test.astype(int).tolist(),
                                           test_prob.flatten().tolist())

        # Log key-value pairs
        bdrk.log_metric("Accuracy", acc)
        bdrk.log_metric("Precision", precision)
        bdrk.log_metric("Recall", recall)
        bdrk.log_metric("F1 score", f1_score)
        bdrk.log_metric("ROC AUC", roc_auc)
        bdrk.log_metric("Avg precision", avg_prc)

        # Save the model artefact, then register it with the run
        with open(OUTPUT_MODEL_PATH, "wb") as model_file:
            pickle.dump(model, model_file)
        bdrk.log_model(OUTPUT_MODEL_PATH)


In [32]:
# Evaluate the trained model and record the run (params, metrics, model artefact).
compute_log_notebook_metrics(
    model=model,
    x_train=x_train, x_test=x_test, y_test=y_test,
    best_th=TH,
    model_name=model_name, model_type=model_type,
    project_id=PROJECT_ID, pipeline_id=PIPELINE_ID, environment_id=ENVIRONMENT_ID,
    env_params=ENV_PARAMS,
)

BedrockClient initialized on project=victor-sandbox


INFO:bdrk:BedrockClient initialized on project=victor-sandbox


Run started: rest-credit-scoring-run7


INFO:bdrk:Run started: rest-credit-scoring-run7


Params updated: {'DATA_DIR_LOCAL': 'data/creditdata', 'SEED': '0', 'TH': '0.5', 'LR_REGULARIZER': '0.1', 'RF_N_ESTIMATORS': '100', 'CB_ITERATIONS': '100'}


INFO:bdrk:Params updated: {'DATA_DIR_LOCAL': 'data/creditdata', 'SEED': '0', 'TH': '0.5', 'LR_REGULARIZER': '0.1', 'RF_N_ESTIMATORS': '100', 'CB_ITERATIONS': '100'}


Evaluation
  Accuracy          = 0.6696
  Precision         = 0.8758
  Recall            = 0.6709
  F1 score          = 0.7598
  ROC AUC           = 0.7258
  Average precision = 0.8787
Confusion matrix logged


INFO:bdrk:Confusion matrix logged


New metrics logged: with x=0, metrics={'Accuracy': 0.6696}


INFO:bdrk:New metrics logged: with x=0, metrics={'Accuracy': 0.6696}


New metrics logged: with x=0, metrics={'Precision': 0.8757541899441341}


INFO:bdrk:New metrics logged: with x=0, metrics={'Precision': 0.8757541899441341}


New metrics logged: with x=0, metrics={'Recall': 0.6709467556925184}


INFO:bdrk:New metrics logged: with x=0, metrics={'Recall': 0.6709467556925184}


New metrics logged: with x=0, metrics={'F1 score': 0.7597906165180303}


INFO:bdrk:New metrics logged: with x=0, metrics={'F1 score': 0.7597906165180303}


New metrics logged: with x=0, metrics={'ROC AUC': 0.7257798817549945}


INFO:bdrk:New metrics logged: with x=0, metrics={'ROC AUC': 0.7257798817549945}


New metrics logged: with x=0, metrics={'Avg precision': 0.8787462755537497}


INFO:bdrk:New metrics logged: with x=0, metrics={'Avg precision': 0.8787462755537497}


Model logged: /artefact/model.pkl


INFO:bdrk:Model logged: /artefact/model.pkl


Model version created: rest-credit-scoring-v3


INFO:bdrk:Model version created: rest-credit-scoring-v3


Run Succeeded: rest-credit-scoring-run7


INFO:bdrk:Run Succeeded: rest-credit-scoring-run7


Run exitted


INFO:bdrk:Run exitted
