In [1]:
# Quick hack to load local library code: run the notebook from the parent of
# its own directory.
import os

# Later cells use paths such as "notebooks/datasets/..." relative to that parent
# directory. Only step up when the "notebooks" folder is not already visible
# from the current directory, so re-running this cell does not keep climbing
# one level higher each time (which would break those relative paths).
if not os.path.isdir("notebooks"):
    os.chdir(os.path.join(os.getcwd(), ".."))

# Load API key and secret from environment variables.
# load_dotenv() copies the key/value pairs of a `.env` file (when one is found)
# into the process environment. Presumably vm.init() reads the credentials
# from there — TODO confirm against the ValidMind client documentation.
from dotenv import load_dotenv
load_dotenv()

import pandas as pd
import xgboost as xgb

from numpy import argmax
from sklearn.metrics import accuracy_score, precision_recall_curve
from sklearn.model_selection import train_test_split

%matplotlib inline

In [2]:
import validmind as vm

# Project identifier passed to vm.init().
# Alternative project id kept for reference: "cl2r3k1ri000009jweny7ba1g"
PROJECT_ID = "cl1jyv16o000809lg98gi9tie"

# For test environment use api_host="https://api.dev.vm.validmind.ai/api/v1/tracking"
vm.init(project=PROJECT_ID)

True

In [3]:
# Read the bank customer churn CSV into a DataFrame.
df = pd.read_csv("notebooks/datasets/bank_customer_churn.csv")

# Display names for the two values of the "Exited" target column.
exit_labels = {"0": "Did not exit", "1": "Exited"}

# Register the frame with ValidMind, naming "Exited" as the target column.
vm_dataset = vm.init_dataset(dataset=df, target_column="Exited", class_labels=exit_labels)

Pandas dataset detected. Initializing VM Dataset instance...
Inferring dataset types...


In [4]:
# Build the tabular-dataset test plan for the registered dataset, then execute it.
dataset_tests = vm.test_plans.TabularDataset(dataset=vm_dataset)
dataset_tests.run()

Running test plan 'tabular_dataset'...


0it [00:00, ?it/s]

Sending results of test plan execution 'tabular_dataset' to ValidMind...
|-- Running sub test plan - tabular_dataset_description
Running test plan 'tabular_dataset_description'...


  0%|          | 0/3 [00:00<?, ?it/s]

Running DatasetMetadata: dataset_metadata
Running Metric: dataset_description
Running Metric: dataset_correlations
Sending results of test plan execution 'tabular_dataset_description' to ValidMind...
Successfully logged dataset metadata and statistics.
Successfully logged metrics
|-- Running sub test plan - tabular_data_quality
Running test plan 'tabular_data_quality'...


  0%|          | 0/8 [00:00<?, ?it/s]

Running ThresholdTest: class_imbalance
Running ThresholdTest: duplicates
Running ThresholdTest: cardinality
Running ThresholdTest: pearson_correlation
Running ThresholdTest: missing
Running ThresholdTest: skewness
Running ThresholdTest: unique
Running ThresholdTest: zeros
Sending results of test plan execution 'tabular_data_quality' to ValidMind...
Successfully logged test results for test: class_imbalance
Successfully logged test results for test: duplicates
Successfully logged test results for test: cardinality
Successfully logged test results for test: pearson_correlation
Successfully logged test results for test: missing
Successfully logged test results for test: skewness
Successfully logged test results for test: unique
Successfully logged test results for test: zeros


In [5]:
# Seed shared by both splits so the train/validation/test partitions (and every
# metric computed from them) are the same on each Restart & Run All.
RANDOM_STATE = 42

genders = {"Male": 0, "Female": 1}

# Build new frames instead of mutating in place: the DataFrame object bound to
# `df` was already passed to vm.init_dataset() in an earlier cell, and in-place
# edits would change that same object.
# NOTE: `df` is rebound to the preprocessed frame, so re-running this cell
# requires re-running the cell that loads the CSV first.
df = df.drop(columns=["RowNumber", "CustomerId", "Surname", "CreditScore"])
df = df.replace({"Gender": genders})

# One-hot encode "Geography": append the indicator columns, then drop the original.
df = pd.concat([df, pd.get_dummies(df["Geography"], prefix="Geography")], axis=1)
df = df.drop(columns="Geography")

# Hold out 20% of the rows for testing.
train_df, test_df = train_test_split(df, test_size=0.20, random_state=RANDOM_STATE)

# 25% of the remaining 80% is 20% of the whole: this guarantees a 60/20/20 split
train_ds, val_ds = train_test_split(train_df, test_size=0.25, random_state=RANDOM_STATE)

# For training
x_train = train_ds.drop("Exited", axis=1)
y_train = train_ds.loc[:, "Exited"].astype(int)
x_val = val_ds.drop("Exited", axis=1)
y_val = val_ds.loc[:, "Exited"].astype(int)

# For testing
x_test = test_df.drop("Exited", axis=1)
y_test = test_df.loc[:, "Exited"].astype(int)

In [6]:
# Configure the classifier in a single constructor call: early stopping after
# 10 rounds, with error, log-loss and AUC as evaluation metrics.
model = xgb.XGBClassifier(
    early_stopping_rounds=10,
    eval_metric=["error", "logloss", "auc"],
)

# Metrics are evaluated on both the training and the validation split.
eval_sets = [(x_train, y_train), (x_val, y_val)]
model.fit(x_train, y_train, eval_set=eval_sets, verbose=False)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=10, enable_categorical=False,
              eval_metric=['error', 'logloss', 'auc'], gamma=0, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_bin=256, max_cat_to_onehot=4, max_delta_step=0, max_depth=6,
              max_leaves=0, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=0,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

In [7]:
# Wrap the fitted classifier for ValidMind.
vm_model = vm.init_model(model)

# Both splits are registered the same way: generic type, "Exited" as the target.
shared_ds_options = {"type": "generic", "target_column": "Exited"}
vm_train_ds = vm.init_dataset(dataset=train_ds, **shared_ds_options)
vm_test_ds = vm.init_dataset(dataset=test_df, **shared_ds_options)

Pandas dataset detected. Initializing VM Dataset instance...
Inferring dataset types...
Pandas dataset detected. Initializing VM Dataset instance...
Inferring dataset types...


In [8]:
# Inputs for the classifier test plan: the wrapped model plus the train and test datasets.
classifier_plan_inputs = dict(model=vm_model, train_ds=vm_train_ds, test_ds=vm_test_ds)

# Build the scikit-learn classifier test plan and execute it.
model_metrics = vm.test_plans.SKLearnClassifier(**classifier_plan_inputs)
model_metrics.run()

Running test plan 'sklearn_classifier'...
Generating predictions train dataset...
Generating predictions test dataset...


0it [00:00, ?it/s]

Sending results of test plan execution 'sklearn_classifier' to ValidMind...
|-- Running sub test plan - sklearn_classifier_metrics
Running test plan 'sklearn_classifier_metrics'...


  0%|          | 0/13 [00:00<?, ?it/s]

Running ModelMetadata: model_metadata
Running Metric: accuracy
Running Metric: confusion_matrix
Running Metric: f1_score
Running Metric: pfi
Running Metric: pr_curve
Running Metric: precision
Running Metric: recall
Running Metric: roc_auc
Running Metric: roc_curve
Running Metric: csi
Running Metric: psi
Running SHAPGlobalImportance: shap


ntree_limit is deprecated, use `iteration_range` or model slicing instead.


Sending results of test plan execution 'sklearn_classifier_metrics' to ValidMind...
Successfully logged metrics
|-- Running sub test plan - sklearn_classifier_validation
Running test plan 'sklearn_classifier_validation'...


  0%|          | 0/4 [00:00<?, ?it/s]

Running ThresholdTest: accuracy_score
Running ThresholdTest: f1_score
Running ThresholdTest: roc_auc_score
Running ThresholdTest: training_test_degradation
Sending results of test plan execution 'sklearn_classifier_validation' to ValidMind...
Successfully logged test results for test: accuracy_score
Successfully logged test results for test: f1_score
Successfully logged test results for test: roc_auc_score
Successfully logged test results for test: training_test_degradation
