## Environment Preparation

* Load the SDK code from the local package directory
* Load the API key and secret in the .env file

In [1]:
# Quick hack to load local SDK code
import os

os.chdir(os.path.join(os.getcwd(), ".."))

In [2]:
# Load API key and secret from environment variables
from dotenv import load_dotenv
load_dotenv()

True

## ValidMind SDK Introduction

In [3]:
import pandas as pd
import xgboost as xgb

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [4]:
# Initialize ValidMind SDK
import validmind as vm

vm.init(project="cl4iqmeum0000immms7bola4c")

True

In [5]:
df = pd.read_csv("notebooks/datasets/bank_customer_churn.csv")

targets = vm.DatasetTargets(
    target_column="Exited",
    class_labels={
        "0": "Did not exit",
        "1": "Exited",
    }
)

vm.log_dataset(df, "training", analyze=True, targets=targets)

True

In [6]:
# Drop this until we make them one hot encoded
df.drop(["Surname", "Geography", "Gender"], axis=1, inplace=True)

train_ds, val_ds = train_test_split(df, test_size=0.20)

x_train = train_ds.drop("Exited", axis=1)
x_val = val_ds.drop("Exited", axis=1)
y_train = train_ds.loc[:, "Exited"].astype(int)
y_val = val_ds.loc[:, "Exited"].astype(int)

In [7]:
xgb_model = xgb.XGBClassifier(early_stopping_rounds=10)
xgb_model.fit(
    x_train,
    y_train,
    eval_set=[(x_train, y_train), (x_val, y_val)],
    eval_metric=["error", "logloss", "auc"],
    verbose=False,
)



XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=10, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

In [8]:
y_pred = xgb_model.predict_proba(x_val)[:, -1]
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(y_val, predictions)

print(f"Accuracy: {accuracy}")

Accuracy: 0.8525


In [9]:
vm.log_model(xgb_model)

True

In [10]:
vm.log_training_metrics(xgb_model, x_train, y_train)

Successfully logged training metrics


True