## Environment Preparation

* Load the SDK code from the local package directory
* Load the API key and secret in the .env file

In [1]:
# Quick hack to load local SDK code
import os

os.chdir(os.path.join(os.getcwd(), ".."))

In [2]:
# Load API key and secret from environment variables
from dotenv import load_dotenv
load_dotenv()

True

## ValidMind SDK Introduction

In [3]:
import pandas as pd
import validmind as vm
import xgboost as xgb

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

vm.init(project="cl2r3k1ri000009jweny7ba1g")

True

In [4]:
df = pd.read_csv("notebooks/datasets/bank_customer_churn.csv")

# Temp for now
df.drop(["Surname", "Geography", "Gender"], axis=1, inplace=True)

train_ds, val_ds = train_test_split(df, test_size=0.20)

x_train = train_ds.drop("Exited", axis=1)
x_val = val_ds.drop("Exited", axis=1)
y_train = train_ds.loc[:, "Exited"].astype(int)
y_val = val_ds.loc[:, "Exited"].astype(int)

In [12]:
from visions import typesets

typeset = typesets.VisionsTypeset()
typeset.infer_type(df)

TypeError: __init__() missing 1 required positional argument: 'types'

In [9]:
df.isna().sum()

RowNumber          0
CustomerId         0
CreditScore        0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [5]:
targets = vm.DatasetTargets(
    target_column="Exited",
    class_labels={
        "0": "Did not exit",
        "1": "Exited",
    }
)

vm.log_dataset(train_ds, "training", analyze=True, targets=targets)
vm.log_dataset(val_ds, "validation", analyze=True, targets=targets)

True

In [None]:
xgb_model = xgb.XGBClassifier(early_stopping_rounds=10)
xgb_model.fit(
    x_train,
    y_train,
    eval_set=[(x_val, y_val)],
    verbose=False,
)

In [None]:
y_pred = xgb_model.predict_proba(x_val)[:, -1]
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(y_val, predictions)

print(f"Accuracy: {accuracy}")

In [None]:
vm.log_model(xgb_model)