# Running Individual Tests

This notebook shows how to run an individual metric or threshold test, and how to pass custom parameters to it.

In [1]:
%load_ext dotenv
%dotenv dev.env

import pandas as pd
import xgboost as xgb

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

%matplotlib inline

In [2]:
import validmind as vm

# Connect the client library to the locally running tracking API and bind
# this session to the target project.
# NOTE(review): credentials are presumably picked up from `dev.env`, loaded
# by the `%dotenv` magic in the first cell — confirm.
vm.init(
    api_host="http://localhost:3000/api/v1/tracking",
    project="clh0yyhg700825x8h4ocp5i3u",
)

True

## Train a Model for Testing

We train a simple customer churn model for our test.

In [3]:
# Raw bank customer churn data, read from the notebook-relative datasets folder.
churn_csv_path = "./datasets/bank_customer_churn.csv"
df = pd.read_csv(churn_csv_path)

In [4]:
# Fixed seed so both splits (and every metric computed downstream) are
# reproducible after Restart Kernel -> Run All.
SPLIT_SEED = 42

genders = {"Male": 0, "Female": 1}

# Build the modelling frame in one chained expression instead of mutating
# `df` in place:
#   1. drop identifier columns and CreditScore,
#   2. encode Gender as 0/1 -- `map` yields a numeric column directly rather
#      than relying on `replace`'s deprecated automatic downcasting of
#      object columns,
#   3. one-hot encode Geography, appending the dummy columns at the end and
#      removing the original column.
df = (
    df.drop(columns=["RowNumber", "CustomerId", "Surname", "CreditScore"])
    .assign(Gender=lambda frame: frame["Gender"].map(genders))
    .pipe(
        lambda frame: pd.concat(
            [
                frame.drop(columns="Geography"),
                pd.get_dummies(frame["Geography"], prefix="Geography"),
            ],
            axis=1,
        )
    )
)

# Hold out 20% of the rows for testing.
train_df, test_df = train_test_split(
    df, test_size=0.20, random_state=SPLIT_SEED
)

# 25% of the remaining 80% is 20% of the total, giving a 60/20/20
# train/validation/test split.
train_ds, val_ds = train_test_split(
    train_df, test_size=0.25, random_state=SPLIT_SEED
)

# For training
x_train = train_ds.drop(columns="Exited")
y_train = train_ds["Exited"].astype(int)
x_val = val_ds.drop(columns="Exited")
y_val = val_ds["Exited"].astype(int)

# For testing
x_test = test_df.drop(columns="Exited")
y_test = test_df["Exited"].astype(int)

In [5]:
# Gradient-boosted churn classifier. Three evaluation metrics are tracked on
# every boosting round, and training stops early after 10 rounds without
# improvement (per XGBoost's documented behaviour, early stopping watches the
# last metric on the last evaluation set).
model = xgb.XGBClassifier(
    early_stopping_rounds=10,
    eval_metric=["error", "logloss", "auc"],
)

# Evaluate on the training rows first and the validation rows last.
evaluation_sets = [(x_train, y_train), (x_val, y_val)]
model.fit(x_train, y_train, eval_set=evaluation_sets, verbose=False)

In [6]:
# Register the datasets with ValidMind.
# The training dataset must be the rows the model was actually fitted on
# (`train_ds`, the 60% split). `train_df` also contains the validation rows
# used for early stopping, which would skew any train-vs-test comparison.
vm_train_ds = vm.init_dataset(
    dataset=train_ds,
    type="generic",
    target_column="Exited",
)

# Held-out 20% test split.
vm_test_ds = vm.init_dataset(
    dataset=test_df,
    type="generic",
    target_column="Exited",
)

# Wrap the fitted classifier together with its train/test datasets so tests
# can be run against it.
vm_model = vm.init_model(
    model,
    train_ds=vm_train_ds,
    test_ds=vm_test_ds,
)

Pandas dataset detected. Initializing VM Dataset instance...
Inferring dataset types...
Pandas dataset detected. Initializing VM Dataset instance...
Inferring dataset types...


## Import and Run the Individual Test

We can now import the individual test and pass the required context and config parameters to it.

In [7]:
from validmind.vm_models.test_context import TestContext
from validmind.model_validation.sklearn.threshold_tests import WeakspotsDiagnosis

In [8]:
# Wrap the ValidMind model (initialised above with its train and test
# datasets) in a test context, then build the weak-spots threshold test on it.
# NOTE(review): no custom parameters are passed here, so the test presumably
# runs with its default configuration — confirm.
test_context = TestContext(model=vm_model)
ws_diagnostic = WeakspotsDiagnosis(test_context)

In [9]:
# Execute the test; the cell displays the returned result object, which is
# also read back from `ws_diagnostic.test_results` in the following cells.
ws_diagnostic.run()

TestPlanTestResult(result_id="weak_spots", test_results)

In [10]:
# Display the test result stored on the test instance by the run above.
ws_diagnostic.test_results.show()

In [11]:
# Inspect the raw list of per-column TestResult records (slices, sample
# counts and metric values) behind the rendered result. The output is long.
ws_diagnostic.test_results.test_results.results

[TestResult(test_name='accuracy', column='Gender', passed=False, values={'slice': ['(-0.001, 0.1]', '(0.1, 0.2]', '(0.2, 0.3]', '(0.3, 0.4]', '(0.4, 0.5]', '(0.5, 0.6]', '(0.6, 0.7]', '(0.7, 0.8]', '(0.8, 0.9]', '(0.9, 1.0]', '(-0.001, 0.1]', '(0.1, 0.2]', '(0.2, 0.3]', '(0.3, 0.4]', '(0.4, 0.5]', '(0.5, 0.6]', '(0.6, 0.7]', '(0.7, 0.8]', '(0.8, 0.9]', '(0.9, 1.0]'], 'shape': [2674, 0, 0, 0, 0, 0, 0, 0, 0, 2126, 856, 0, 0, 0, 0, 0, 0, 0, 0, 744], 'accuracy': [0.9199700822737472, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8857008466603951, 0.8820093457943925, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8373655913978495], 'precision': [0.8969465648854962, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8825214899713467, 0.7448979591836735, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7279411764705882], 'recall': [0.556872037914692, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6039215686274509, 0.4899328859060403, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5409836065573771], 'f1': [0.6871345029239767, 0.