In [None]:
import xgboost as xgb
import validmind as vm
from validmind.datasets.classification import customer_churn

# Initialise the ValidMind client; no arguments are passed here, so any
# connection settings must come from outside this cell.
vm.init()

# Load the customer churn demo dataset shipped with ValidMind.
raw_df = customer_churn.load_data()

# preprocess() returns three frames: training, validation and test.
train_df, validation_df, test_df = customer_churn.preprocess(raw_df)

# Separate features from the target column for the training and validation frames.
x_train = train_df.drop(customer_churn.target_column, axis=1)
y_train = train_df[customer_churn.target_column]
x_val = validation_df.drop(customer_churn.target_column, axis=1)
y_val = validation_df[customer_churn.target_column]

# Train an XGBoost classifier with early stopping after 10 rounds; the
# validation split is supplied as the evaluation set.
model = xgb.XGBClassifier(early_stopping_rounds=10)
# Track three evaluation metrics on the evaluation set during training.
model.set_params(
    eval_metric=["error", "logloss", "auc"],
)
model.fit(
    x_train,
    y_train,
    eval_set=[(x_val, y_val)],
    verbose=False,
)

# Wrap the raw frame as a ValidMind dataset; this is the only dataset that is
# given class labels.
vm_raw_dataset = vm.init_dataset(
    dataset=raw_df,
    input_id="raw_dataset",
    target_column=customer_churn.target_column,
    class_labels=customer_churn.class_labels,
)

# Wrap the training split.
vm_train_ds = vm.init_dataset(
    dataset=train_df,
    input_id="train_dataset",
    target_column=customer_churn.target_column,
)

# Wrap the test split; this is the dataset passed to every run_test call below.
vm_test_ds = vm.init_dataset(
    dataset=test_df, input_id="test_dataset", target_column=customer_churn.target_column
)

# Wrap the fitted classifier so it can be passed to tests as the "model" input.
vm_model = vm.init_model(
    model,
    input_id="model",
)

# Attach the model's predictions to the training and test datasets.
vm_train_ds.assign_predictions(
    model=vm_model,
)

vm_test_ds.assign_predictions(
    model=vm_model,
)

In [None]:
from validmind.tests import run_test

# Baseline: run ClassifierPerformance on the test dataset and model without any
# post-processing, for comparison with the customised runs below.
result = run_test(
    "validmind.model_validation.sklearn.ClassifierPerformance",
    inputs={"dataset": vm_test_ds, "model": vm_model},
    generate_description=False,
)

# Post-processing functions

## Simple Tabular Updates

In [None]:
from validmind.vm_models.result import TestResult


def add_class_labels(result: TestResult):
    """Swap raw class ids in the first result table for readable labels.

    Entries of the "Class" column equal to the string "1" become "Churn" and
    entries equal to "0" become "No Churn"; every other entry is kept as is.
    The table is updated in place and the same result object is returned.
    """
    label_by_class = {"1": "Churn", "0": "No Churn"}
    first_table = result.tables[0]

    def to_label(value):
        # Fall back to the original value for rows that are not a class id.
        return label_by_class.get(value, value)

    first_table.data["Class"] = first_table.data["Class"].map(to_label)
    return result


# Same test as the baseline, with add_class_labels supplied as the
# post-processing hook.
result = run_test(
    "validmind.model_validation.sklearn.ClassifierPerformance",
    inputs={"dataset": vm_test_ds, "model": vm_model},
    generate_description=False,
    post_process_fn=add_class_labels,
)

## Adding Tables

In [None]:
from validmind.vm_models.result import ResultTable

def add_table(result: TestResult):
    """Append a "Class Legend" table mapping class values to class labels.

    The legend has one row per class ("0" -> "No Churn", "1" -> "Churn").
    The result is extended in place and returned.
    """
    legend_rows = [
        {"Class Value": "0", "Class Label": "No Churn"},
        {"Class Value": "1", "Class Label": "Churn"},
    ]
    legend = ResultTable(title="Class Legend", data=legend_rows)
    result.add_table(legend)
    return result


# Run ClassifierPerformance again, this time appending the legend table via
# the add_table post-processing hook.
result = run_test(
    "validmind.model_validation.sklearn.ClassifierPerformance",
    inputs={"dataset": vm_test_ds, "model": vm_model},
    generate_description=False,
    post_process_fn=add_table,
)

## Removing Tables

In [None]:
def remove_table(result: TestResult):
    """Drop the second table (index 1) from the result and return the result.

    The tables list is modified in place; an IndexError is raised when the
    result holds fewer than two tables.
    """
    tables = result.tables
    tables.pop(1)
    return result


# Run ClassifierPerformance with remove_table as the post-processing hook, so
# the table at index 1 is dropped from the returned result.
result = run_test(
    "validmind.model_validation.sklearn.ClassifierPerformance",
    inputs={"dataset": vm_test_ds, "model": vm_model},
    generate_description=False,
    post_process_fn=remove_table,
)

## Creating Figures from Tables

In [None]:
from plotly_express import bar
from validmind.vm_models.figure import Figure


def create_figure(result: TestResult):
    """Add a bar chart built from the first result table.

    The chart plots the "Total Count of Outliers" column against the
    "Variable" column and is attached to the result under the key
    "outlier_count_by_variable". The same result object is returned.
    """
    outlier_table = result.tables[0].data
    outlier_chart = bar(
        outlier_table,
        x="Variable",
        y="Total Count of Outliers",
    )

    wrapped_chart = Figure(
        figure=outlier_chart,
        key="outlier_count_by_variable",
        ref_id=result.ref_id,
    )
    result.add_figure(wrapped_chart)
    return result


# Run the IQROutliersTable test on the test dataset and let create_figure add
# a bar chart derived from its table.
result = run_test(
    "validmind.data_validation.IQROutliersTable",
    inputs={"dataset": vm_test_ds},
    generate_description=False,
    post_process_fn=create_figure,
)

## Creating Tables from Figures

In [None]:
def create_table(result: TestResult):
    """Add one table per figure, built from the figure's first trace.

    For every figure on the result, the x/y values of trace 0 are paired into
    rows with "Percentile" and "Outlier Count" columns. The table title is the
    figure's layout title text. Only the first trace of each figure is read.
    The result is extended in place and returned.
    """
    for wrapped in result.figures:
        plot = wrapped.figure
        first_trace = plot.data[0]

        rows = []
        for percentile, count in zip(first_trace.x, first_trace.y):
            rows.append({"Percentile": percentile, "Outlier Count": count})

        result.add_table(ResultTable(title=plot.layout.title.text, data=rows))

    return result


# Run the IQROutliersBarPlot test on the test dataset.
# NOTE(review): the post_process_fn argument below is commented out, so
# create_table is defined but never applied and this run returns the
# unmodified result. Re-enable it to demonstrate this section, or confirm why
# it was disabled.
result = run_test(
    "validmind.data_validation.IQROutliersBarPlot",
    inputs={"dataset": vm_test_ds},
    generate_description=False,
    # post_process_fn=create_table,
)

In [None]:
# Show the most recent run_test result as this cell's output.
result

In [None]:
# NOTE(review): this unconditionally raises, so a top-to-bottom "Run All"
# stops here and none of the cells below execute. Presumably a temporary guard
# while the remaining sections are unfinished — confirm and remove before
# sharing the notebook.
raise Exception("stop")

## Re-Drawing the Class Imbalance Plot

In [None]:
import matplotlib.pyplot as plt


def re_draw_class_imbalance(result: TestResult):
    """Replace the result's figures with a matplotlib bar chart of class shares.

    Reads the first result table, which is expected to hold one row per class
    with an "Exited" column and a "Percentage of Rows (%)" column, discards
    any figures already attached to the result and adds a single bar chart
    under the key "class_imbalance".

    Args:
        result: Test result to modify in place.

    Returns:
        The same result object, now carrying only the re-drawn figure.

    Raises:
        ValueError: If a percentage entry cannot be parsed as a number.
    """
    data = result.tables[0].data
    # Expected table layout:
    #    Exited  Percentage of Rows (%)  Pass/Fail
    # 0       0                  80.25%       Pass
    # 1       1                  19.75%       Pass

    # The percentage column may hold formatted strings such as "80.25%".
    # Convert them to floats so bar heights are numeric instead of being
    # treated as categories; plain numbers pass through unchanged.
    percentages = [
        float(str(value).rstrip("%")) for value in data["Percentage of Rows (%)"]
    ]

    # Drop the figures produced by the test so only the new chart remains.
    result.figures = []

    # Use an explicit figure/axes pair instead of the implicit pyplot state.
    fig, ax = plt.subplots()
    ax.bar(data["Exited"], percentages)
    ax.set_xlabel("Exited")
    ax.set_ylabel("Percentage of Rows (%)")
    ax.set_title("Class Imbalance")

    result.add_figure(
        Figure(
            figure=fig,
            key="class_imbalance",
            ref_id=result.ref_id,
        )
    )

    # Close this specific figure rather than whichever one pyplot considers
    # current.
    plt.close(fig)

    return result


# Run the ClassImbalance test on the test dataset and replace its figures
# using re_draw_class_imbalance.
result = run_test(
    "validmind.data_validation.ClassImbalance",
    inputs={"dataset": vm_test_ds},
    generate_description=False,
    post_process_fn=re_draw_class_imbalance,
)

In [None]:
# Unmodified ClassImbalance run, for comparison with the post-processed
# variants of the same test.
result = run_test(
    "validmind.data_validation.ClassImbalance",
    inputs={"dataset": vm_test_ds},
    generate_description=False,
)

In [None]:
def post_process_class_imbalance(result: TestResult):
    """Clear the pass/fail flag and remove all figures from the result.

    Sets ``passed`` to None and ``figures`` to an empty list on the given
    result, leaving everything else untouched, then returns the same object.
    """
    result.passed, result.figures = None, []
    return result


# Run ClassImbalance with post_process_class_imbalance, which resets the
# pass/fail flag and removes the figures from the returned result.
result = run_test(
    "validmind.data_validation.ClassImbalance",
    inputs={"dataset": vm_test_ds},
    generate_description=False,
    post_process_fn=post_process_class_imbalance,
)

In [None]:
# Show the most recent run_test result as this cell's output.
result

In [None]:
# Run the ROCCurve test on the test dataset and model without post-processing.
result = run_test(
    "validmind.model_validation.sklearn.ROCCurve",
    inputs={"dataset": vm_test_ds, "model": vm_model},
    generate_description=False,
)

In [None]:
def post_process_roc_curve(result: TestResult):
    """Post-processing hook for the ROC curve result (currently a pass-through).

    The hook reads ``result.raw_data.fpr`` and does nothing with it yet. It
    returns the result so that, like the other post-processing functions in
    this notebook, it hands a result object back to the caller instead of None.

    Args:
        result: Test result whose ``raw_data`` exposes an ``fpr`` attribute.

    Returns:
        The same result object, unchanged.

    Raises:
        AttributeError: If ``raw_data`` or ``raw_data.fpr`` is missing.
    """
    # Keep the attribute access so a result without the expected raw data
    # still fails loudly here, as it did before.
    _ = result.raw_data.fpr

    return result

In [None]:
import random
import pandas as pd
import numpy as np
from plotly_express import bar
from validmind.vm_models.figure import Figure
from validmind.vm_models.result import TestResult
import plotly.graph_objects as go


@vm.test("my_custom_tests.Sensitivity")
def sensitivity_test(strike=None):
    """Return a randomly scaled option price for the given strike.

    The price is the strike multiplied by a value drawn from
    ``random.random()``, so results differ between runs unless the ``random``
    module is seeded beforehand.

    Args:
        strike: Numeric strike value. Must be provided; the ``None`` default
            only keeps the parameter optional in the signature.

    Returns:
        A one-row DataFrame with a single "Option price" column.

    Raises:
        TypeError: If ``strike`` is None.
    """
    if strike is None:
        # Multiplying None by a float would raise an opaque TypeError anyway;
        # raise the same exception type with an actionable message.
        raise TypeError("sensitivity_test requires a numeric 'strike' parameter")

    price = strike * random.random()

    return pd.DataFrame({"Option price": [price]})


def process_results(result: TestResult):
    """Attach a line chart of option price against strike to the result.

    The first result table is loaded into a DataFrame and its "strike" and
    "Option price" columns are drawn as a single line trace. The chart is
    added to the result under the key "sensitivity_to_strike" and the same
    result object is returned.
    """
    frame = pd.DataFrame(result.tables[0].data)

    price_curve = go.Scatter(
        x=frame["strike"].values,
        y=frame["Option price"].values,
        mode="lines",
    )

    fig = go.Figure(data=[price_curve])
    fig.update_layout(
        showlegend=True,
        template="plotly_white",  # Adds a grid by default
    )

    sensitivity_figure = Figure(
        figure=fig,
        key="sensitivity_to_strike",
        ref_id=result.ref_id,
    )
    result.add_figure(sensitivity_figure)

    return result


# Run the custom Sensitivity test over a grid of 20 evenly spaced strike values
# from 0 to 100, then chart the outcome with process_results.
# NOTE(review): process_results reads a "strike" column from the first result
# table, so param_grid presumably adds the grid values to that table — confirm
# against the run_test documentation.
result = run_test(
    "my_custom_tests.Sensitivity:ToStrike",
    param_grid={
        "strike": list(np.linspace(0, 100, 20)),
    },
    post_process_fn=process_results,
)

In [None]:
from validmind.tests import list_tests

# Call list_tests() from validmind.tests; its return value is shown as the
# cell output.
list_tests()