# Quickstart for Classification Models (Full Suite)


This notebook provides a quick introduction to documenting a model using the ValidMind developer framework. We will use sample datasets provided by the library and train a simple classification model.


## Initialize ValidMind


In [1]:
%load_ext dotenv
%dotenv .env

import validmind as vm
import xgboost as xgb

# Connect this notebook to the ValidMind API for the target project.
# Only the endpoint and project ID are passed here; any credentials are
# presumably read from the environment populated by `%dotenv .env` above
# -- confirm against your ValidMind setup.
vm.init(
    api_host="http://localhost:3000/api/v1/tracking",
    project="clini39or0000pimzpbf0tqai",
)

2023-06-09 11:41:00,149 - INFO - api_client - Connected to ValidMind. Project: Test Project (clini39or0000pimzpbf0tqai)


## View the Current Documentation Template


In [2]:
# Preview the documentation template attached to the current project.
# The call is deliberately left as the cell's final statement; the output
# recorded for this cell is an Accordion widget.
vm.preview_template()

Accordion(children=(Accordion(children=(HTML(value='<p>Empty Section</p>'), Accordion(children=(HTML(value='<p…

## Load the Demo Dataset


In [3]:
# Use the customer churn demo dataset provided by the ValidMind library.
from validmind.datasets.classification import customer_churn as demo_dataset

# To try the Taiwan credit dataset instead, swap the import above for:
# from validmind.datasets.classification import taiwan_credit as demo_dataset

# Load the raw data. Later cells also read `target_column` and `class_labels`
# from `demo_dataset` and call its `preprocess()` helper.
df = demo_dataset.load_data()

In [4]:
# Register the raw (not yet preprocessed) DataFrame as a ValidMind dataset.
# The target column and class labels are taken from the demo dataset module.
# `vm_dataset` is what gets passed as `dataset=` when the template is run.
vm_dataset = vm.init_dataset(
    dataset=df,
    target_column=demo_dataset.target_column,
    class_labels=demo_dataset.class_labels,
)

2023-06-09 11:41:00,278 - INFO - client - Pandas dataset detected. Initializing VM Dataset instance...
2023-06-09 11:41:00,279 - INFO - dataset - Inferring dataset types...


## Run the Full Data and Model Validation Test Suite


We will need to preprocess the dataset and produce the training, test and validation splits first.


### Preprocess the Raw Dataset


In [5]:
# Run the demo dataset's own preprocessing on the raw frame; it returns three
# frames, unpacked here as the training, validation and test splits.
train_df, validation_df, test_df = demo_dataset.preprocess(df)

In [6]:
# Separate the feature columns from the target for the training and
# validation splits.
x_train = train_df.drop(columns=demo_dataset.target_column)
y_train = train_df[demo_dataset.target_column]
x_val = validation_df.drop(columns=demo_dataset.target_column)
y_val = validation_df[demo_dataset.target_column]

# Configure the classifier in a single place. Per the XGBoost docs, early
# stopping monitors the last metric listed in `eval_metric` ("auc") on the
# evaluation set supplied to `fit`.
model = xgb.XGBClassifier(
    early_stopping_rounds=10,
    eval_metric=["error", "logloss", "auc"],
)

# The validation split is the only evaluation set; per-round logging is
# switched off with verbose=False.
model.fit(x_train, y_train, eval_set=[(x_val, y_val)], verbose=False)

In [7]:
# Register the preprocessed training and test splits with ValidMind.
# NOTE(review): these pass type="generic" while the raw dataset cell omits
# `type` -- confirm this difference is intentional.
vm_train_ds = vm.init_dataset(
    dataset=train_df,
    type="generic",
    target_column=demo_dataset.target_column,
)
vm_test_ds = vm.init_dataset(
    dataset=test_df,
    type="generic",
    target_column=demo_dataset.target_column,
)

# Wrap the fitted classifier together with the datasets it is evaluated on.
vm_model = vm.init_model(model, train_ds=vm_train_ds, test_ds=vm_test_ds)

2023-06-09 11:41:00,503 - INFO - client - Pandas dataset detected. Initializing VM Dataset instance...
2023-06-09 11:41:00,503 - INFO - dataset - Inferring dataset types...
2023-06-09 11:41:00,545 - INFO - client - Pandas dataset detected. Initializing VM Dataset instance...
2023-06-09 11:41:00,545 - INFO - dataset - Inferring dataset types...


### Run the Template


In [8]:
# Per-test parameter overrides passed to the template run, keyed by test name.
# NOTE(review): "scaling_factor_std_dev_list" presumably lists the noise levels
# the robustness test sweeps over, and "accuracy_decay_threshold" the allowed
# accuracy drop -- exact semantics and units are not visible here; confirm
# against the ValidMind documentation.
suite_config = {
    "robustness": {
        "scaling_factor_std_dev_list": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
        "accuracy_decay_threshold": 4,
    },
}

# Run the project's documentation template against the raw dataset and the
# wrapped model; the result object is kept in `full_suite`.
full_suite = vm.run_template(dataset=vm_dataset, model=vm_model, config=suite_config)

HBox(children=(Label(value='Running test suite...'), IntProgress(value=0, max=54)))

VBox(children=(HTML(value='<h2>Test Suite Results: <i style="color: #DE257E">Binary Classification V2</i></h2>…