# Tutorial: Build a demonstrator for Classification studies

Welcome!

This tutorial will walk you through the steps of selecting a model for a classification task, and creating a demonstrator on top of it.

In [None]:
# stdlib
import sys
import warnings

# third party
import numpy as np
import pandas as pd

# Install a blanket "ignore" filter: every warning category is suppressed from
# here on (presumably to keep the tutorial output uncluttered).
warnings.simplefilter("ignore")

# autoprognosis absolute
import autoprognosis.logger as log

In [None]:
# Register stderr as a sink for the AutoPrognosis logger at the "INFO" level.
log.add(level="INFO", sink=sys.stderr)

## Load dataset


In [None]:
# third party
from sklearn.datasets import load_breast_cancer

# Fetch the features (X) and labels (Y) as pandas objects (as_frame=True).
X, Y = load_breast_cancer(as_frame=True, return_X_y=True)

# Combine both into one frame: the feature columns plus the labels under
# "target". `assign` returns a new frame, so X itself is left unchanged.
df = X.assign(target=Y)

## Select model

In [None]:
# stdlib
from pathlib import Path

# autoprognosis absolute
from autoprognosis.studies.classifiers import ClassifierStudy

# Name of the study; a later cell reuses it to locate the saved model.
study_name = "test_demonstrator_classification"

# Directory passed to the study as its workspace; create it if it is missing.
workspace = Path("workspace")
workspace.mkdir(exist_ok=True, parents=True)

study = ClassifierStudy(
    dataset=df,  # pandas DataFrame holding the features and the label
    target="target",  # name of the label column inside `dataset`
    classifiers=["logistic_regression", "xgboost"],  # plugins to search over
    # Optimization timeout, in seconds, for each classifier. Kept short here;
    # NOTE(review): the library default is longer - confirm the exact value in
    # the ClassifierStudy documentation.
    timeout=60,
    study_name=study_name,
    workspace=workspace,
)

In [None]:
# Execute the study configured in the previous cell. The build step further
# down looks for a model file at workspace/<study_name>/model.p - presumably
# written by this call; confirm against the ClassifierStudy documentation.
study.run()

## Build the demonstrator

In [None]:
# stdlib
from pathlib import Path

# autoprognosis absolute
from autoprognosis.deploy.build import Builder
from autoprognosis.deploy.proto import NewClassificationAppProto

# Write the dataset to a CSV file inside the workspace; its path is handed to
# the app definition below.
dataset_path = workspace / "demo_dataset_classification.csv"
# `index` is documented as a bool flag: pass False explicitly (instead of
# relying on None being falsy) to leave the row index out of the file.
df.to_csv(dataset_path, index=False)

name = "AutoPrognosis demo: Classification"
# Location where the model of the study is expected to be found.
model_path = workspace / study_name / "model.p"

target_column = "target"
task_type = "classification"

# Settings of the demonstrator, keyed by the field names of
# NewClassificationAppProto. Paths are passed as plain strings.
app_settings = {
    "name": name,
    "type": task_type,
    "dataset_path": str(dataset_path),
    "model_path": str(model_path),
    "target_column": target_column,
    "explainers": ["kernel_shap"],
    "imputers": [],
    "plot_alternatives": [],
    "comparative_models": [
        (
            "Logistic regression",  # display name
            "logistic_regression",  # autoprognosis plugin name
            {},  # plugin args
        ),
    ],
    "auth": False,
}

task = Builder(
    NewClassificationAppProto(**app_settings),
)

# Build the app; the returned value is reused below when the Streamlit entry
# file is generated.
app_path = task.run()

# Last expression of the cell: display the result of the build.
app_path

## Run the demonstrator

In [None]:
# Prepare the entry file for Streamlit - app.py
#
# The path is embedded through repr() so that the generated file always holds
# a valid Python string literal: backslashes (Windows paths) and quotes are
# escaped instead of being pasted verbatim between single quotes.
script = f"""
from pathlib import Path
from autoprognosis.deploy.run import start_app_server

app_path = {str(app_path)!r}
start_app_server(Path(app_path))
"""

# Python reads source files as UTF-8 by default, so write the entry file with
# that encoding explicitly rather than with the platform's locale encoding.
with open(workspace / "app_classification.py", "w", encoding="utf-8") as f:
    f.write(script)

In [None]:
# Launch the demonstrator by running the generated entry file with the
# Streamlit CLI (IPython shell escape).
# NOTE(review): the path is hard-coded and must stay in sync with
# workspace / "app_classification.py" written in the previous cell.
!streamlit run workspace/app_classification.py

# Congratulations!

Congratulations on completing this notebook tutorial! If you enjoyed it and would like to join the movement towards machine learning and AI for medicine, you can do so in the following ways!

### Star AutoPrognosis on GitHub

The easiest way to help our community is to star the repositories! This helps raise awareness of the tools we're building.

- [Star AutoPrognosis](https://github.com/vanderschaarlab/autoprognosis)
- [Star HyperImpute](https://github.com/vanderschaarlab/hyperimpute)
