# Tutorial: Build a demonstrator for Survival Analysis

Welcome!

This tutorial will walk you through the steps of selecting a model for a survival analysis task, and creating a demonstrator on top of it.

In [None]:
# stdlib
import sys
import warnings

# third party
import numpy as np
import pandas as pd

# Silence every warning from this point on. This runs before the autoprognosis
# imports below, so warnings they emit at import time are suppressed as well.
warnings.filterwarnings("ignore")

# autoprognosis absolute
import autoprognosis.logger as log
from autoprognosis.studies.risk_estimation import RiskEstimationStudy

In [None]:
# Register stderr as a sink for the autoprognosis logger, configured with level "INFO".
log.add(sink=sys.stderr, level="INFO")

## Load dataset


In [None]:
# third party
from pycox import datasets

# Read the gbsg frame from pycox and keep only rows with a strictly positive duration.
df = datasets.gbsg.read_df().loc[lambda frame: frame["duration"] > 0]

# Split into covariates (X), durations (T) and the event column (Y).
T, Y = df["duration"], df["event"]
X = df.drop(columns=["duration", "event"])

# Evaluation horizons: the three quartiles of the durations of rows where
# event == 1, each truncated to an integer.
event_durations = T[Y == 1]
eval_time_horizons = [int(event_durations.quantile(q)) for q in (0.25, 0.50, 0.75)]

In [None]:
# Build a single frame from the covariates plus the event and duration columns.
# The new column names are the ones passed as `target` / `time_to_event` further down.
dataset = X.assign(target=Y, time_to_event=T)

## Select model

In [None]:
# stdlib
from pathlib import Path

study_name = "test_demonstrator_survival"

# Directory used for on-disk artifacts; created if it does not exist yet.
workspace = Path("workspace")
workspace.mkdir(parents=True, exist_ok=True)

# Search budget handed to the study.
# NOTE(review): these values look deliberately small for a quick tutorial run;
# confirm their exact meaning against the RiskEstimationStudy documentation
# before tuning them for real use.
search_budget = {"num_iter": 2, "num_study_iter": 1, "timeout": 60}

# Plugin names the study is allowed to choose from.
search_space = {
    "risk_estimators": ["cox_ph", "lognormal_aft", "survival_xgboost"],
    "imputers": ["mean", "ice", "median"],
    "feature_scaling": ["minmax_scaler", "nop"],
}

study = RiskEstimationStudy(
    study_name=study_name,
    dataset=dataset,
    target="target",
    time_to_event="time_to_event",
    time_horizons=eval_time_horizons,
    score_threshold=0.4,
    workspace=workspace,
    **search_budget,
    **search_space,
)

In [None]:
# Execute the study configured above.
# NOTE(review): the demonstrator cell below reads workspace/<study_name>/model.p,
# so this call is presumably what writes that file — confirm.
study.run()

## Build the demonstrator

In [None]:
# stdlib
from pathlib import Path

# autoprognosis absolute
from autoprognosis.deploy.build import Builder
from autoprognosis.deploy.proto import NewRiskEstimationAppProto

# Write the training frame to disk so it can be referenced by path below.
# index=False is the documented way to leave the index column out of the CSV.
dataset_path = workspace / "demo_dataset_surv.csv"
dataset.to_csv(dataset_path, index=False)

name = "AutoPrognosis demo: Survival Analysis"

# Location where the model from the study cell is expected.
model_path = workspace / study_name / "model.p"
if not model_path.exists():
    # Fail early with an actionable message instead of an error from inside the builder.
    raise FileNotFoundError(
        f"No trained model found at {model_path}. "
        "Run the study cell first and check that it produced a model."
    )

# Must match the column names used when `dataset` was assembled.
time_column = "time_to_event"
target_column = "target"
task_type = "risk_estimation"

task = Builder(
    NewRiskEstimationAppProto(
        name=name,
        type=task_type,
        dataset_path=str(dataset_path),
        model_path=str(model_path),
        time_column=time_column,
        target_column=target_column,
        horizons=eval_time_horizons,
        explainers=["kernel_shap"],
        imputers=[],
        plot_alternatives=[],
        comparative_models=[
            (
                "Cox PH",  # display name
                "cox_ph",  # autoprognosis plugin name
                {},  # plugin args
            ),
        ],
        auth=False,
        extras_cbk=None,
    ),
)

# Build the app; the returned path is consumed by the entry-script cell below.
app_path = task.run()

# Display the resulting path as the cell output.
app_path

## Run the demonstrator

In [None]:
# Prepare the entry file for Streamlit - workspace/app_surv.py
#
# The path is embedded through repr() (the `!r` conversion) rather than being
# pasted between hand-written quotes, so the generated file always contains a
# valid Python string literal, even when the path has backslashes (Windows)
# or quote characters.
script = f"""
from pathlib import Path
from autoprognosis.deploy.run import start_app_server

app_path = {str(app_path)!r}
start_app_server(Path(app_path))
"""

# Python source files are read as UTF-8 by default, so write the script in that
# encoding regardless of the platform's locale.
with open(workspace / "app_surv.py", "w", encoding="utf-8") as f:
    f.write(script)

In [None]:
# Start Streamlit on the entry script written to workspace/app_surv.py by the previous cell.
!streamlit run workspace/app_surv.py

# Congratulations!

Congratulations on completing this notebook tutorial! If you enjoyed it and would like to join the movement towards machine learning and AI for medicine, you can do so in the following ways:

### Star AutoPrognosis on GitHub

The easiest way to help our community is to star the repos! This helps raise awareness of the tools we're building.

- [Star AutoPrognosis](https://github.com/vanderschaarlab/autoprognosis)
- [Star HyperImpute](https://github.com/vanderschaarlab/hyperimpute)
