In [None]:
from lifelines.datasets import load_rossi
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import json
import sys

import warnings
warnings.filterwarnings('ignore')

from adjutorium.studies.risk_estimation import RiskEstimationStudy
import adjutorium.logger as log

In [None]:
# Register stderr as a log sink at level "INFO" so messages emitted through
# adjutorium's logger show up in the notebook output.
log.add(level="INFO", sink=sys.stderr)

## Load toy dataset


In [None]:
from lifelines.datasets import load_rossi

# Load the Rossi dataset bundled with lifelines.
rossi = load_rossi()

# Split the frame into the time column ("week"), the event column ("arrest")
# and the remaining covariates.
T = rossi["week"]
Y = rossi["arrest"]
X = rossi.drop(columns=["week", "arrest"])

# Single evaluation horizon: the median "week" among rows where arrest == 1,
# truncated to an integer.
event_weeks = T[Y == 1]
eval_time_horizons = [int(event_weeks.quantile(0.50))]

X

In [None]:
# Simulate missingness
import random

# Use a dedicated, seeded generator so the same entries are blanked on every
# run (and on every re-run of this cell), without touching the global
# `random` state. Without a seed, each execution masks different rows and
# the downstream scores are not reproducible.
MISSINGNESS_SEED = 0
rng = random.Random(MISSINGNESS_SEED)

total_len = len(X)

for col in ["age", "paro"]:
    # Draw 10 distinct values from range(total_len) and use them as row labels.
    # NOTE(review): assumes X has the default 0..n-1 integer index, so labels
    # and positions coincide -- confirm if the loading step ever changes.
    indices = rng.sample(range(0, total_len), 10)
    X.loc[indices, col] = np.nan

# Per-column flag: does the column now contain at least one missing value?
X.isnull().any()

In [None]:
# Build the study input: a new frame holding the covariates plus the event
# indicator ("target") and the event time ("time_to_event"). `assign` returns
# a new DataFrame, so X itself is left untouched.
dataset = X.assign(target=Y, time_to_event=T)


## Option 1: Predefined imputer

In [None]:
from pathlib import Path

workspace = Path("workspace")
# Create the workspace directory up front so the cell also works on a fresh
# checkout; `exist_ok=True` keeps re-runs harmless.
workspace.mkdir(parents=True, exist_ok=True)

study_name = "test_risk_estimation_studies"

# Study restricted to a single, predefined imputer ("mean").
study = RiskEstimationStudy(
    study_name=study_name,
    dataset=dataset,
    # Column names inside `dataset` for the event indicator and the event time.
    target="target",
    time_to_event="time_to_event",
    time_horizons=eval_time_horizons,
    # Small iteration counts / timeout -- presumably chosen so the demo
    # finishes quickly; confirm units and semantics in the adjutorium docs.
    num_iter=2,
    num_study_iter=1,
    timeout=60,
    # Candidate components the study may pick from.
    risk_estimators=["cox_ph", "lognormal_aft", "survival_xgboost"],
    imputers=["mean"],
    feature_scaling=["minmax_scaler", "nop"],
    score_threshold=0.4,
    workspace=workspace,
)

In [None]:
# Execute the study configured above.
# NOTE(review): presumably this persists the selected model under
# workspace / study_name / "model.p", which the next cell loads -- confirm.
study.run()

In [None]:
from adjutorium.plugins.imputers import Imputers
from adjutorium.utils.serialization import load_model_from_file
from adjutorium.utils.tester import evaluate_survival_estimator

# Load the model file stored under the study's folder in the workspace.
model_path = workspace / study_name / "model.p"
model = load_model_from_file(model_path)

# Fill the simulated gaps with the "mean" imputer (the only imputer this
# study was configured with) before handing the covariates to the evaluator.
mean_imputer = Imputers().get("mean")
X_imp = mean_imputer.fit_transform(X)

evaluate_survival_estimator(model, X_imp, T, Y, eval_time_horizons)

## Option 2: Let the optimizer find the best imputer

In [None]:
from pathlib import Path

study_name = "test_risk_estimation_studies_v2"

# Make sure the workspace directory exists before the study is created.
workspace = Path("workspace")
workspace.mkdir(parents=True, exist_ok=True)

# Same setup as a single-imputer study, except that several imputers are
# offered as candidates.
study = RiskEstimationStudy(
    study_name=study_name,
    workspace=workspace,
    dataset=dataset,
    # Column names inside `dataset` for the event indicator and the event time.
    target="target",
    time_to_event="time_to_event",
    time_horizons=eval_time_horizons,
    # Candidate components the study may pick from.
    risk_estimators=["cox_ph", "lognormal_aft", "survival_xgboost"],
    imputers=["mean", "ice", "median"],
    feature_scaling=["minmax_scaler", "nop"],
    # Small iteration counts / timeout -- presumably chosen so the demo
    # finishes quickly; confirm units and semantics in the adjutorium docs.
    num_iter=2,
    num_study_iter=1,
    timeout=60,
    score_threshold=0.4,
)

In [None]:
# Execute the multi-imputer study configured above.
# NOTE(review): presumably this persists the selected model under
# workspace / study_name / "model.p", which the next cell loads -- confirm.
study.run()

In [None]:
from adjutorium.utils.tester import evaluate_survival_estimator
from adjutorium.utils.serialization import load_model_from_file

# Load the model file stored under the study's folder in the workspace.
model_path = workspace / study_name / "model.p"
model = load_model_from_file(model_path)

# X is passed as-is, still containing the simulated missing values.
# NOTE(review): this presumably relies on the loaded model carrying its own
# imputation step -- confirm against the adjutorium documentation.
evaluate_survival_estimator(model, X, T, Y, eval_time_horizons)