# Predict the censoring (E) based on the synthetic covariates

In [None]:
!pip install autoprognosis

In [None]:
import sys
from pathlib import Path

import synthcity.logger as log
from synthcity.benchmark import Benchmarks
from synthcity.plugins import Plugins
from synthcity.plugins.core.dataloader import SurvivalAnalysisDataLoader
from synthcity.utils.serialization import load_from_file, save_to_file

from datasets import get_dataset

# Directory holding the cached benchmark results written by evaluate_dataset.
out_dir = Path("output")

# Reconfigure the synthcity logger: clear what is currently registered
# (presumably the default sinks -- confirm against synthcity.logger), then
# emit messages at INFO level to stderr.
log.remove()
log.add(sink=sys.stderr, level="INFO")


def evaluate_dataset(dataset: str, plugin: tuple, repeats: int = 2):
    """Benchmark one plugin on a survival dataset, caching the scores on disk.

    The scores are stored in ``out_dir / f"metrics.{dataset}_{plugin[0]}.bkp"``.
    When that file already exists it is loaded and returned as-is, without
    loading the dataset or running the benchmark again.

    NOTE: the cache key only contains ``dataset`` and ``plugin[0]``. A cached
    result is therefore reused even if ``repeats`` or the remaining entries of
    ``plugin`` differ from the run that produced it; delete the ``.bkp`` file
    to force a re-evaluation.

    Args:
        dataset: Name passed to ``get_dataset``.
        plugin: Test-case tuple handed to ``Benchmarks.evaluate``; its first
            element is also used in the cache file name.
        repeats: Forwarded to ``Benchmarks.evaluate`` as ``repeats``.

    Returns:
        Whatever ``Benchmarks.evaluate`` produced (freshly computed, or the
        copy previously saved with ``save_to_file``).
    """
    bkp = out_dir / f"metrics.{dataset}_{plugin[0]}.bkp"

    # Cache hit: nothing below is needed, so skip loading the data entirely.
    if bkp.exists():
        return load_from_file(bkp)

    # Make sure the output directory exists *before* the (potentially long)
    # benchmark runs, so the result is not lost to a missing directory when
    # it is saved afterwards.
    bkp.parent.mkdir(parents=True, exist_ok=True)

    df, duration_col, event_col, time_horizons = get_dataset(dataset)
    dataloader = SurvivalAnalysisDataLoader(
        df,
        target_column=event_col,
        time_to_event_column=duration_col,
        time_horizons=time_horizons,
    )

    score = Benchmarks.evaluate(
        [plugin],
        dataloader,
        task_type="survival_analysis",
        target_column=event_col,
        time_to_event_column=duration_col,
        time_horizons=time_horizons,
        # Generate as many synthetic rows as there are real ones.
        synthetic_size=len(df),
        repeats=repeats,
        metrics={
            "performance": ["linear_model", "xgb"],
            "stats": ["survival_km_distance"],
        },
    )
    save_to_file(bkp, score)

    return score

In [None]:
# Test case passed to evaluate_dataset as `plugin`. The first entry doubles as
# the label in the cache file name; the remaining two are presumably the
# synthcity plugin name and its keyword arguments -- confirm against
# Benchmarks.evaluate.
eval_plugin = (
    "survival_predicting_censoring",
    "survival_gan",
    dict(censoring_strategy="covariate_dependent"),
)

In [None]:
from autoprognosis.utils.tester import evaluate_estimator
from xgboost import XGBClassifier

# Load the "aids" dataset and split it into the event column (E), the
# duration column (T) and the remaining columns (X).
df, duration_col, event_col, time_horizons = get_dataset("aids")
E = df[event_col]
T = df[duration_col]
X = df.drop([duration_col, event_col], axis=1)

# Evaluate an XGBoost classifier that predicts E from X.
evaluate_estimator(XGBClassifier(), X, E)

In [None]:
from autoprognosis.utils.tester import evaluate_estimator
from xgboost import XGBClassifier

# Reload the "aids" dataset and rebuild the same E / T / X split.
df, duration_col, event_col, time_horizons = get_dataset("aids")
E = df[event_col]
T = df[duration_col]
X = df.drop([duration_col, event_col], axis=1)

# Number of rows whose event column equals 1.
E.eq(1).sum()

In [None]:
# Run (or load from the on-disk cache) the benchmark for the "aids" dataset;
# as the last expression of the cell, its result is displayed.
evaluate_dataset(dataset="aids", plugin=eval_plugin)

In [None]:
# Fetch the benchmark scores for "aids" and print them with synthcity's
# Benchmarks.print helper.
score = evaluate_dataset(dataset="aids", plugin=eval_plugin)
Benchmarks.print(score)