In [None]:
# stdlib
import json
import sys
import warnings

# third party
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Install a blanket "ignore" filter so Python warnings are not shown in the
# cells that follow.
warnings.filterwarnings("ignore")

# autoprognosis absolute
import autoprognosis.logger as log
from autoprognosis.studies.classifiers import ClassifierStudy

In [None]:
# Register stderr as a sink for the autoprognosis logger, configured with
# level "INFO", so log output from later cells appears in the notebook.
log.add(sink=sys.stderr, level="INFO")

## Load toy dataset


In [None]:
# third party
from sklearn.datasets import load_breast_cancer

# Breast-cancer toy data, requested as pandas objects: X holds the features,
# Y holds the labels.
X, Y = load_breast_cancer(as_frame=True, return_X_y=True)

# Display the feature table.
X

In [None]:
# Simulate missingness
# stdlib
import random

# Dedicated, seeded generator: the same rows are blanked on every run, so the
# notebook is reproducible and re-running this cell does not add extra NaNs.
rng = random.Random(0)

total_len = len(X)

# Blank 10 distinct rows in each of the two chosen feature columns.
for col in ["mean texture", "mean compactness"]:
    indices = rng.sample(range(total_len), 10)
    X.loc[indices, col] = np.nan

# Per-column flag: True where the column now contains at least one NaN.
X.isnull().any()

In [None]:
# Study input: the feature columns plus the labels under a "target" column.
# `assign` returns a new frame, so X itself is left untouched.
dataset = X.assign(target=Y)

In [None]:
# List available classifiers

# autoprognosis absolute
from autoprognosis.plugins.prediction import Classifiers

# Instantiate the classifier plugin collection, then display what it lists.
classifier_plugins = Classifiers()
classifier_plugins.list()

## Option 1: Predefined imputer

In [None]:
# stdlib
from pathlib import Path

# Folder used by the study; the model file is later read back from
# workspace / study_name / "model.p".
workspace = Path("workspace")
workspace.mkdir(exist_ok=True, parents=True)

study_name = "test_classification_studies"

# Option 1: the imputer is fixed to "mean"; only the classifiers are searched.
study = ClassifierStudy(
    # what to learn from
    dataset=dataset,
    target="target",
    # where to store results
    study_name=study_name,
    workspace=workspace,
    # search space
    classifiers=["logistic_regression", "lda"],
    imputers=["mean"],
    feature_scaling=[],  # feature preprocessing is disabled
    # search budget
    num_iter=2,
    num_study_iter=1,
    timeout=60,
    score_threshold=0.4,
)

In [None]:
# Execute the study configured in the previous cell. A later cell loads the
# resulting model from workspace / study_name / "model.p".
study.run()

In [None]:
# autoprognosis absolute
from autoprognosis.plugins.imputers import Imputers
from autoprognosis.utils.serialization import load_model_from_file
from autoprognosis.utils.tester import evaluate_estimator

# Load the model file stored under this study's workspace folder.
model_path = workspace / study_name / "model.p"
model = load_model_from_file(model_path)

# Fill the simulated gaps with the "mean" imputer (the same imputer this
# study was configured with) before scoring.
mean_imputer = Imputers().get("mean")
X_imp = mean_imputer.fit_transform(X)

# Score the loaded model with the "aucroc" metric.
evaluate_estimator(model, X_imp, Y, metric="aucroc")

In [None]:
# Same model and imputed features as the previous cell, scored with the
# "aucprc" metric instead of "aucroc".
evaluate_estimator(model, X_imp, Y, metric="aucprc")

## Option 2: Let the optimizer find the best imputer

In [None]:
# stdlib
from pathlib import Path

# Folder used by the study; the model file is later read back from
# workspace / study_name / "model.p". Created here as well (mirroring
# Option 1) so this cell does not rely on the Option 1 cell having run.
workspace = Path("workspace")
workspace.mkdir(parents=True, exist_ok=True)

study_name = "test_classification_studies_v2"

# Option 2: several imputers are offered, so the choice of imputer becomes
# part of the search alongside the classifiers.
study = ClassifierStudy(
    study_name=study_name,
    dataset=dataset,
    target="target",
    num_iter=100,
    num_study_iter=1,
    timeout=60,
    imputers=["mean", "ice", "median"],
    classifiers=["logistic_regression", "lda"],
    feature_scaling=[],  # feature preprocessing is disabled
    score_threshold=0.4,
    workspace=workspace,
)

In [None]:
# Execute the study configured in the previous cell. A later cell loads the
# resulting model from workspace / study_name / "model.p".
study.run()

In [None]:
# autoprognosis absolute
from autoprognosis.plugins.imputers import Imputers
from autoprognosis.utils.serialization import load_model_from_file
from autoprognosis.utils.tester import evaluate_estimator

# Load the model file stored under this study's workspace folder.
model_path = workspace / study_name / "model.p"
model = load_model_from_file(model_path)

# Fill the simulated gaps with the "mean" imputer before scoring.
# NOTE(review): this study searched over "mean", "ice" and "median", yet the
# evaluation data is always mean-imputed. Confirm that is intended.
mean_imputer = Imputers().get("mean")
X_imp = mean_imputer.fit_transform(X)

# Score the loaded model with the "aucroc" metric.
evaluate_estimator(model, X_imp, Y, metric="aucroc")

In [None]:
# Same model and imputed features as the previous cell, scored with the
# "aucprc" metric instead of "aucroc".
evaluate_estimator(model, X_imp, Y, metric="aucprc")

# Congratulations!

Congratulations on completing this notebook tutorial! If you enjoyed it and would like to join the movement towards machine learning and AI for medicine, you can do so in the following ways:

### Star AutoPrognosis on GitHub

The easiest way to help our community is to star the repos! This helps raise awareness of the tools we're building.

- [Star AutoPrognosis](https://github.com/vanderschaarlab/autoprognosis)
- [Star HyperImpute](https://github.com/vanderschaarlab/hyperimpute)
