In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import json
import sys

# Suppress every warning for the rest of the session so the notebook output
# stays uncluttered. The filter is installed before the adjutorium imports
# below, so it is already active when they run.
import warnings
warnings.filterwarnings('ignore')

from adjutorium.studies.classifiers import ClassifierStudy
import adjutorium.logger as log

In [None]:
# Attach stderr as a sink of the adjutorium logger, registered with level "INFO".
log.add(level="INFO", sink=sys.stderr)

## Load toy dataset


In [None]:
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer toy dataset as pandas objects:
# X holds the features, Y is used as the target column further down.
X, Y = load_breast_cancer(as_frame=True, return_X_y=True)

# Show the feature frame as the cell output.
X

In [None]:
# Simulate missingness: blank out 10 rows in each of two feature columns.
import random

# A dedicated, seeded generator makes the chosen rows identical on every
# "Restart & Run All" (and on re-running this cell), and leaves the global
# `random` state untouched.
MISSINGNESS_SEED = 0
rng = random.Random(MISSINGNESS_SEED)

total_len = len(X)

for col in ["mean texture", "mean compactness"]:
    # Draw 10 distinct row numbers for this column and overwrite them with NaN.
    # NOTE: `.loc` is label-based; this relies on X still having its default
    # 0..n-1 integer index so that labels coincide with row positions.
    indices = rng.sample(range(total_len), 10)
    X.loc[indices, col] = np.nan

# Per-column flag: True where the column now contains at least one missing value.
X.isnull().any()

In [None]:
# Study input: a copy of the feature frame with the labels appended as the
# "target" column (X itself is left unchanged).
dataset = X.assign(target=Y)

In [None]:
# List available classifiers

from adjutorium.plugins.prediction import Classifiers

# Instantiate the classifier plugin collection and display what `list()` returns.
classifier_plugins = Classifiers()
classifier_plugins.list()

## Option 1: Predefined imputer

In [None]:
from pathlib import Path

# Folder handed to the study as its workspace; created up front if missing.
workspace = Path("workspace")
workspace.mkdir(exist_ok=True, parents=True)

study_name = "test_classification_studies"

# Option 1: the study is restricted to a single, predefined imputer ("mean").
study = ClassifierStudy(
    # identification / storage
    study_name=study_name,
    workspace=workspace,
    # data
    dataset=dataset,
    target="target",
    # search space
    classifiers=["logistic_regression", "lda"],
    imputers=["mean"],
    feature_scaling=[],  # feature preprocessing is disabled
    # search budget
    num_iter=2,
    num_study_iter=1,
    timeout=60,
    score_threshold=0.4,
)

In [None]:
# Execute the study configured in the previous cell. The next cell loads
# workspace / study_name / "model.p" — presumably the artifact written by
# this run (confirm against ClassifierStudy).
study.run()

In [None]:
from adjutorium.utils.serialization import load_model_from_file
from adjutorium.utils.tester import evaluate_estimator
from adjutorium.plugins.imputers import Imputers

# Load the model stored under the study's folder inside the workspace.
model_path = workspace / study_name / "model.p"
model = load_model_from_file(model_path)

# Fill the simulated gaps with the "mean" imputer plugin before scoring.
mean_imputer = Imputers().get("mean")
X_imp = mean_imputer.fit_transform(X)

# Evaluate the loaded model on the imputed features with the "aucroc" metric.
evaluate_estimator(model, X_imp, Y, metric="aucroc")

In [None]:
# Same model and imputed features, evaluated with the "aucprc" metric.
evaluate_estimator(model, X_imp, Y, metric="aucprc")

## Option 2: Let the optimizer find the best imputer

In [None]:
from pathlib import Path

# Create the workspace here as well (idempotent), so this section also runs
# on its own, e.g. when the Option 1 cells were skipped or the folder was
# removed in between.
workspace = Path("workspace")
workspace.mkdir(parents=True, exist_ok=True)

study_name = "test_classification_studies_v2"

# Option 2: several imputers are offered, so the study can choose among them
# instead of being pinned to a single predefined one.
study = ClassifierStudy(
    study_name=study_name,
    dataset=dataset,
    target="target",
    num_iter=2,
    num_study_iter=1,
    timeout=60,
    imputers=["mean", "ice", "median"],
    classifiers=["logistic_regression", "lda"],
    feature_scaling=[],  # feature preprocessing is disabled
    score_threshold=0.4,
    workspace=workspace,
)

In [None]:
# Execute the multi-imputer study configured in the previous cell. The next
# cell loads workspace / study_name / "model.p" — presumably the artifact
# written by this run (confirm against ClassifierStudy).
study.run()

In [None]:
from adjutorium.utils.serialization import load_model_from_file
from adjutorium.utils.tester import evaluate_estimator
from adjutorium.plugins.imputers import Imputers

# Load the model stored under this study's folder inside the workspace.
model_path = workspace / study_name / "model.p"
model = load_model_from_file(model_path)

# NOTE(review): the evaluation data is imputed with the "mean" plugin even
# though this study was allowed to pick among "mean", "ice" and "median" —
# confirm this is intended.
mean_imputer = Imputers().get("mean")
X_imp = mean_imputer.fit_transform(X)

# Evaluate the loaded model on the imputed features with the "aucroc" metric.
evaluate_estimator(model, X_imp, Y, metric="aucroc")

In [None]:
# Same model and imputed features, evaluated with the "aucprc" metric.
evaluate_estimator(model, X_imp, Y, metric="aucprc")

  self._optuna_storage = optuna.storages.RedisStorage(url=self.url)
  self._optuna_storage = optuna.storages.RedisStorage(url=self.url)
  self._optuna_storage = optuna.storages.RedisStorage(url=self.url)
