In [22]:
import ray
from pathlib import Path
from ray import tune
import time
from ray.tune.search.hyperopt import HyperOptSearch
from ray.tune.schedulers import AsyncHyperBandScheduler
from mads_datasets.base import BaseDatastreamer
from mltrainer.preprocessors import BasePreprocessor

In [None]:
# Batch streamers for the training and test datasets; each gets its own
# BasePreprocessor instance and the same batch size.
# NOTE(review): `traindataset` / `testdataset` are not defined in the visible
# cells, and `config` must expose a `.batchsize` attribute here even though a
# later cell rebinds `config` to a plain dict -- this cell depends on
# execution order; confirm it survives Restart & Run All.
trainstreamer = BaseDatastreamer(
    traindataset,
    preprocessor=BasePreprocessor(),
    batchsize=config.batchsize,
)
teststreamer = BaseDatastreamer(
    testdataset,
    preprocessor=BasePreprocessor(),
    batchsize=config.batchsize,
)

In [17]:
# Absolute directory for Ray Tune output, resolved against the current
# working directory at the time this cell runs.
tune_dir: Path = Path("models/ray/").resolve()

# Search budget: number of sampled configurations and the epoch cap per trial.
N_EXPERIMENTS: int = 18
MAX_EPOCHS: int = 5

In [9]:
# Hyperopt-based search algorithm used to propose configurations.
search = HyperOptSearch()

# Asynchronous HyperBand: every trial gets at least `grace_period` iterations,
# and is compared against its peers at successive rungs.
# `max_t` is the maximum number of iterations a trial may run; it was
# previously hard-coded to 1, which halted every trial after a single
# iteration and left the scheduler nothing to compare. Tie it to MAX_EPOCHS
# so trials can actually use the configured epoch budget.
scheduler = AsyncHyperBandScheduler(
    time_attr="training_iteration",
    grace_period=1,
    reduction_factor=3,
    max_t=MAX_EPOCHS,
)

In [19]:
# Result accumulators filled in by later cells: `timer` is keyed by run name
# and holds elapsed seconds, `best_config` is keyed by run name and holds the
# best configuration found. Two distinct, initially empty dicts.
timer, best_config = {}, {}

In [None]:
# Ray Tune search space. Plain values are passed unchanged to every trial;
# the `tune.randint(...)` entries are sampled per trial.
# NOTE(review): confirm whether the upper bound of `tune.randint` is
# exclusive for the installed Ray version.
config = dict(
    input_size=3,
    output_size=20,
    dropout=0.1,
    epochs=MAX_EPOCHS,
    hidden_size=tune.randint(16, 512),
    num_layers=tune.randint(1, 8),
    tune_dir=tune_dir,
)

In [None]:
def tune_model(config):
    """
    Train and evaluate one hyperparameter configuration inside an MLflow run.

    Args:
        config (dict): Hyperparameter configuration passed by Ray Tune.
            Every key/value pair is logged as an MLflow parameter. Two
            optional keys are read explicitly:
              - "epochs": number of training epochs (defaults to 5).
              - "matrixshape": indexable with at least two entries, logged
                as the params "matrix0" and "matrix1" when present.

    Returns:
        None. Results are emitted through the Trainer's report types and as
        MLflow tags, params and "TP_<i>" metrics.

    This function reads notebook-level names that it does not define:
    `trainstreamer`, `teststreamer`, `model`, `dev`, `device` and
    `caluclate_cfm`. They must exist in the namespace the trial runs in.

    NOTE(review): `model` is taken from the notebook namespace and is not
    built from `config`, so sampled values such as "hidden_size" or
    "num_layers" presumably do not influence training -- confirm.
    NOTE(review): nothing visible here reports metrics back to Ray Tune
    (only TENSORBOARD and MLFLOW report types are enabled) -- confirm the
    metric Ray is asked to optimise is actually produced.
    """
    import mlflow
    import numpy as np  # used for np.diag below; not imported in the notebook's import cell
    import torch
    from mltrainer import Trainer, TrainerSettings, ReportTypes
    from metrics import accuracy, f1micro, f1macro, precision, recall  # Ensure these metrics are imported

    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam

    # Start an MLflow run
    with mlflow.start_run():
        settings = TrainerSettings(
            # Honour the epoch budget from the search config instead of a
            # hard-coded literal; fall back to the previous value of 5.
            epochs=config.get("epochs", 5),
            metrics=[accuracy, f1micro, f1macro, precision, recall],
            logdir="logs/heart2D",  # Adjust as necessary
            train_steps=len(trainstreamer),  # Ensure trainstreamer is globally accessible
            valid_steps=len(teststreamer),  # Ensure teststreamer is globally accessible
            reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
            scheduler_kwargs=None,
            earlystop_kwargs=None
        )

        # Set tags and log parameters
        mlflow.set_tag("model", "CNN")
        mlflow.set_tag("dataset", "heart2D")
        mlflow.set_tag("dev", dev)  # Ensure `dev` is defined
        mlflow.log_param("scheduler", "None")
        mlflow.log_param("earlystop", "None")

        # `config` is a plain dict, so dataclasses.asdict() would raise
        # TypeError; copy it instead. "epochs" is merged into the same batch
        # so the parameter is only logged once.
        params = dict(config)
        params.setdefault("epochs", settings.epochs)
        mlflow.log_params(params)

        # "matrixshape" is not part of every search space; only log it when
        # the configuration actually provides it instead of raising KeyError.
        matrixshape = config.get("matrixshape")
        if matrixshape is not None:
            mlflow.log_param("matrix0", matrixshape[0])
            mlflow.log_param("matrix1", matrixshape[1])

        mlflow.log_param("optimizer", str(optimizer))
        mlflow.log_params(settings.optimizer_kwargs)

        # Initialize Trainer
        trainer = Trainer(
            model=model,  # Ensure model is globally accessible or passed in config
            settings=settings,
            loss_fn=loss_fn,
            optimizer=optimizer,
            traindataloader=trainstreamer.stream(),
            validdataloader=teststreamer.stream(),
            scheduler=None,
            device=device,  # Ensure device is defined
        )

        # Training loop
        trainer.loop()

        # Log the diagonal of the confusion matrix, one metric per class index.
        cfm = caluclate_cfm(model, teststreamer)  # Ensure caluclate_cfm function is available
        for i, tp in enumerate(np.diag(cfm)):
            mlflow.log_metric(f"TP_{i}", tp)


In [20]:
# Run the hyperparameter sweep and record how long it took (wall clock).
# `tune_model` must already be defined when this cell runs.
tic = time.time()
analysis = tune.run(
    tune_model,
    config=config,
    # NOTE(review): requires the trainable to report a "valid_loss" metric
    # to Ray Tune -- confirm the key name matches what is reported.
    metric="valid_loss",
    mode="min",
    local_dir=str(tune_dir),
    num_samples=N_EXPERIMENTS,
    stop={"training_iteration": MAX_EPOCHS},
    # The HyperOptSearch instance was created but never handed to Ray, so
    # the sweep silently used the default search; pass it explicitly.
    search_alg=search,
    scheduler=scheduler,
    verbose=1,
)

timer["ray_hyperband"] = time.time() - tic

# Keep the best configuration together with the accuracy of its best result.
# NOTE(review): assumes the reported result dict has an "accuracy" key --
# confirm against the metric names the trainable reports.
best = analysis.get_best_config()
best["accuracy"] = analysis.best_result["accuracy"]
best_config["hyperband"] = best

NameError: name 'tune_model' is not defined