In [None]:
from datetime import datetime
from pathlib import Path

from hyperopt import STATUS_OK
from loguru import logger
from mads_datasets import DatasetFactoryProvider, DatasetType
import mlflow
from mltrainer import ReportTypes, Trainer, TrainerSettings, metrics
from mltrainer.preprocessors import BasePreprocessor
import torch
import torch.nn as nn
import torch.optim as optim
from torchinfo import summary

In [3]:
# Build the Fashion dataset factory and derive batched train/valid streams
# from it; every name below is module-level state used by later cells.
batchsize = 64
fashionfactory = DatasetFactoryProvider.create_factory(DatasetType.FASHION)
preprocessor = BasePreprocessor()
streamers = fashionfactory.create_datastreamer(
    preprocessor=preprocessor, batchsize=batchsize
)
# Pick the two splits first, then open one stream per split.
train, valid = streamers["train"], streamers["valid"]
trainstreamer, validstreamer = train.stream(), valid.stream()

[32m2025-11-04 10:38:02.484[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /Users/koendirkvanesterik/.cache/mads_datasets/fashionmnist[0m
[32m2025-11-04 10:38:02.486[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m124[0m - [1mFile already exists at /Users/koendirkvanesterik/.cache/mads_datasets/fashionmnist/fashionmnist.pt[0m


In [4]:
# Track runs in a local SQLite database and select the experiment that all
# runs in this notebook are recorded under. The second call is left as the
# final expression so the notebook displays the resulting Experiment.
mlflow.set_tracking_uri(uri="sqlite:///mlflow.db")
mlflow.set_experiment(experiment_name="mlflow-fashion-cnn-hyperopt")

2025/11/04 10:38:06 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/11/04 10:38:06 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.


<Experiment: artifact_location='/Users/koendirkvanesterik/Projects/mads/mads-ml-koenvanesterik/notebooks/cnns/mlruns/1', creation_time=1762157648748, experiment_id='1', last_update_time=1762157648748, lifecycle_stage='active', name='mlflow-fashion-cnn-hyperopt', tags={'mlflow.experimentKind': 'custom_model_development'}>

In [19]:
class CNN(nn.Module):
    """Stack of Conv2d -> (BatchNorm2d) -> ReLU blocks followed by a dense head.

    Args:
        batch_norm: when truthy each convolution is followed by
            ``nn.BatchNorm2d``; otherwise ``nn.Identity`` takes its place.
        filters: number of output channels of every convolution and width of
            the hidden dense layer.
        input_channels: number of channels of the input image.
        kernel_size: kernel size shared by the convolutions and the max-pool
            layers. Convolution padding is fixed at 1, which keeps the
            spatial size unchanged only for ``kernel_size=3``.
        max_pool_per_layer: a ``nn.MaxPool2d`` is appended after every block
            whose index ``i`` satisfies ``i % max_pool_per_layer == 0`` (so
            always after the first block). ``0`` disables pooling entirely.
        num_classes: number of output logits.
        num_layers: number of convolutional blocks.

    The forward pass returns a ``(batch, num_classes)`` tensor of logits.
    """

    def __init__(
        self,
        batch_norm=True,
        filters=128,
        input_channels=1,
        kernel_size=3,
        max_pool_per_layer=1,
        num_classes=10,
        num_layers=3,
    ):
        super().__init__()
        self.filters = filters
        self.input_channels = input_channels
        self.kernel_size = kernel_size
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.max_pool_per_layer = max_pool_per_layer
        self.convolutions = nn.ModuleList()

        for i in range(self.num_layers):
            self.convolutions.extend(
                [
                    nn.Conv2d(
                        # Only the first block sees the raw image channels.
                        self.input_channels if i == 0 else self.filters,
                        self.filters,
                        kernel_size=self.kernel_size,
                        stride=1,
                        padding=1,
                    ),
                    nn.BatchNorm2d(self.filters)
                    if batch_norm
                    else nn.Identity(),
                    nn.ReLU(),
                ]
            )

            # The truthiness guard avoids the ZeroDivisionError that
            # `i % 0` would raise; 0 therefore means "never pool".
            if self.max_pool_per_layer and i % self.max_pool_per_layer == 0:
                self.convolutions.append(
                    nn.MaxPool2d(
                        kernel_size=self.kernel_size,
                    )
                )

        self.dense = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                self.filters,
                self.filters,
            ),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(
                self.filters,
                self.num_classes,
            ),
        )

    def forward(self, x):
        """Run the convolutional stack and the dense head on ``x``."""
        for convolution in self.convolutions:
            x = convolution(x)
        # The dense head expects exactly `filters` features per sample, i.e.
        # a 1x1 feature map. Global average pooling guarantees that for any
        # pooling schedule and is an identity when the map is already 1x1.
        # It is applied functionally (no new submodule) so state_dict keys
        # and previously pickled models stay compatible.
        x = nn.functional.adaptive_avg_pool2d(x, 1)
        x = self.dense(x)
        return x

In [7]:
# Trainer configuration shared by every run of `objective`: 3 epochs of
# 100 train / 100 validation steps, accuracy as the tracked metric, and
# reporting to both MLflow and TOML under the "modellogs" directory.
settings = TrainerSettings(
    epochs=3,
    train_steps=100,
    valid_steps=100,
    metrics=[metrics.Accuracy()],
    reporttypes=[ReportTypes.MLFLOW, ReportTypes.TOML],
    logdir="modellogs",
)

In [47]:
def objective(params):
    """Train one CNN configuration inside an MLflow run and report its loss.

    Args:
        params: mapping with the keys ``"batch_norm"``, ``"num_layers"`` and
            ``"max_pool_per_layer"``; it is logged to MLflow as-is and used
            to build the ``CNN``.

    Returns:
        dict with ``"model"`` (the torchinfo ``summary`` of the trained
        model), ``"loss"`` (``trainer.test_loss`` after ``trainer.loop()``)
        and ``"status"`` (``STATUS_OK``).

    Side effects:
        Starts an MLflow run, pickles the trained model into ``./models`` and
        logs that file as an artifact under ``pytorch_models``.
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "convolutional-neural-network")
        mlflow.set_tag("dev", "vanesterik")
        mlflow.log_params(params)
        mlflow.log_param("batchsize", f"{batchsize}")

        model = CNN(
            batch_norm=params["batch_norm"],
            # hyperopt's quantized samplers (e.g. hp.quniform) return floats;
            # CNN feeds these values to range()/modulo, so coerce to int.
            num_layers=int(params["num_layers"]),
            max_pool_per_layer=int(params["max_pool_per_layer"]),
        )

        # Prefer Apple's Metal backend (the original hard-coded choice) but
        # fall back so the notebook also runs on CUDA or CPU-only machines.
        if torch.backends.mps.is_available():
            device = torch.device("mps")
        elif torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")

        trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=nn.CrossEntropyLoss(),
            optimizer=optim.Adam,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau,
            device=device,
        )
        trainer.loop()

        # Second-level resolution: with a minute-level tag, trials that
        # finish within the same minute overwrite each other's checkpoint.
        tag = datetime.now().strftime("%Y%m%d-%H%M%S")
        models_dir = Path("models").resolve()

        if not models_dir.exists():
            # exist_ok guards against the directory appearing between the
            # exists() check and the mkdir() call.
            models_dir.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created {models_dir}")

        models_path = models_dir / (tag + "model.pt")
        # NOTE: this pickles the whole module, so loading the file later
        # requires the CNN class definition to be importable.
        torch.save(model, models_path)

        mlflow.log_artifact(
            local_path=models_path, artifact_path="pytorch_models"
        )

        return {
            "model": summary(model),
            "loss": trainer.test_loss,
            "status": STATUS_OK,
        }


In [61]:
# One fixed configuration; the keys are exactly the ones `objective` reads
# from its `params` argument.
search_space = dict(
    batch_norm=True,
    num_layers=16,
    max_pool_per_layer=6,
)

In [62]:
# Run the objective once, directly, with the fixed configuration.
results = objective(params=search_space)

# Disabled hyperopt search over the same objective. `fmin`, `tpe` and `Trials`
# are not among the notebook's imports (only STATUS_OK is imported from
# hyperopt), so they must be imported before re-enabling this.
# results = fmin(
#     fn=objective, space=search_space, algo=tpe.suggest, max_evals=3, trials=Trials()
# )

[32m2025-11-04 11:46:20.884[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to modellogs/20251104-114620[0m
[32m2025-11-04 11:46:20.919[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:04<00:00, 20.45it/s]
[32m2025-11-04 11:46:27.198[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 0 train 0.9947 test 1.8016 metric ['0.4480'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:04<00:00, 20.58it/s]
[32m2025-11-04 11:46:33.435[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 1 train 0.6078 test 1.1864 metric ['0.6639'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:04<00:00, 20.56it/s]
[32m2025-11-04 11:46:39.716[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[3

In [63]:
# Show the torchinfo summary that `objective` stored under the "model" key.
print(results["model"])

Layer (type:depth-idx)                   Param #
CNN                                      --
├─ModuleList: 1-1                        --
│    └─Conv2d: 2-1                       1,280
│    └─BatchNorm2d: 2-2                  256
│    └─ReLU: 2-3                         --
│    └─MaxPool2d: 2-4                    --
│    └─Conv2d: 2-5                       147,584
│    └─BatchNorm2d: 2-6                  256
│    └─ReLU: 2-7                         --
│    └─Conv2d: 2-8                       147,584
│    └─BatchNorm2d: 2-9                  256
│    └─ReLU: 2-10                        --
│    └─Conv2d: 2-11                      147,584
│    └─BatchNorm2d: 2-12                 256
│    └─ReLU: 2-13                        --
│    └─Conv2d: 2-14                      147,584
│    └─BatchNorm2d: 2-15                 256
│    └─ReLU: 2-16                        --
│    └─Conv2d: 2-17                      147,584
│    └─BatchNorm2d: 2-18                 256
│    └─ReLU: 2-19                    