In [74]:
import os, pathlib

# Resolve the repository root: when the kernel is started from the
# "notebooks" folder, the MLflow store lives one level up.
repo_root = pathlib.Path.cwd()
if repo_root.name == "notebooks":
    repo_root = repo_root.parent

# Path.as_uri() builds a well-formed file URI (correct slashes on every OS,
# percent-escaping of spaces and other special characters) instead of
# gluing "file://" in front of the raw path.
TRACKING_URI = (repo_root / "mlruns").as_uri()

# Export it as well so MLflow calls that rely on the environment agree with
# the explicit TRACKING_URI used by the cells below.
os.environ["MLFLOW_TRACKING_URI"] = TRACKING_URI
print(TRACKING_URI)

file:///home/zano/Documents/TESI/FOLDER_CINECA/mlruns


In [94]:
import os
from pathlib import Path
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

# Client bound to the tracking store selected by TRACKING_URI.
client = MlflowClient(tracking_uri=TRACKING_URI)


def list_experiments(prefixes=None):
    """Print id, name and artifact location of the active experiments.

    Args:
        prefixes: Optional name filter. Either a single prefix string or an
            iterable of prefix strings; only experiments whose name starts
            with one of them are printed. ``None`` (or an empty value) prints
            every active experiment.
    """
    exps = client.search_experiments(view_type=ViewType.ACTIVE_ONLY)
    if prefixes:
        if isinstance(prefixes, str):
            # tuple("DS1") would become ("D", "S", "1") and match far too
            # much, so a lone string is wrapped rather than iterated.
            prefixes = (prefixes,)
        else:
            prefixes = tuple(prefixes)
        exps = [e for e in exps if e.name.startswith(prefixes)]
    for e in exps:
        print(f"id={e.experiment_id} | name={e.name} | artifact={e.artifact_location}")


list_experiments(prefixes=["DS1", "DS2"])  # change/omit prefixes to see all active

id=870194084958402490 | name=DS2_3c_MSA_vs_PD_supervised | artifact=file:///home/zano/Documents/TESI/FOLDER_CINECA/mlruns/870194084958402490
id=914421233606463568 | name=DS2_4c_MSA_vs_PD_supervised | artifact=file:///home/zano/Documents/TESI/FOLDER_CINECA/mlruns/914421233606463568
id=161682510127360652 | name=DS2_4c_MSA-P_vs_PD_supervised | artifact=file:///home/zano/Documents/TESI/FOLDER_CINECA/mlruns/161682510127360652
id=120358550691752282 | name=DS2_3c_MSA-P_vs_PD_supervised | artifact=file:///home/zano/Documents/TESI/FOLDER_CINECA/mlruns/120358550691752282
id=248261519201116701 | name=DS2_pretrained_MSA-P_vs_PD_supervised | artifact=file:///home/zano/Documents/TESI/FOLDER_CINECA/mlruns/248261519201116701
id=160785092358134542 | name=DS2_pretrained_MSA_vs_PD_supervised | artifact=file:///home/zano/Documents/TESI/FOLDER_CINECA/mlruns/160785092358134542
id=836292825509124052 | name=DS1_pretrained_MSA_vs_PD_supervised | artifact=file:///home/zano/Documents/TESI/FOLDER_CINECA/mlruns/83

In [95]:
import mlflow

# Experiment whose runs should be listed (pick a name from the listing above).
EXPERIMENT_NAME = "DS2_4c_MSA_vs_PD_supervised"

mlflow.set_tracking_uri(TRACKING_URI)

# Look the experiment up by name and fail loudly when it is missing.
exp = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if exp is None:
    raise ValueError(f"Experiment '{EXPERIMENT_NAME}' not found at {TRACKING_URI}")

print(f"Experiment '{EXPERIMENT_NAME}' (id={exp.experiment_id}) runs:")

# search_runs returns one DataFrame row per run; missing columns fall back
# to the placeholder values below.
runs_df = mlflow.search_runs(experiment_ids=[exp.experiment_id])
for _, row in runs_df.iterrows():
    run_id, run_name, status = (
        row["run_id"],
        row.get("tags.mlflow.runName", "Unnamed"),
        row.get("status", "UNKNOWN"),
    )
    print(f"  run_id: {run_id} | name: {run_name} | status: {status}")

Experiment 'DS2_4c_MSA_vs_PD_supervised' (id=914421233606463568) runs:
  run_id: cb888978ceb64b05a1a26cfa19115082 | name: ViT_monai_11-03_at:00-06-52 | status: FINISHED
  run_id: ba62da81a2454bf59253f24d37cc6dda | name: Resnet18_oversamp_monai_11-02_at:22-34-52 | status: FINISHED
  run_id: ccae05cc6a3c4b5ea17255e3de2af932 | name: Densenet169_oversamp_monai_11-02_at:19-13-37 | status: FINISHED
  run_id: 2c2e7113057d45859a0c881f3861f2e5 | name: Densenet121_oversamp_monai_11-02_at:15-43-52 | status: FINISHED


# MLflow run → train predictions + explanations

Loads a trained model from a chosen MLflow run, rebuilds the training set (every image under the data directory, loaded with the run's stored config and its test-time transforms), runs predictions over the full set, and saves Grad-CAM overlays (CNNs) or attention rollouts (ViT).

In [96]:
import os
import sys
from pathlib import Path
import mlflow
import torch

# --- REQUIRED: set the MLflow run you want to load ---
RUN_ID = "2c2e7113057d45859a0c881f3861f2e5"

# Defaults to whatever the machine offers; set USE_GPU = False to force CPU.
USE_GPU = torch.cuda.is_available()

# The project root is the working directory, or its parent when the kernel
# was started from inside "notebooks". It is put on sys.path so the
# project-local packages can be imported by the cells below.
_cwd = Path.cwd()
PROJECT_ROOT = _cwd.parent if _cwd.name == "notebooks" else _cwd
_project_root_str = str(PROJECT_ROOT)
if _project_root_str not in sys.path:
    sys.path.insert(0, _project_root_str)

# Per-run locations: downloaded artifacts and generated explanation images.
_notebooks_dir = PROJECT_ROOT / "notebooks"
ARTIFACT_DIR = _notebooks_dir / "models" / RUN_ID
OUTPUT_DIR = _notebooks_dir / "images" / f"{RUN_ID}_explanations"
# The DATA_ROOT environment variable, when set, overrides <project>/data.
DATA_ROOT = Path(os.environ.get("DATA_ROOT", PROJECT_ROOT / "data"))

if USE_GPU and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

mlflow.set_tracking_uri(TRACKING_URI)
for _needed_dir in (ARTIFACT_DIR, OUTPUT_DIR):
    _needed_dir.mkdir(parents=True, exist_ok=True)

# Echo the resolved settings so the saved notebook records them.
print(f"Tracking URI: {mlflow.get_tracking_uri()}")
print(f"Project root: {PROJECT_ROOT}")
print(f"Artifact cache: {ARTIFACT_DIR}")
print(f"Output dir: {OUTPUT_DIR}")
print(f"Data root: {DATA_ROOT}")
print(f"Using device: {device}")


Tracking URI: file:///home/zano/Documents/TESI/FOLDER_CINECA/mlruns
Project root: /home/zano/Documents/TESI/FOLDER_CINECA
Artifact cache: /home/zano/Documents/TESI/FOLDER_CINECA/notebooks/models/2c2e7113057d45859a0c881f3861f2e5
Output dir: /home/zano/Documents/TESI/FOLDER_CINECA/notebooks/images/2c2e7113057d45859a0c881f3861f2e5_explanations
Data root: /home/zano/Documents/TESI/FOLDER_CINECA/data
Using device: cuda


In [97]:
from pathlib import Path
import mlflow
from configs.ConfigLoader import ConfigLoader
import utils.transformations_functions as tf
import pickle 

run = mlflow.get_run(RUN_ID)
print(
    f"Run name: {run.data.tags.get('mlflow.runName', 'n/a')} | Experiment: {run.info.experiment_id}"
)

# Download the "config" artifacts logged with the run into the local cache so
# the original configuration can be rebuilt.
config_dir = Path(
    mlflow.artifacts.download_artifacts(
        run_id=RUN_ID, artifact_path="config", dst_path=ARTIFACT_DIR
    )
)

# base.yaml is never selected directly: the model-specific YAML is the entry
# point. Path.glob yields entries in arbitrary order, so sort to make the
# choice below reproducible.
config_files = sorted(p for p in config_dir.glob("*.yaml") if p.name != "base.yaml")
print(f"Config files found: {[p.name for p in config_files]}")
if not config_files:
    raise FileNotFoundError(f"No YAML config found in {config_dir}")
if len(config_files) > 1:
    # Make the otherwise silent pick visible when the run logged several configs.
    print(f"Multiple configs found; using the first one: {config_files[0].name}")

cfg_path = config_files[0]
cfg = ConfigLoader(str(cfg_path))

class_names = cfg.get_class_names()
num_channels = cfg.get_model_input_channels()
print(f"Loaded config {cfg_path.name} | classes: {class_names} | channels: {num_channels}")

# Fetch the model logged under the run's "model" artifact path.
# NOTE: loading goes through pickle, which executes code embedded in the
# file, so only load runs from a tracking store you trust.
model_uri = f"runs:/{RUN_ID}/model"
model = mlflow.pytorch.load_model(
    model_uri,
    map_location=device,
    # The stdlib pickle module is passed explicitly to avoid the
    # cloudpickle.Unpickler issue.
    pickle_module=pickle,
)

# Move to the selected device and switch to inference behaviour.
model = model.to(device)
model.eval()
print("Model loaded and set to eval().")

# Use the validation/test transform path (no heavy train-time augmentations):
# only the second element returned by get_transforms is kept.
_, test_transforms, _ = tf.get_transforms(cfg)

# --- Inspect the evaluation pipeline ---
from monai.transforms import Compose

# A Compose keeps its steps in `.transforms`; anything else is iterated as-is.
if isinstance(test_transforms, Compose):
    transforms_list = test_transforms.transforms
else:
    transforms_list = test_transforms

for t in transforms_list:
    print(f"Test transform: {t}")

Run name: Densenet121_oversamp_monai_11-02_at:15-43-52 | Experiment: 914421233606463568


Downloading artifacts:   0%|          | 0/2 [00:00<?, ?it/s]



Config files found: ['densenet121.yaml']
Configuration loaded from /home/zano/Documents/TESI/FOLDER_CINECA/notebooks/models/2c2e7113057d45859a0c881f3861f2e5/config/base.yaml
Configuration: {'dataset': {'class_names': ['MSA', 'PD']}, 'data_splitting': {'random_seed': 42, 'val_set_size': 0.15, 'test_set_size': 0.1, 'num_folds': 8, 'lr_discovery_folds': 4}, 'data_augmentation': {'resize_spatial_size': [512, 512], 'rand_flip_prob': 0.3, 'rand_flip_spatial_axes': [0, 1], 'rand_rotate90_prob': 0.3, 'rand_rotate90_max_k': 3, 'rand_gaussian_noise_prob': 0.5, 'rand_gaussian_noise_mean': 0.0, 'rand_gaussian_noise_std': 0.1, 'crop_size': [256, 256], 'intensity_augmentation_preset': 'light', 'use_crop': False, 'crop_percentage': 0.95}, 'data_loading': {'batch_size': 32, 'num_workers': 4}, 'model': {'model_name': 'base', 'spatial_dims': 2, 'in_channels': 4, 'out_channels': 2, 'dropout_prob': 0.1, 'patch_size': [16, 16], 'library': 'torchvision'}, 'training': {'num_epochs': 210, 'early_stopping_pati

In [98]:
import numpy as np
from utils.train_functions import make_loader

# The dataset folder is chosen from the model's input channel count.
if num_channels == 3:
    data_subdir = "3c_MIP"
else:
    data_subdir = "4c_MIP"

data_dir = DATA_ROOT / data_subdir
if not data_dir.exists():
    raise FileNotFoundError(f"{data_dir} not found; point DATA_ROOT to the dataset root")

# One sub-folder per class; the label is the class position in class_names.
image_paths, labels = [], []
for idx, cname in enumerate(class_names):
    class_dir = data_dir / cname
    # Sorted for a stable order; files with "vaso" in their name are skipped.
    filtered = [
        p for p in sorted(class_dir.glob("*.tif")) if "vaso" not in p.name.lower()
    ]
    image_paths += [str(p) for p in filtered]
    labels += [idx] * len(filtered)
    print(f"{cname}: {len(filtered)} files from {class_dir}")

train_images_np = np.array(image_paths)
train_labels_np = np.array(labels)
print(f"Total images: {len(train_images_np)}")

# Unshuffled loader using the evaluation transforms.
train_loader = make_loader(
    train_images_np,
    train_labels_np,
    transforms=test_transforms,
    cfg=cfg,
    shuffle=False,
)


MSA: 104 files from /home/zano/Documents/TESI/FOLDER_CINECA/data/4c_MIP/MSA
PD: 60 files from /home/zano/Documents/TESI/FOLDER_CINECA/data/4c_MIP/PD
Total images: 164


In [99]:
import torch
from utils.test_functions import calculate_classification_metrics

def predict_dataset(model, loader, class_names, device):
    """Run ``model`` over every batch of ``loader`` and score the predictions.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        loader: Iterable of batches, each a mapping with "image" and "label".
        class_names: Class names forwarded to the metric computation.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        dict with NumPy arrays under "predictions", "true_labels" and "probs",
        plus the result of ``calculate_classification_metrics`` under "metrics".
    """
    model.eval()
    pred_chunks, label_chunks, prob_chunks = [], [], []

    with torch.no_grad():
        for batch in loader:
            inputs = batch["image"].to(device)
            targets = batch["label"].to(device).long()

            outputs = model(inputs)
            # When the model returns a tuple/list, its first element is
            # taken as the logits.
            if isinstance(outputs, (tuple, list)):
                logits = outputs[0]
            else:
                logits = outputs
            batch_probs = torch.softmax(logits, dim=1)

            # Results are kept on the CPU, one chunk per batch.
            pred_chunks.append(batch_probs.argmax(dim=1).cpu())
            label_chunks.append(targets.cpu())
            prob_chunks.append(batch_probs.cpu())

    def _to_numpy(chunks):
        # Concatenate the per-batch tensors along the batch dimension.
        return torch.cat(chunks).numpy()

    preds_np = _to_numpy(pred_chunks)
    labels_np = _to_numpy(label_chunks)
    probs_np = _to_numpy(prob_chunks)

    metrics = calculate_classification_metrics(
        labels_np, preds_np, probs=probs_np, class_names=class_names
    )
    return {
        "predictions": preds_np,
        "true_labels": labels_np,
        "probs": probs_np,
        "metrics": metrics,
    }

# Score the loaded model on the loader built in the previous cell and display
# the metrics dict as the cell output.
# NOTE(review): despite the "train_" prefix, that loader covers every image
# found under the data directory and uses the test-time transforms.
# NOTE(review): the saved output below reports AUC ~0.39 and a negative MCC,
# i.e. worse than chance — worth checking class/label order, channel order
# and preprocessing against what was used in training.
train_results = predict_dataset(model, train_loader, class_names, device)
train_results["metrics"]


{'accuracy': 0.49390243902439024,
 'precision': 0.24444444444444444,
 'recall': 0.18333333333333332,
 'f1': 0.20952380952380953,
 'balanced_accuracy': np.float64(0.42820512820512824),
 'confusion_matrix': array([[70, 34],
        [49, 11]]),
 'mcc': np.float64(-0.15500152720937346),
 'auc': np.float64(0.3918269230769231),
 'classification_report': {'MSA': {'precision': 0.5882352941176471,
   'recall': 0.6730769230769231,
   'f1-score': 0.6278026905829597,
   'support': 104.0},
  'PD': {'precision': 0.24444444444444444,
   'recall': 0.18333333333333332,
   'f1-score': 0.20952380952380953,
   'support': 60.0},
  'accuracy': 0.49390243902439024,
  'macro avg': {'precision': 0.41633986928104577,
   'recall': 0.42820512820512824,
   'f1-score': 0.4186632500533846,
   'support': 164.0},
  'weighted avg': {'precision': 0.46245815399330464,
   'recall': 0.49390243902439024,
   'f1-score': 0.4747738316588804,
   'support': 164.0}}}

In [100]:
import importlib
import utils.vit_explanation_functions as vit_expl
from utils import explainability_functions as explainability_functions

# Re-execute the two helper modules so edits made to their source files are
# picked up without restarting the kernel; the names are rebound to whatever
# importlib.reload returns.
vit_expl = importlib.reload(vit_expl)
explainability_functions = importlib.reload(explainability_functions)  

In [93]:
# NOTE(review): the saved output of this cell (['MSA-P', 'PD'], execution
# count 93) predates the cells above and does not match the class names they
# report (MSA / PD) — re-run the notebook top to bottom to refresh it.
print(class_names)

['MSA-P', 'PD']


In [102]:
print("Model Architecture (Truncated view of layers):")

# Keep every Conv2d plus any module whose qualified name contains "block"
# (this also matches the norm/relu children nested inside each block).
layer_names = [
    name
    for name, module in model.named_modules()
    if isinstance(module, torch.nn.Conv2d) or "block" in name
]
for name in layer_names:
    print(f"{name}")

Model Architecture (Truncated view of layers):
features.conv0
features.denseblock1
features.denseblock1.denselayer1
features.denseblock1.denselayer1.layers
features.denseblock1.denselayer1.layers.norm1
features.denseblock1.denselayer1.layers.relu1
features.denseblock1.denselayer1.layers.conv1
features.denseblock1.denselayer1.layers.norm2
features.denseblock1.denselayer1.layers.relu2
features.denseblock1.denselayer1.layers.conv2
features.denseblock1.denselayer2
features.denseblock1.denselayer2.layers
features.denseblock1.denselayer2.layers.norm1
features.denseblock1.denselayer2.layers.relu1
features.denseblock1.denselayer2.layers.conv1
features.denseblock1.denselayer2.layers.norm2
features.denseblock1.denselayer2.layers.relu2
features.denseblock1.denselayer2.layers.conv2
features.denseblock1.denselayer3
features.denseblock1.denselayer3.layers
features.denseblock1.denselayer3.layers.norm1
features.denseblock1.denselayer3.layers.relu1
features.denseblock1.denselayer3.layers.conv1
features

In [104]:
from monai.visualize import GradCAM
from utils.data_visualization_functions import min_max_normalization
from utils.mlflow_functions import find_last_conv_layer
from utils.explainability_functions import process_and_save_batch_gradcam_and_Overlay
from utils.vit_explanation_functions import save_attention_overlays_side_by_side

model_name = cfg.get_model_name().lower()

# Hand-picked Grad-CAM layer. It is only applied when the loaded model really
# contains a module with this name; any other CNN falls back to the layer
# found by find_last_conv_layer instead of failing on a missing layer.
GRADCAM_LAYER_OVERRIDE = "features.denseblock4.denselayer15.layers.conv2"

if "vit" in model_name:
    # Transformer models: attention-rollout overlays.
    print("Model identified as ViT-based; computing Attention Rollout...")
    attn_dir = OUTPUT_DIR / "attention_rollout"
    attn_dir.mkdir(parents=True, exist_ok=True)
    save_attention_overlays_side_by_side(
        data_loader=train_loader,
        model=model,
        output_directory=str(attn_dir),
        device=device,
        heatmap_alpha=0.6,
        input_order="gbr_graylast"
    )
    print(f"Attention rollouts saved to {attn_dir}")
else:
    # Convolutional models: Grad-CAM on a late convolutional layer.
    print("Model identified as CNN-based; computing GradCAM...")
    # NOTE(review): the first return value is assumed to be a layer *name*
    # accepted by GradCAM(target_layers=...) — confirm against the helper.
    target_layer, _ = find_last_conv_layer(model)
    if target_layer is None:
        raise RuntimeError("No convolutional layer found; GradCAM cannot be computed.")

    # Prefer the hand-picked layer, but only for architectures that have it.
    if GRADCAM_LAYER_OVERRIDE in dict(model.named_modules()):
        target_layer = GRADCAM_LAYER_OVERRIDE
    print(f"GradCAM target layer: {target_layer}")

    gradcam = GradCAM(
        nn_module=model,
        target_layers=[target_layer],
        register_backward=True,
    )

    # NOTE(review): experiment_name receives the experiment *id*, not its
    # name — confirm this is what the helper expects.
    gradcam_dir = process_and_save_batch_gradcam_and_Overlay(
        model=model,
        test_loader=train_loader,
        gradcam_obj=gradcam,
        base_dir=OUTPUT_DIR,
        class_names=class_names,
        min_max_rescale_for_display=min_max_normalization,
        threshold=0.0,
        run_name=run.data.tags.get("mlflow.runName", RUN_ID),
        experiment_name=run.info.experiment_id,
    )
    print(f"GradCAM images saved to {gradcam_dir}")


Model identified as CNN-based; computing GradCAM...
Processing batch 1, shape: torch.Size([32, 4, 512, 512])
Saved GRADCAM overlay with DAPI-like channel to: /home/zano/Documents/TESI/FOLDER_CINECA/notebooks/images/2c2e7113057d45859a0c881f3861f2e5_explanations/gradcam_outputs/Densenet121_oversamp_monai_11-02_at:15-43-52/batch_0_img_0.png
Saved GRADCAM overlay with DAPI-like channel to: /home/zano/Documents/TESI/FOLDER_CINECA/notebooks/images/2c2e7113057d45859a0c881f3861f2e5_explanations/gradcam_outputs/Densenet121_oversamp_monai_11-02_at:15-43-52/batch_0_img_1.png
Saved GRADCAM overlay with DAPI-like channel to: /home/zano/Documents/TESI/FOLDER_CINECA/notebooks/images/2c2e7113057d45859a0c881f3861f2e5_explanations/gradcam_outputs/Densenet121_oversamp_monai_11-02_at:15-43-52/batch_0_img_2.png
Saved GRADCAM overlay with DAPI-like channel to: /home/zano/Documents/TESI/FOLDER_CINECA/notebooks/images/2c2e7113057d45859a0c881f3861f2e5_explanations/gradcam_outputs/Densenet121_oversamp_monai_11-