In [1]:
import os
import pandas as pd
import numpy as np
import mlflow
from mlflow.tracking import MlflowClient
from datetime import datetime
import re

In [2]:

# Adjust if your mlruns is elsewhere
MLRUNS_DIR_CANDIDATES = [
    # "/home/zano/Documents/TESI/FOLDER_CINECA/notebooks/mlruns",
    "/home/zano/Documents/TESI/FOLDER_CINECA/mlruns"
]

# def pick_existing_dir(candidates):
#     for d in candidates:
#         if os.path.isdir(d):
#             return d
#     raise FileNotFoundError(f"No mlruns directory found in: {candidates}")
mlruns_dir = MLRUNS_DIR_CANDIDATES[0] #pick_existing_dir(MLRUNS_DIR_CANDIDATES)
tracking_uri = f"file://{mlruns_dir}"
mlflow.set_tracking_uri(tracking_uri)
client = MlflowClient()

experiments = client.search_experiments() 

# --- Configuration for Filtering ---
EXCLUDE_PARAM_KEYS = {"seed", "random_state"}
EXCLUDE_PARAM_PREFIXES = ["debug_", "temp_"]
EXCLUDE_PARAM_REGEX = [r"^unused_.*"]

# --CONFIG FOR METRIC FILTERING
EXCLUDE_METRIC_KEYS = {"train_loss_step", "val_loss_step"}
EXCLUDE_METRIC_PREFIXES = ["raw_", "tmp_"]
EXCLUDE_METRIC_REGEX = [r"val_fold",r".*loss*"]

# --- Data Loading ---
rows = []
for exp in experiments:
    exp_id = exp.experiment_id
    runs = client.search_runs([exp_id], max_results=1000)
    for run in runs:
        info = run.info
        data = run.data
        params = {f"param.{k}": v for k, v in data.params.items()}
        metrics = {f"metric.{k}": v for k, v in data.metrics.items()}
        tags = {f"tag.{k}": v for k, v in data.tags.items()}
        row = {
            "experiment_id": exp_id,
            "experiment_name": exp.name,
            "run_id": info.run_id,
            "run_name": data.tags.get("mlflow.runName", ""),
            "artifact_uri": info.artifact_uri,
        }
        row.update(params)
        row.update(metrics)
        row.update(tags)
        rows.append(row)

# --- DataFrame Creation and Filtering ---
df = pd.DataFrame(rows).sort_values(["experiment_name"])
print(df.shape)
print(df.info())

display(df)

(217, 204)
<class 'pandas.core.frame.DataFrame'>
Index: 217 entries, 55 to 203
Columns: 204 entries, experiment_id to metric.test_auc_fold_0
dtypes: float64(153), object(51)
memory usage: 347.5+ KB
None


Unnamed: 0,experiment_id,experiment_name,run_id,run_name,artifact_uri,param.use_crop,param.test_pat_ids_per_fold,param.crop_percentage,param.dropout_rate,param.creation_time,...,metric.test_recall_fold_5,metric.test_mcc_fold_5,metric.test_mcc_fold_2,metric.test_precision_fold_5,metric.test_precision_fold_2,metric.test_auc_fold_7,metric.test_auc_fold_6,metric.test_recall_fold_7,metric.test_auc_fold_5,metric.test_auc_fold_0
55,788257751769076553,DS1_3c_MSA-P_vs_PD_supervised,6796411cc98f49e6af7dc871aaf85f3b,Densenet121_oversamp_torchvision_10-13_at:22-0...,file:///home/zano/Documents/TESI/FOLDER_CINECA...,False,,0.95,0.2,22:09:14,...,,,,,,,,,,
54,788257751769076553,DS1_3c_MSA-P_vs_PD_supervised,b33b0bb2146b4e9b82da134e99ba066d,Densenet169_mixup0.2_oversamp_torchvision_10-1...,file:///home/zano/Documents/TESI/FOLDER_CINECA...,False,,0.95,0.2,00:00:35,...,,,,,,,,,,
53,788257751769076553,DS1_3c_MSA-P_vs_PD_supervised,4c6d7ae304dd4b9889c0e3925a9ff53c,Densenet169_oversamp_torchvision_10-14_at:11-5...,file:///home/zano/Documents/TESI/FOLDER_CINECA...,False,,0.95,0.2,11:55:02,...,,,,,,,,,,
52,788257751769076553,DS1_3c_MSA-P_vs_PD_supervised,a57283a2c763498aa0b6d0d62a3514f9,Resnet18_oversamp_torchvision_10-14_at:12-25-18,file:///home/zano/Documents/TESI/FOLDER_CINECA...,False,,0.95,0.2,12:25:18,...,,,,,,,,,,
51,788257751769076553,DS1_3c_MSA-P_vs_PD_supervised,7fc11b854e3d42f69f9d7835c2fbfa18,ViT_monai_10-14_at:13-28-54,file:///home/zano/Documents/TESI/FOLDER_CINECA...,False,,0.95,0.2,13:28:54,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,650492548885392028,pretrained_subslice_MSA_vs_PD_supervised,f8bc44c3a3bb42c7a050e4fb5daa593d,Densenet121_oversamp_TL_pretrained:imagenet-mi...,file:///leonardo_work/pMI24_EleBr_1/lzanotto/F...,,,,0.2,,...,,,,,,,,,,
171,650492548885392028,pretrained_subslice_MSA_vs_PD_supervised,d1413c9a42af466e997d0de502529d5b,Densenet121_oversamp_TL_pretrained:microscopyn...,file:///leonardo_work/pMI24_EleBr_1/lzanotto/F...,,,,0.2,,...,,,,,,,,,,
159,650492548885392028,pretrained_subslice_MSA_vs_PD_supervised,49edb9eb8aed422fa6e02b8bf233ddb6,Densenet169_oversamp_TL_pretrained:torchvision...,file:///leonardo_work/pMI24_EleBr_1/lzanotto/F...,,,,0.2,19:38:53,...,,,,,,,,,,
168,650492548885392028,pretrained_subslice_MSA_vs_PD_supervised,41ce2810528a45408c81a87ca6940387,Resnet18_oversamp_TL_pretrained:microscopynet_...,file:///leonardo_work/pMI24_EleBr_1/lzanotto/F...,,,,0.2,,...,,,,,,,,,,


In [3]:
import numpy as np # Make sure numpy is imported

# Replace empty strings '' with the standard np.nan
df = df.replace('', np.nan)

import math # We'll need this to round up

# 1. Get the total number of rows (runs)
total_rows = len(df)

# 2. Calculate the threshold: 5% of total rows.
# This is the minimum *non-missing* values a column must have to be kept.
# We use math.ceil() to round up. e.g., if 5% is 10.8, we need at least 11 values.
min_non_nan_count = math.ceil(total_rows * 0.10)

print(f"Total rows: {total_rows}")
print(f"Columns will be KEPT if they have at least {min_non_nan_count} non-NaN values.")

# 3. Apply the drop
# Keep track of columns before for comparison
cols_before_drop = set(df.columns)

# axis='columns' applies this to columns, not rows
# thresh=... tells pandas the minimum *non-NaN* count to keep the column
df_cleaned = df.dropna(axis='columns', thresh=min_non_nan_count)

# 4. Report what happened
cols_after_drop = set(df.columns)
dropped_cols = cols_before_drop - cols_after_drop

print(f"\nDropped {len(dropped_cols)} columns with >95% missing values.")
if dropped_cols:
    print(f"Columns dropped: {list(dropped_cols)}")

# See the new shape
print(f"\nNew DataFrame shape: {df.shape}")

Total rows: 217
Columns will be KEPT if they have at least 22 non-NaN values.

Dropped 0 columns with >95% missing values.

New DataFrame shape: (217, 204)


In [4]:
param_cols_to_drop = []
for col in df.columns:
    if col.startswith("param."):
        param_name = col.replace("param.", "", 1)
        if (param_name in EXCLUDE_PARAM_KEYS or
            any(param_name.startswith(p) for p in EXCLUDE_PARAM_PREFIXES) or
            any(re.search(rx, param_name) for rx in EXCLUDE_PARAM_REGEX)):
            param_cols_to_drop.append(col)

metric_cols_to_drop = []
for col in df.columns:
    if col.startswith("metric."):
        metric_name = col.replace("metric.", "", 1)
        if (metric_name in EXCLUDE_METRIC_KEYS or
            any(metric_name.startswith(p) for p in EXCLUDE_METRIC_PREFIXES) or
            any(re.search(rx, metric_name) for rx in EXCLUDE_METRIC_REGEX)):
            metric_cols_to_drop.append(col)

cols_to_drop = param_cols_to_drop + metric_cols_to_drop
df.drop(columns=cols_to_drop, inplace=True)

print(f"Dropped {len(cols_to_drop)} parameter/metric columns based on exclusion rules.")
print(df.shape)

#drop columns with loss in their name
loss_cols = [col for col in df.columns if 'loss' in col.lower()]
df.drop(columns=loss_cols, inplace=True, errors='ignore')
print(f"Dropped {len(loss_cols)} columns containing 'loss': {loss_cols}")
print(df.shape)

print(f"After filtering, {len(df)} runs remain with experiment names starting with 'SL' or 'pretrained'")


Dropped 82 parameter/metric columns based on exclusion rules.
(217, 122)
Dropped 0 columns containing 'loss': []
(217, 122)
After filtering, 217 runs remain with experiment names starting with 'SL' or 'pretrained'


In [9]:
# Create a boolean mask for rows to keep
# keep only rows with experiment name starting with SL, pretrained, or DS
mask = (df['experiment_name'].str.startswith('SL', na=False) |
        df['experiment_name'].str.startswith('pretrained', na=False) |
        df['experiment_name'].str.startswith('DS', na=False))

# Apply the filter
n_cols_before = len(df.columns)
df = df[mask]
n_cols_after = len(df.columns)
print(f"Dropped {n_cols_before - n_cols_after} columns")
print(df.shape)

Dropped 0 columns
(212, 123)


In [13]:
import re

# 1. Compile a regex pattern to find 'fold_' + one or more digits
pattern = re.compile(r'fold_\d+')
# 2. Build a list of columns where the pattern is found ANYWHERE
columns_to_drop = [col for col in df.columns if pattern.search(col)]
# 3. Drop the identified columns
df = df.drop(columns=columns_to_drop)

print(f"Dropping {len(columns_to_drop)} columns containing 'fold': {columns_to_drop}")
# 2. Drop the identified columns

Dropping 32 columns containing 'fold': ['metric.test_recall_fold_4', 'metric.test_precision_fold_0', 'metric.test_recall_fold_1', 'metric.test_mcc_fold_0', 'metric.test_recall_fold_6', 'metric.test_auc_fold_3', 'metric.test_recall_fold_0', 'metric.test_auc_fold_4', 'metric.test_precision_fold_3', 'metric.test_mcc_fold_7', 'metric.test_auc_fold_2', 'metric.test_mcc_fold_3', 'metric.test_precision_fold_4', 'metric.test_mcc_fold_4', 'metric.test_mcc_fold_6', 'metric.test_precision_fold_6', 'metric.test_precision_fold_7', 'metric.test_recall_fold_3', 'metric.test_auc_fold_1', 'metric.test_mcc_fold_1', 'metric.test_precision_fold_1', 'metric.test_recall_fold_2', 'metric.test_recall_fold_5', 'metric.test_mcc_fold_5', 'metric.test_mcc_fold_2', 'metric.test_precision_fold_5', 'metric.test_precision_fold_2', 'metric.test_auc_fold_7', 'metric.test_auc_fold_6', 'metric.test_recall_fold_7', 'metric.test_auc_fold_5', 'metric.test_auc_fold_0']


## ADDING NEW COLUMNS

In [14]:
# --- Add DATASET Column ---
if 'param.test_counts' in df.columns:
    df['param.test_counts'] = pd.to_numeric(df['param.test_counts'], errors='coerce')
    df['dataset'] = np.where(df['param.test_counts'] < 25, 'DS1', 'DS2')
    print("Added 'dataset' column based on 'param.test_counts'.")
else:
    print("Warning: 'param.test_counts' column not found. Could not create 'dataset' column.")

Added 'dataset' column based on 'param.test_counts'.


In [15]:
# --- Add num_channels Column (MODIFIED LOGIC WITH FALLBACK) ---
# Step 1: Create the fallback values for all rows based on the original inference logic.
print("Inferring fallback 'num_channels' for all rows...")
condition_3c = df['experiment_name'].str.contains("3c", na=False)

if 'param.transfer_learning' in df.columns:
    condition_tl = (df['param.transfer_learning'] == 'True')
else:
    condition_tl = pd.Series([False] * len(df), index=df.index)
    print("Warning: 'param.transfer_learning' column not found for fallback. Assuming False.")

is_3_channels = condition_3c | condition_tl
fallback_channels = pd.Series(np.where(is_3_channels, 3, 4), index=df.index)

# Step 2: Use 'param.num_channels' as the priority source, and fill any missing values with the fallback.
if 'param.num_channels' in df.columns:
    print("Found 'param.num_channels'. Using it as priority, with inference as fallback.")
    # Convert the parameter column to numeric, forcing errors into NaN (Not a Number)
    priority_channels = pd.to_numeric(df['param.num_channels'], errors='coerce')
    # Use the priority values, and for any NaNs, fill with the corresponding value from the fallback Series.
    df['num_channels'] = priority_channels.fillna(fallback_channels)
else:
    print("Did not find 'param.num_channels'. Creating 'num_channels' based on inference only.")
    df['num_channels'] = fallback_channels

# Ensure the final column is an integer type, as channel counts are whole numbers.
df['num_channels'] = df['num_channels'].astype(int)
print("Added 'num_channels' column with priority-fallback logic.")

Inferring fallback 'num_channels' for all rows...
Found 'param.num_channels'. Using it as priority, with inference as fallback.
Added 'num_channels' column with priority-fallback logic.


In [16]:
# --- NEW: Add classi Column ---
# Create a condition to check if the experiment name contains the specific substring
condition_msap = df['experiment_name'].str.contains("_MSA-P_vs_PD_", na=False)

# Use np.where to assign the value based on the condition
df['classes'] = np.where(condition_msap, 'MSA-P vs PD', 'MSA vs PD')
print("Added 'classes' column based on experiment name.")

# --- Correct param.color_transforms based on run_name due to error in logging code---
if 'run_name' in df.columns and 'param.color_transforms' in df.columns:
    # Create a boolean mask for rows where run_name contains the specific string
    color_transform_mask = df['run_name'].str.contains('_color_transforms:True_', na=False)

    # Update the 'param.color_transforms' column to True where the mask is True
    df.loc[color_transform_mask, 'param.color_transforms'] = 'True'
    print("Updated 'param.color_transforms' based on 'run_name' content.")

# --- Create, Display, and Save Final Merged Table ---
# The main 'df' DataFrame now contains all info. We will reorder it for better readability.
# Define the desired column order, including the new 'classes' column
id_cols = ['experiment_name', 'run_name', 'dataset', 'classes', 'num_channels']
# Exclude the original param.num_channels from the list of param columns to avoid redundancy
param_cols = sorted([
    c for c in df.columns
    if c.startswith("param.") and c != 'param.num_channels'
])
metric_cols = sorted([c for c in df.columns if c.startswith("metric.")])

# Construct the final list of columns, ensuring they exist in the DataFrame
final_cols_order = [col for col in id_cols if col in df.columns] + \
                   [col for col in param_cols if col in df.columns] + \
                   [col for col in metric_cols if col in df.columns]

# Create the final, reordered dataframe
results_df = df[final_cols_order].copy()

# Replace 'torchvision' with 'imagenet' in the 'param.pretrained' column
if 'param.pretrained' in results_df.columns:
    results_df['param.pretrained'] = results_df['param.pretrained'].replace('torchvision', 'imagenet')
    print("Replaced 'torchvision' with 'imagenet' in 'param.pretrained' column.")

pd.set_option("display.max_columns", 200)
print("\n--- Final Merged Results Table ---")
# Use display() if in a notebook environment, otherwise print()
try:
    display(results_df)
except NameError:
    print(results_df)

# Save the single, merged DataFrame to a CSV file
results_df.to_csv("mlflow_results_merged.csv", index=False)
print("\nSaved the final merged table to 'mlflow_results_merged.csv'")

print(f"\nLoaded and processed {len(df)} runs from {len(experiments)} experiments at {tracking_uri}")

Added 'classes' column based on experiment name.
Updated 'param.color_transforms' based on 'run_name' content.
Replaced 'torchvision' with 'imagenet' in 'param.pretrained' column.

--- Final Merged Results Table ---


Unnamed: 0,experiment_name,run_name,dataset,classes,num_channels,param.batch_size,param.best_fold_idx,param.best_lr,param.color_transforms,param.creation_date,param.creation_time,param.crop_percentage,param.dropout_rate,param.epochs,param.fine_tuning,param.freezed_layer_index,param.intensity_augmentation_preset,param.lr_discovery_folds,param.lr_discovery_method,param.mixup_alpha,param.model_library,param.model_name,param.n_folds,param.optimizer_name,param.pretrained,param.test_counts,param.test_pat_ids_for_best_fold,param.test_pat_ids_per_fold,param.total_params,param.train_counts,param.trainable_params,param.transfer_learning,param.use_crop,param.use_lr_discovery,param.val_counts,param.weight_decay,metric.exec_time_min,metric.mean_patient_major_bal_acc,metric.mean_patient_major_mcc,metric.mean_patient_major_precision,metric.mean_patient_major_recall,metric.mean_patient_soft_bal_acc,metric.mean_patient_soft_mcc,metric.mean_patient_soft_precision,metric.mean_patient_soft_recall,metric.mean_test_accuracy,metric.mean_test_auc,metric.mean_test_balanced_acc,metric.mean_test_f1,metric.mean_test_mcc,metric.mean_test_precision,metric.mean_test_recall,metric.patient_major_bal_acc_formatted,metric.patient_major_mcc_formatted,metric.patient_major_precision_formatted,metric.patient_major_recall_formatted,metric.patient_soft_bal_acc_formatted,metric.patient_soft_mcc_formatted,metric.patient_soft_precision_formatted,metric.patient_soft_recall_formatted,metric.std_patient_major_bal_acc,metric.std_patient_major_mcc,metric.std_patient_major_precision,metric.std_patient_major_recall,metric.std_patient_soft_bal_acc,metric.std_patient_soft_mcc,metric.std_patient_soft_precision,metric.std_patient_soft_recall,metric.std_test_accuracy,metric.std_test_auc,metric.std_test_balanced_acc,metric.std_test_f1,metric.std_test_mcc,metric.std_test_precision,metric.std_test_recall
55,DS1_3c_MSA-P_vs_PD_supervised,Densenet121_oversamp_torchvision_10-13_at:22-0...,DS1,MSA-P vs PD,3,32,,0.00012207764786954147,,10-13,22:09:14,0.95,0.2,150,,,light,4,nested,0,torchvision,Densenet121,8,AdamW,imagenet,14.0,,,6955906,"{0: 92, 1: 93, 2: 91, 3: 91, 4: 91, 5: 90, 6: ...",6955906,False,False,,"{0: 17, 1: 17, 2: 17, 3: 17, 4: 17, 5: 17, 6: ...",1e-4,52.304000,0.656,0.347,0.549,0.750,0.656,0.318,0.562,0.688,0.676000,0.748000,0.663000,0.625000,0.339000,0.565000,0.703000,,,,,,,,,0.136,0.288,0.231,0.331,0.205,0.419,0.263,0.325,0.121000,0.173000,0.131000,0.261000,0.273000,0.234000,0.299000
54,DS1_3c_MSA-P_vs_PD_supervised,Densenet169_mixup0.2_oversamp_torchvision_10-1...,DS1,MSA-P vs PD,3,32,,0.00012207764786954147,,10-14,00:00:35,0.95,0.2,150,,,light,4,nested,0.2,torchvision,Densenet169,8,AdamW,imagenet,14.0,,,12487810,"{0: 92, 1: 93, 2: 91, 3: 91, 4: 91, 5: 90, 6: ...",12487810,False,False,,"{0: 17, 1: 17, 2: 17, 3: 17, 4: 17, 5: 17, 6: ...",1e-4,64.841000,0.700,0.433,0.594,0.844,0.692,0.395,0.629,0.771,0.667000,0.732000,0.672000,0.686000,0.360000,0.615000,0.792000,,,,,,,,,0.198,0.420,0.258,0.329,0.161,0.333,0.169,0.235,0.189000,0.177000,0.185000,0.197000,0.382000,0.173000,0.240000
53,DS1_3c_MSA-P_vs_PD_supervised,Densenet169_oversamp_torchvision_10-14_at:11-5...,DS1,MSA-P vs PD,3,32,,0.00012207764786954147,,10-14,11:55:02,0.95,0.2,150,,,light,4,nested,0,torchvision,Densenet169,8,AdamW,imagenet,14.0,,,12487810,"{0: 92, 1: 93, 2: 91, 3: 91, 4: 91, 5: 90, 6: ...",12487810,False,False,,"{0: 17, 1: 17, 2: 17, 3: 17, 4: 17, 5: 17, 6: ...",1e-4,57.341000,0.716,0.483,0.625,0.906,0.728,0.487,0.638,0.875,0.682000,0.737000,0.688000,0.708000,0.394000,0.630000,0.823000,,,,,,,,,0.164,0.329,0.166,0.248,0.159,0.330,0.156,0.250,0.163000,0.160000,0.157000,0.162000,0.326000,0.143000,0.196000
52,DS1_3c_MSA-P_vs_PD_supervised,Resnet18_oversamp_torchvision_10-14_at:12-25-18,DS1,MSA-P vs PD,3,32,,0.00012207764786954147,,10-14,12:25:18,0.95,0.2,150,,,light,4,nested,0,torchvision,Resnet18,8,AdamW,imagenet,14.0,,,11177538,"{0: 90, 1: 91, 2: 89, 3: 89, 4: 89, 5: 88, 6: ...",11177538,False,False,,"{0: 19, 1: 19, 2: 19, 3: 19, 4: 19, 5: 19, 6: ...",2e-4,29.903000,0.683,0.384,0.640,0.760,0.727,0.471,0.685,0.792,0.696000,0.772000,0.693000,0.706000,0.396000,0.671000,0.766000,,,,,,,,,0.158,0.324,0.180,0.230,0.165,0.331,0.155,0.243,0.146000,0.200000,0.148000,0.134000,0.301000,0.141000,0.170000
51,DS1_3c_MSA-P_vs_PD_supervised,ViT_monai_10-14_at:13-28-54,DS2,MSA-P vs PD,3,32,,0.00012207764786954147,,10-14,13:28:54,0.95,0.2,450,,,light,4,nested,0,monai,ViT,8,AdamW,,,,,1022210,"{0: 92, 1: 93, 2: 91, 3: 91, 4: 91, 5: 90, 6: ...",1022210,False,False,,"{0: 17, 1: 17, 2: 17, 3: 17, 4: 17, 5: 17, 6: ...",1e-5,58.457000,0.738,0.498,0.735,0.750,0.750,0.522,0.746,0.781,0.745000,0.800000,0.746000,0.732000,0.506000,0.751000,0.760000,,,,,,,,,0.204,0.412,0.221,0.280,0.165,0.335,0.180,0.263,0.160000,0.177000,0.161000,0.181000,0.321000,0.158000,0.235000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,pretrained_subslice_MSA_vs_PD_supervised,Densenet169_oversamp_TL_pretrained:microscopyn...,DS2,MSA vs PD,3,32,,,False,,,,0.2,210,False,308,,,,,monai,Densenet169,8,,microscopynet,117.0,,,12487810,"{0: 589, 1: 588, 2: 586, 3: 594, 4: 597, 5: 59...",6742018,True,,,"{0: 105, 1: 104, 2: 104, 3: 105, 4: 106, 5: 10...",1e-4,79.836654,,,,,,,,,0.622708,0.650792,0.586062,0.468144,0.176650,0.463703,0.487588,,,,,,,,,,,,,,,,,0.106976,0.111636,0.081521,0.083341,0.173435,0.112981,0.086862
170,pretrained_subslice_MSA_vs_PD_supervised,Densenet121_oversamp_TL_pretrained:imagenet-mi...,DS2,MSA vs PD,3,32,,,False,,,,0.2,210,False,263,,,,,monai,Densenet121,8,,imagenet-microscopynet,117.0,,,6955906,"{0: 589, 1: 588, 2: 586, 3: 594, 4: 597, 5: 59...",2162178,True,,,"{0: 105, 1: 104, 2: 104, 3: 105, 4: 106, 5: 10...",1e-4,61.439694,,,,,,,,,0.679439,0.726629,0.659260,0.552457,0.310653,0.523816,0.602858,,,,,,,,,,,,,,,,,0.074489,0.097431,0.072226,0.063319,0.132420,0.050960,0.139658
171,pretrained_subslice_MSA_vs_PD_supervised,Densenet121_oversamp_TL_pretrained:microscopyn...,DS2,MSA vs PD,3,32,,,False,,,,0.2,210,False,263,,,,,monai,Densenet121,8,,microscopynet,117.0,,,6955906,"{0: 589, 1: 588, 2: 586, 3: 594, 4: 597, 5: 59...",2162178,True,,,"{0: 105, 1: 104, 2: 104, 3: 105, 4: 106, 5: 10...",1e-4,70.502704,,,,,,,,,0.646664,0.678267,0.619304,0.512312,0.236032,0.487049,0.552891,,,,,,,,,,,,,,,,,0.088157,0.094281,0.064719,0.047639,0.129710,0.063238,0.084442
159,pretrained_subslice_MSA_vs_PD_supervised,Densenet169_oversamp_TL_pretrained:torchvision...,DS2,MSA vs PD,3,32,,,False,09-12,19:38:53,,0.2,210,False,308,,,,,torchvision,Densenet169,8,,imagenet,117.0,,,12487810,"{0: 589, 1: 588, 2: 586, 3: 594, 4: 597, 5: 59...",6742018,True,,True,"{0: 105, 1: 104, 2: 104, 3: 105, 4: 106, 5: 10...",1e-4,61.435939,,,,,,,,,0.710036,0.757194,0.673159,0.572710,0.349712,0.571595,0.579258,,,,,,,,,,,,,,,,,0.088914,0.082708,0.080122,0.097093,0.163367,0.096955,0.115451



Saved the final merged table to 'mlflow_results_merged.csv'

Loaded and processed 212 runs from 38 experiments at file:///home/zano/Documents/TESI/FOLDER_CINECA/mlruns


In [None]:

# df_cleaned = df.drop(columns=columns_to_drop)

Dropping 0 columns containing 'fold': []


In [None]:

# Adjust if your mlruns is elsewhere
MLRUNS_DIR_CANDIDATES = [
    # "/home/zano/Documents/TESI/FOLDER_CINECA/notebooks/mlruns",
    "/home/zano/Documents/TESI/FOLDER_CINECA/mlruns"
]

# def pick_existing_dir(candidates):
#     for d in candidates:
#         if os.path.isdir(d):
#             return d
#     raise FileNotFoundError(f"No mlruns directory found in: {candidates}")
mlruns_dir = MLRUNS_DIR_CANDIDATES[0] #pick_existing_dir(MLRUNS_DIR_CANDIDATES)
tracking_uri = f"file://{mlruns_dir}"
mlflow.set_tracking_uri(tracking_uri)
client = MlflowClient()

experiments = client.search_experiments() 

# --- Configuration for Filtering ---
EXCLUDE_PARAM_KEYS = {"seed", "random_state"}
EXCLUDE_PARAM_PREFIXES = ["debug_", "temp_"]
EXCLUDE_PARAM_REGEX = [r"^unused_.*"]

# --CONFIG FOR METRIC FILTERING
EXCLUDE_METRIC_KEYS = {"train_loss_step", "val_loss_step"}
EXCLUDE_METRIC_PREFIXES = ["raw_", "tmp_"]
EXCLUDE_METRIC_REGEX = [r"val_fold",r".*loss*"]

# --- Data Loading ---
rows = []
for exp in experiments:
    exp_id = exp.experiment_id
    runs = client.search_runs([exp_id], max_results=1000)
    for run in runs:
        info = run.info
        data = run.data
        params = {f"param.{k}": v for k, v in data.params.items()}
        metrics = {f"metric.{k}": v for k, v in data.metrics.items()}
        tags = {f"tag.{k}": v for k, v in data.tags.items()}
        row = {
            "experiment_id": exp_id,
            "experiment_name": exp.name,
            "run_id": info.run_id,
            "run_name": data.tags.get("mlflow.runName", ""),
            "artifact_uri": info.artifact_uri,
        }
        row.update(params)
        row.update(metrics)
        row.update(tags)
        rows.append(row)

# --- DataFrame Creation and Filtering ---
df = pd.DataFrame(rows).sort_values(["experiment_name"])
print(df.shape)
print(df.info())

display(df)

(217, 204)
<class 'pandas.core.frame.DataFrame'>
Index: 217 entries, 55 to 203
Columns: 204 entries, experiment_id to metric.test_auc_fold_0
dtypes: float64(153), object(51)
memory usage: 347.5+ KB
None


Unnamed: 0,experiment_id,experiment_name,run_id,run_name,artifact_uri,param.use_crop,param.test_pat_ids_per_fold,param.crop_percentage,param.dropout_rate,param.creation_time,param.pretrained,param.num_channels,param.transfer_learning,param.intensity_augmentation_preset,param.val_counts,param.trainable_params,param.model_name,param.creation_date,param.best_lr,param.test_pat_ids_for_best_fold,param.batch_size,param.lr_discovery_folds,param.n_folds,param.best_fold_idx,param.weight_decay,param.lr_discovery_method,param.optimizer_name,param.mixup_alpha,param.total_params,param.test_counts,param.train_counts,param.model_library,param.epochs,param.freezed_layer_index,metric.mean_test_accuracy,metric.patient_major_mcc_formatted,metric.patient_major_recall_formatted,metric.mean_patient_soft_precision,metric.std_test_mcc,metric.std_patient_soft_mcc,metric.mean_patient_soft_recall,metric.mean_test_precision,metric.std_patient_soft_bal_acc,metric.mean_test_auc,metric.mean_patient_major_recall,metric.mean_patient_major_precision,metric.std_test_balanced_acc,metric.mean_patient_major_bal_acc,metric.std_test_auc,metric.exec_time_min,metric.std_patient_major_precision,metric.patient_major_precision_formatted,metric.std_patient_major_mcc,metric.mean_test_mcc,metric.std_test_f1,metric.std_test_loss,metric.patient_soft_mcc_formatted,metric.std_test_accuracy,metric.mean_test_balanced_acc,metric.mean_test_recall,metric.std_patient_major_recall,metric.mean_test_f1,metric.patient_major_bal_acc_formatted,metric.mean_patient_soft_bal_acc,metric.std_test_recall,metric.mean_patient_soft_mcc,metric.std_patient_soft_precision,metric.std_patient_major_bal_acc,metric.patient_soft_bal_acc_formatted,metric.std_patient_soft_recall,metric.std_test_precision,metric.patient_soft_precision_formatted,metric.mean_patient_major_mcc,metric.patient_soft_recall_formatted,metric.mean_test_loss,tag.patient_major_mcc_formatted,tag.mlflow.user,tag.mlflow.source.name,tag.patient_major_recall_formatted,tag.mlflow.runName,tag.patient_major_precision_formatted,tag.mlflow.source.type,tag.patient_soft_mcc_formatted,tag.mlflow.log-model.history,tag.patient_major_bal_acc_formatted,tag.mlflow.source.git.commit,tag.patient_soft_bal_acc_formatted,tag.patient_soft_precision_formatted,tag.patient_soft_recall_formatted,metric.val_fold_2/val_f1,metric.val_fold_2/val_mcc,metric.val_fold_2/val_precision,metric.val_fold_2/val_auc,metric.val_fold_2/val_recall,metric.val_fold_2/val_accuracy,metric.val_fold_2/val_balanced_accuracy,metric.val_fold_2/train_accuracy,metric.val_fold_2/train_loss,metric.val_fold_2/val_loss,metric.val_fold_5/val_f1,...,metric.val_fold_5/val_accuracy,metric.val_fold_5/val_balanced_accuracy,metric.val_fold_5/train_accuracy,metric.val_fold_5/train_loss,metric.val_fold_5/val_loss,metric.val_fold_3/val_f1,metric.val_fold_3/val_mcc,metric.val_fold_3/val_precision,metric.val_fold_3/val_auc,metric.val_fold_3/val_recall,metric.val_fold_3/val_accuracy,metric.val_fold_3/val_balanced_accuracy,metric.val_fold_3/train_accuracy,metric.val_fold_3/train_loss,metric.val_fold_3/val_loss,metric.val_fold_7/val_f1,metric.val_fold_7/val_mcc,metric.val_fold_7/val_precision,metric.val_fold_7/val_auc,metric.val_fold_7/val_recall,metric.val_fold_7/val_accuracy,metric.val_fold_7/val_balanced_accuracy,metric.val_fold_7/train_accuracy,metric.val_fold_7/train_loss,metric.val_fold_7/val_loss,metric.val_fold_6/val_f1,metric.val_fold_6/val_mcc,metric.val_fold_6/val_precision,metric.val_fold_6/val_auc,metric.val_fold_6/val_recall,metric.val_fold_6/val_accuracy,metric.val_fold_6/val_balanced_accuracy,metric.val_fold_6/train_accuracy,metric.val_fold_6/train_loss,metric.val_fold_6/val_loss,metric.val_fold_0/val_f1,metric.val_fold_0/val_mcc,metric.val_fold_0/val_precision,metric.val_fold_0/val_auc,metric.val_fold_0/val_recall,metric.val_fold_0/val_accuracy,metric.val_fold_0/val_balanced_accuracy,metric.val_fold_0/train_accuracy,metric.val_fold_0/train_loss,metric.val_fold_0/val_loss,metric.val_fold_1/val_f1,metric.val_fold_1/val_mcc,metric.val_fold_1/val_precision,metric.val_fold_1/val_auc,metric.val_fold_1/val_recall,metric.val_fold_1/val_accuracy,metric.val_fold_1/val_balanced_accuracy,metric.val_fold_1/train_accuracy,metric.val_fold_1/train_loss,metric.val_fold_1/val_loss,metric.val_fold_4/val_f1,metric.val_fold_4/val_mcc,metric.val_fold_4/val_precision,metric.val_fold_4/val_auc,metric.val_fold_4/val_recall,metric.val_fold_4/val_accuracy,metric.val_fold_4/val_balanced_accuracy,metric.val_fold_4/train_accuracy,metric.val_fold_4/train_loss,metric.val_fold_4/val_loss,param.fine_tuning,param.use_lr_discovery,param.color_transforms,metric.test_recall_fold_4,metric.test_precision_fold_0,metric.test_recall_fold_1,metric.test_mcc_fold_0,metric.test_recall_fold_6,metric.test_auc_fold_3,metric.test_recall_fold_0,metric.test_auc_fold_4,metric.test_precision_fold_3,metric.test_mcc_fold_7,metric.test_auc_fold_2,metric.test_mcc_fold_3,metric.test_precision_fold_4,metric.test_mcc_fold_4,metric.test_mcc_fold_6,metric.test_precision_fold_6,metric.test_precision_fold_7,metric.test_recall_fold_3,metric.test_auc_fold_1,metric.test_mcc_fold_1,metric.test_precision_fold_1,metric.test_recall_fold_2,metric.test_recall_fold_5,metric.test_mcc_fold_5,metric.test_mcc_fold_2,metric.test_precision_fold_5,metric.test_precision_fold_2,metric.test_auc_fold_7,metric.test_auc_fold_6,metric.test_recall_fold_7,metric.test_auc_fold_5,metric.test_auc_fold_0
55,788257751769076553,DS1_3c_MSA-P_vs_PD_supervised,6796411cc98f49e6af7dc871aaf85f3b,Densenet121_oversamp_torchvision_10-13_at:22-0...,file:///home/zano/Documents/TESI/FOLDER_CINECA...,False,,0.95,0.2,22:09:14,imagenet,3,False,light,"{0: 17, 1: 17, 2: 17, 3: 17, 4: 17, 5: 17, 6: ...",6955906,Densenet121,10-13,0.00012207764786954147,,32,4,8,,1e-4,nested,AdamW,0,6955906,14,"{0: 92, 1: 93, 2: 91, 3: 91, 4: 91, 5: 90, 6: ...",torchvision,150,,0.676000,,,0.562,0.273000,0.419,0.688,0.565000,0.205,0.748000,0.750,0.549,0.131000,0.656,0.173000,52.304000,0.231,,0.288,0.339000,0.261000,0.251000,,0.121000,0.663000,0.703000,0.331,0.625000,,0.656,0.299000,0.318,0.263,0.136,,0.325,0.234000,,0.347,,0.663000,,zano,train_3c.py,,Densenet121_oversamp_torchvision_10-13_at:22-0...,,LOCAL,,"[{""run_id"": ""6796411cc98f49e6af7dc871aaf85f3b""...",,09772da2bfc500c663a0ae5ea4abed390b5462c3,,,,0.875000,0.763889,0.875000,0.958333,0.875000,0.882353,0.881944,0.946809,0.149163,0.368421,0.700000,...,0.647059,0.659722,0.956522,0.226123,0.678090,0.875000,0.763889,0.875000,0.902778,0.875000,0.882353,0.881944,0.936170,0.192180,0.432033,0.875000,0.763889,0.875000,0.958333,0.875000,0.882353,0.881944,0.872340,0.236689,0.265380,0.777778,0.549350,0.700000,0.861111,0.875000,0.764706,0.770833,0.914894,0.257533,0.491114,0.800000,0.608581,0.666667,0.666667,1.000000,0.764706,0.777778,0.826087,0.423789,0.841332,0.714286,0.536739,0.833333,0.833333,0.625000,0.764706,0.756944,0.918367,0.225199,0.708584,0.800000,0.608581,0.666667,0.972222,1.000000,0.764706,0.777778,0.925532,0.194735,0.310570,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
54,788257751769076553,DS1_3c_MSA-P_vs_PD_supervised,b33b0bb2146b4e9b82da134e99ba066d,Densenet169_mixup0.2_oversamp_torchvision_10-1...,file:///home/zano/Documents/TESI/FOLDER_CINECA...,False,,0.95,0.2,00:00:35,imagenet,3,False,light,"{0: 17, 1: 17, 2: 17, 3: 17, 4: 17, 5: 17, 6: ...",12487810,Densenet169,10-14,0.00012207764786954147,,32,4,8,,1e-4,nested,AdamW,0.2,12487810,14,"{0: 92, 1: 93, 2: 91, 3: 91, 4: 91, 5: 90, 6: ...",torchvision,150,,0.667000,,,0.629,0.382000,0.333,0.771,0.615000,0.161,0.732000,0.844,0.594,0.185000,0.700,0.177000,64.841000,0.258,,0.420,0.360000,0.197000,0.476000,,0.189000,0.672000,0.792000,0.329,0.686000,,0.692,0.240000,0.395,0.169,0.198,,0.235,0.173000,,0.433,,0.718000,,zano,train_3c.py,,Densenet169_mixup0.2_oversamp_torchvision_10-1...,,LOCAL,,"[{""run_id"": ""b33b0bb2146b4e9b82da134e99ba066d""...",,09772da2bfc500c663a0ae5ea4abed390b5462c3,,,,0.800000,0.608581,0.666667,0.819444,1.000000,0.764706,0.777778,0.822812,0.405589,0.540406,0.736842,...,0.705882,0.715278,0.951647,0.195480,0.459072,0.736842,0.449701,0.636364,0.916667,0.875000,0.705882,0.715278,0.902223,0.284585,0.510616,0.875000,0.763889,0.875000,0.958333,0.875000,0.882353,0.881944,0.918920,0.297100,0.300764,0.736842,0.449701,0.636364,0.861111,0.875000,0.705882,0.715278,0.876004,0.364324,0.512006,0.700000,0.349934,0.583333,0.638889,0.875000,0.647059,0.659722,0.681737,0.599726,0.765333,0.888889,0.788811,0.800000,0.861111,1.000000,0.882353,0.888889,0.848806,0.409271,0.413630,0.777778,0.549350,0.700000,0.930556,0.875000,0.764706,0.770833,0.888853,0.373531,0.340210,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
53,788257751769076553,DS1_3c_MSA-P_vs_PD_supervised,4c6d7ae304dd4b9889c0e3925a9ff53c,Densenet169_oversamp_torchvision_10-14_at:11-5...,file:///home/zano/Documents/TESI/FOLDER_CINECA...,False,,0.95,0.2,11:55:02,imagenet,3,False,light,"{0: 17, 1: 17, 2: 17, 3: 17, 4: 17, 5: 17, 6: ...",12487810,Densenet169,10-14,0.00012207764786954147,,32,4,8,,1e-4,nested,AdamW,0,12487810,14,"{0: 92, 1: 93, 2: 91, 3: 91, 4: 91, 5: 90, 6: ...",torchvision,150,,0.682000,,,0.638,0.326000,0.330,0.875,0.630000,0.159,0.737000,0.906,0.625,0.157000,0.716,0.160000,57.341000,0.166,,0.329,0.394000,0.162000,0.638000,,0.163000,0.688000,0.823000,0.248,0.708000,,0.728,0.196000,0.487,0.156,0.164,,0.250,0.143000,,0.483,,0.802000,,zano,train_3c.py,,Densenet169_oversamp_torchvision_10-14_at:11-5...,,LOCAL,,"[{""run_id"": ""4c6d7ae304dd4b9889c0e3925a9ff53c""...",,09772da2bfc500c663a0ae5ea4abed390b5462c3,,,,0.800000,0.608581,0.666667,0.833333,1.000000,0.764706,0.777778,0.819149,0.375672,0.558083,0.761905,...,0.705882,0.722222,0.858696,0.320927,0.722697,0.777778,0.549350,0.700000,0.888889,0.875000,0.764706,0.770833,0.957447,0.152413,0.453210,0.933333,0.887412,1.000000,0.986111,0.875000,0.941176,0.937500,0.893617,0.222107,0.234881,0.761905,0.522976,0.615385,0.875000,1.000000,0.705882,0.722222,0.872340,0.331922,0.519129,0.761905,0.522976,0.615385,0.625000,1.000000,0.705882,0.722222,0.728261,0.585410,0.748836,0.875000,0.763889,0.875000,0.930556,0.875000,0.882353,0.881944,0.938776,0.183493,0.344591,0.842105,0.696311,0.727273,0.958333,1.000000,0.823529,0.833333,0.957447,0.119856,0.332396,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
52,788257751769076553,DS1_3c_MSA-P_vs_PD_supervised,a57283a2c763498aa0b6d0d62a3514f9,Resnet18_oversamp_torchvision_10-14_at:12-25-18,file:///home/zano/Documents/TESI/FOLDER_CINECA...,False,,0.95,0.2,12:25:18,imagenet,3,False,light,"{0: 19, 1: 19, 2: 19, 3: 19, 4: 19, 5: 19, 6: ...",11177538,Resnet18,10-14,0.00012207764786954147,,32,4,8,,2e-4,nested,AdamW,0,11177538,14,"{0: 90, 1: 91, 2: 89, 3: 89, 4: 89, 5: 88, 6: ...",torchvision,150,,0.696000,,,0.685,0.301000,0.331,0.792,0.671000,0.165,0.772000,0.760,0.640,0.148000,0.683,0.200000,29.903000,0.180,,0.324,0.396000,0.134000,0.259000,,0.146000,0.693000,0.766000,0.230,0.706000,,0.727,0.170000,0.471,0.155,0.158,,0.243,0.141000,,0.384,,0.559000,,zano,train_3c.py,,Resnet18_oversamp_torchvision_10-14_at:12-25-18,,LOCAL,,"[{""run_id"": ""a57283a2c763498aa0b6d0d62a3514f9""...",,09772da2bfc500c663a0ae5ea4abed390b5462c3,,,,0.736842,0.477778,0.700000,0.833333,0.777778,0.736842,0.738889,0.902174,0.223948,0.564750,0.736842,...,0.736842,0.738889,0.877778,0.312284,0.570735,0.800000,0.716115,1.000000,0.988889,0.666667,0.842105,0.833333,0.902174,0.205880,0.244002,0.875000,0.805076,1.000000,0.955556,0.777778,0.894737,0.888889,0.945652,0.200414,0.441003,0.782609,0.566947,0.642857,0.777778,1.000000,0.736842,0.750000,0.934783,0.221560,0.698692,0.761905,0.506048,0.666667,0.677778,0.888889,0.736842,0.744444,0.855556,0.371157,0.889954,0.818182,0.644503,0.692308,0.900000,1.000000,0.789474,0.800000,0.864583,0.267473,0.412843,0.842105,0.688889,0.800000,0.888889,0.888889,0.842105,0.844444,0.847826,0.343005,0.412065,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
51,788257751769076553,DS1_3c_MSA-P_vs_PD_supervised,7fc11b854e3d42f69f9d7835c2fbfa18,ViT_monai_10-14_at:13-28-54,file:///home/zano/Documents/TESI/FOLDER_CINECA...,False,,0.95,0.2,13:28:54,,3,False,light,"{0: 17, 1: 17, 2: 17, 3: 17, 4: 17, 5: 17, 6: ...",1022210,ViT,10-14,0.00012207764786954147,,32,4,8,,1e-5,nested,AdamW,0,1022210,"{'fold_0': 8, 'fold_1': 8, 'fold_2': 8, 'fold_...","{0: 92, 1: 93, 2: 91, 3: 91, 4: 91, 5: 90, 6: ...",monai,450,,0.745000,,,0.746,0.321000,0.335,0.781,0.751000,0.165,0.800000,0.750,0.735,0.161000,0.738,0.177000,58.457000,0.221,,0.412,0.506000,0.181000,0.244000,,0.160000,0.746000,0.760000,0.280,0.732000,,0.750,0.235000,0.522,0.180,0.204,,0.263,0.158000,,0.498,,0.567000,,zano,train_vit.py,,ViT_monai_10-14_at:13-28-54,,LOCAL,,"[{""run_id"": ""7fc11b854e3d42f69f9d7835c2fbfa18""...",,09772da2bfc500c663a0ae5ea4abed390b5462c3,,,,0.714286,0.536739,0.833333,0.888889,0.625000,0.764706,0.756944,0.901099,0.310681,0.474436,0.631579,...,0.588235,0.597222,0.788889,0.441957,0.767590,0.941176,0.888889,0.888889,0.986111,1.000000,0.941176,0.944444,0.857143,0.316443,0.240506,0.769231,0.684653,1.000000,0.930556,0.625000,0.823529,0.812500,0.849462,0.354117,0.413275,0.777778,0.549350,0.700000,0.833333,0.875000,0.764706,0.770833,0.802198,0.362657,0.517453,0.666667,0.408491,0.714286,0.777778,0.625000,0.705882,0.701389,0.880435,0.325742,0.595835,0.823529,0.652778,0.777778,0.875000,0.875000,0.823529,0.826389,0.838710,0.393463,0.433848,0.823529,0.652778,0.777778,0.972222,0.875000,0.823529,0.826389,0.890110,0.291051,0.403139,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,650492548885392028,pretrained_subslice_MSA_vs_PD_supervised,f8bc44c3a3bb42c7a050e4fb5daa593d,Densenet121_oversamp_TL_pretrained:imagenet-mi...,file:///leonardo_work/pMI24_EleBr_1/lzanotto/F...,,,,0.2,,imagenet-microscopynet,,True,,"{0: 105, 1: 104, 2: 104, 3: 105, 4: 106, 5: 10...",2162178,Densenet121,,,,32,,8,,1e-4,,,,6955906,117,"{0: 589, 1: 588, 2: 586, 3: 594, 4: 597, 5: 59...",monai,210,263,0.679439,,,,0.132420,,,0.523816,,0.726629,,,0.072226,,0.097431,61.439694,,,,0.310653,0.063319,7.950422,,0.074489,0.659260,0.602858,,0.552457,,,0.139658,,,,,,0.050960,,,,3.927710,,lzanotto,train_pretrained.py,,Densenet121_oversamp_TL_pretrained:imagenet-mi...,,LOCAL,,"[{""run_id"": ""f8bc44c3a3bb42c7a050e4fb5daa593d""...",,,,,,0.810127,0.715268,0.711111,0.949580,0.941176,0.855769,0.877731,1.000000,0.003956,9.742272,0.873239,...,0.914286,0.907143,0.998731,0.007718,4.810452,0.911765,0.870388,0.885714,0.962542,0.939394,0.942857,0.941919,0.992593,0.026215,10.435924,0.857143,0.786163,0.767442,0.942042,0.970588,0.892157,0.911765,0.997382,0.008579,41.715436,0.852941,0.787077,0.878788,0.972973,0.828571,0.908257,0.887259,1.000000,0.004324,0.222618,0.873239,0.810315,0.837838,0.973488,0.911765,0.914286,0.913629,0.998737,0.006690,3.769356,0.876712,0.811845,0.842105,0.953209,0.914286,0.913462,0.913665,0.997436,0.015080,29.870895,0.916667,0.873810,0.916667,0.993254,0.916667,0.943396,0.936905,1.000000,0.001416,0.139923,False,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
171,650492548885392028,pretrained_subslice_MSA_vs_PD_supervised,d1413c9a42af466e997d0de502529d5b,Densenet121_oversamp_TL_pretrained:microscopyn...,file:///leonardo_work/pMI24_EleBr_1/lzanotto/F...,,,,0.2,,microscopynet,,True,,"{0: 105, 1: 104, 2: 104, 3: 105, 4: 106, 5: 10...",2162178,Densenet121,,,,32,,8,,1e-4,,,,6955906,117,"{0: 589, 1: 588, 2: 586, 3: 594, 4: 597, 5: 59...",monai,210,263,0.646664,,,,0.129710,,,0.487049,,0.678267,,,0.064719,,0.094281,70.502704,,,,0.236032,0.047639,0.484641,,0.088157,0.619304,0.552891,,0.512312,,,0.084442,,,,,,0.063238,,,,1.314743,,lzanotto,train_pretrained.py,,Densenet121_oversamp_TL_pretrained:microscopyn...,,LOCAL,,"[{""run_id"": ""d1413c9a42af466e997d0de502529d5b""...",,,,,,0.891892,0.839424,0.825000,0.988235,0.970588,0.923077,0.935294,0.997468,0.012488,0.300463,0.865672,...,0.914286,0.892857,1.000000,0.000980,0.320039,0.906250,0.866086,0.935484,0.985690,0.878788,0.942857,0.925505,1.000000,0.002221,0.136970,0.794521,0.684737,0.743590,0.947232,0.852941,0.852941,0.852941,1.000000,0.006220,0.483077,0.774194,0.697811,0.888889,0.955985,0.685714,0.871560,0.822587,1.000000,0.005079,0.332732,0.909091,0.868328,0.937500,0.978459,0.882353,0.942857,0.927092,0.997475,0.006964,0.142001,0.800000,0.714333,0.866667,0.945342,0.742857,0.875000,0.842443,0.998718,0.037587,0.552248,0.857143,0.780247,0.804878,0.957143,0.916667,0.896226,0.901190,1.000000,0.010353,0.293766,False,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
159,650492548885392028,pretrained_subslice_MSA_vs_PD_supervised,49edb9eb8aed422fa6e02b8bf233ddb6,Densenet169_oversamp_TL_pretrained:torchvision...,file:///leonardo_work/pMI24_EleBr_1/lzanotto/F...,,,,0.2,19:38:53,torchvision,3,True,,"{0: 105, 1: 104, 2: 104, 3: 105, 4: 106, 5: 10...",6742018,Densenet169,09-12,,,32,,8,,1e-4,,,,12487810,117,"{0: 589, 1: 588, 2: 586, 3: 594, 4: 597, 5: 59...",torchvision,210,308,0.710036,,,,0.163367,,,0.571595,,0.757194,,,0.080122,,0.082708,61.435939,,,,0.349712,0.097093,0.598910,,0.088914,0.673159,0.579258,,0.572710,,,0.115451,,,,,,0.096955,,,,1.325446,,lzanotto,train_pretrained.py,,Densenet169_oversamp_TL_pretrained:torchvision...,,LOCAL,,"[{""run_id"": ""49edb9eb8aed422fa6e02b8bf233ddb6""...",,,,,,0.882353,0.825210,0.882353,0.979832,0.882353,0.923077,0.912605,1.000000,0.001014,0.200384,0.885714,...,0.923810,0.914286,0.998731,0.004928,0.367573,0.895522,0.846791,0.882353,0.972222,0.909091,0.933333,0.926768,1.000000,0.000792,0.271139,0.849315,0.770329,0.794872,0.942907,0.911765,0.892157,0.897059,1.000000,0.000414,0.494995,0.914286,0.873745,0.914286,0.983012,0.914286,0.944954,0.936873,1.000000,0.000267,0.166994,0.923077,0.890689,0.967742,0.997514,0.882353,0.952381,0.934134,1.000000,0.000054,0.085675,0.835821,0.759693,0.875000,0.954865,0.800000,0.894231,0.871014,1.000000,0.001334,0.410131,0.857143,0.787522,0.882353,0.951984,0.833333,0.905660,0.888095,1.000000,0.000354,0.438702,False,True,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
168,650492548885392028,pretrained_subslice_MSA_vs_PD_supervised,41ce2810528a45408c81a87ca6940387,Resnet18_oversamp_TL_pretrained:microscopynet_...,file:///leonardo_work/pMI24_EleBr_1/lzanotto/F...,,,,0.2,,microscopynet,,True,,"{0: 118, 1: 118, 2: 118, 3: 119, 4: 120, 5: 11...",4853762,Resnet18,,,,64,,8,,2e-4,,,,11177538,117,"{0: 576, 1: 574, 2: 572, 3: 580, 4: 583, 5: 58...",torchvision,210,50,0.685517,,,,0.085201,,,0.517154,,0.732656,,,0.045625,,0.067434,85.895663,,,,0.279322,0.096532,0.361173,,0.044608,0.638859,0.525146,,0.516812,,,0.121456,,,,,,0.070589,,,,1.071495,,lzanotto,train_pretrained.py,,Resnet18_oversamp_TL_pretrained:microscopynet_...,,LOCAL,,"[{""run_id"": ""41ce2810528a45408c81a87ca6940387""...",,,,,,0.886076,0.830227,0.853659,0.976974,0.921053,0.923729,0.923026,1.000000,0.004210,0.223324,0.827586,...,0.873950,0.880380,0.993506,0.026885,0.414519,0.923077,0.886258,0.900000,0.992853,0.947368,0.949580,0.948993,0.998737,0.008174,0.113088,0.948718,0.922744,0.948718,0.977356,0.948718,0.965517,0.961372,1.000000,0.007536,0.181405,0.886076,0.832341,0.897436,0.967771,0.875000,0.926829,0.913404,0.998762,0.014644,0.213125,0.931507,0.905485,1.000000,0.994807,0.871795,0.957627,0.935897,0.998711,0.005375,0.110464,0.888889,0.830985,0.878049,0.963462,0.900000,0.923729,0.917949,0.998688,0.011494,0.247528,0.829268,0.740661,0.829268,0.934856,0.829268,0.883333,0.870330,0.997389,0.019500,0.366317,False,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
import re

# 1. Compile a regex pattern to find 'fold_' + one or more digits
pattern = re.compile(r'fold_\d+')
# 2. Build a list of columns where the pattern is found ANYWHERE
columns_to_drop = [col for col in df.columns if pattern.search(col)]
# 3. Drop the identified columns
df = df.drop(columns=columns_to_drop)

print(f"Dropping {len(columns_to_drop)} columns containing 'fold': {columns_to_drop}")
# 2. Drop the identified columns
# df_cleaned = df.drop(columns=columns_to_drop)

Dropping 0 columns containing 'fold': []
