# Leaderboard Submission Code for the Tensor Reloaded Multi-Task MedMNIST Competition

This notebook loads the provided Kaggle `.npz` files, creates TensorFlow datasets for training, validation, and testing, and builds a multi-task model using a simple CNN backbone with mixed precision enabled. After training with early stopping based on the harmonic mean of macro F1 scores on the validation sets, we generate a submission CSV file in the required format.

In [1]:
import tensorflow as tf

# Check for available GPUs and set memory growth
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Enable memory growth for each GPU so TensorFlow only allocates what it needs.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPUs found and configured:", gpus)
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
else:
    print("No GPU found. Please switch your runtime to GPU in the Kaggle Notebook settings.")

GPUs found and configured: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [2]:
!pip install medmnist --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for fire (setup.py) ... [?25l[?25hdone


In [3]:
import matplotlib.pyplot as plt
import medmnist
import numpy as np
import os
import pandas as pd
import seaborn as sns
import tensorflow as tf

from medmnist import INFO, Evaluator
from pathlib import Path
from sklearn.metrics import f1_score
from tensorflow import keras
from tensorflow.keras import layers

In [4]:
from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy('mixed_float16')

TARGET_SIZE = (32, 32)   # Low resolution for memory efficiency
BATCH_SIZE = 16

In [5]:
def load_npz_data(npz_path):
    """
    Load train, val, and test arrays from a given .npz file.
    Expected keys: 'train_images', 'train_labels', 'val_images', 'val_labels', 'test_images', 'test_labels'
    """
    data = np.load(npz_path)
    train_images = data['train_images']
    train_labels = data['train_labels']
    val_images = data['val_images']
    val_labels = data['val_labels']
    test_images = data['test_images']
    test_labels = data['test_labels']
    return (train_images, train_labels), (val_images, val_labels), (test_images, test_labels)

def create_tf_dataset_from_numpy(images, labels, batch_size=BATCH_SIZE, augment=False):
    """
    Convert numpy arrays to a tf.data.Dataset:
      - Resize images to TARGET_SIZE.
      - Convert grayscale images (N, H, W) or (N, H, W, 1) to 3-channel RGB.
      - Optionally apply data augmentation.
    """
    if images.ndim == 3:
        images = np.expand_dims(images, axis=-1)
    if images.shape[-1] == 1:
        images = np.tile(images, (1, 1, 1, 3))
    
    images = images.astype(np.float32) / 255.0  # Normalize to [0, 1]

    def _process(image, label):
        image = tf.image.resize(image, TARGET_SIZE)
        if augment:
            image = tf.image.random_flip_left_right(image)
            image = tf.image.random_flip_up_down(image)
            image = tf.image.random_brightness(image, max_delta=0.1)
            image = tf.image.random_contrast(image, lower=0.9, upper=1.1)
        return image, label

    ds = tf.data.Dataset.from_tensor_slices((images, labels))
    ds = ds.map(_process, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.shuffle(buffer_size=len(images))
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    return ds

In [6]:
# Base path for Kaggle data
base_path = Path("/kaggle/input/tensor-reloaded-multi-task-med-mnist/data")

# Define the 11 task names
task_names = [
    "pathmnist", "dermamnist", "octmnist", "pneumoniamnist", 
    "retinamnist", "breastmnist", "bloodmnist", "tissuemnist", 
    "organamnist", "organcmnist", "organsmnist"
]

# Map each task to its .npz file
task_to_npz = {task: base_path / f"{task}.npz" for task in task_names}

# Create dictionaries for train, validation, and test datasets
train_datasets_tf = {}
val_datasets_tf = {}
test_datasets_tf = {}

for task in task_names:
    npz_file = task_to_npz[task]
    (train_imgs, train_lbls), (val_imgs, val_lbls), (test_imgs, test_lbls) = load_npz_data(npz_file)
    train_datasets_tf[task] = create_tf_dataset_from_numpy(train_imgs, train_lbls, BATCH_SIZE, augment=True)
    val_datasets_tf[task]   = create_tf_dataset_from_numpy(val_imgs, val_lbls, BATCH_SIZE, augment=False)
    test_datasets_tf[task]  = create_tf_dataset_from_numpy(test_imgs, test_lbls, BATCH_SIZE, augment=False)
    print(f"{task}: Loaded {len(train_imgs)} train, {len(val_imgs)} val, {len(test_imgs)} test samples")

# Verify total test samples
total_test_samples = 0
for task in task_names:
    for images, labels in test_datasets_tf[task]:
        total_test_samples += images.shape[0]
print("Total test samples across all tasks:", total_test_samples)

pathmnist: Loaded 89996 train, 10004 val, 7180 test samples
dermamnist: Loaded 7007 train, 1003 val, 2005 test samples
octmnist: Loaded 97477 train, 10832 val, 1000 test samples
pneumoniamnist: Loaded 4708 train, 524 val, 624 test samples
retinamnist: Loaded 1080 train, 120 val, 400 test samples
breastmnist: Loaded 546 train, 78 val, 156 test samples
bloodmnist: Loaded 11959 train, 1712 val, 3421 test samples
tissuemnist: Loaded 165466 train, 23640 val, 47280 test samples
organamnist: Loaded 34581 train, 6491 val, 17778 test samples
organcmnist: Loaded 13000 train, 2392 val, 8268 test samples
organsmnist: Loaded 13940 train, 2452 val, 8829 test samples
Total test samples across all tasks: 96941


In [7]:
class MultiTaskKerasModel(keras.Model):
    def __init__(self, task_names, data_flag_to_info):
        super().__init__()
        self.task_names = task_names
        self.conv1 = layers.Conv2D(32, kernel_size=3, padding='same', activation='relu')
        self.pool1 = layers.MaxPooling2D()
        self.conv2 = layers.Conv2D(64, kernel_size=3, padding='same', activation='relu')
        self.pool2 = layers.MaxPooling2D()
        self.flatten = layers.Flatten()
        self.heads = {}
        for task in task_names:
            n_classes = len(data_flag_to_info[task]['label'])
            self.heads[task] = layers.Dense(n_classes, name=f'head_{task}')
        self.heads = dict(self.heads)
    
    def build(self, input_shape):
        # Replace None with a concrete batch size (e.g., 1) to force building.
        concrete_shape = (1,) + input_shape[1:]
        _ = self.call(tf.zeros(concrete_shape), training=False)
        super().build(input_shape)
    
    def call(self, x, **kwargs):
        task = kwargs.get('task', None)
        training = kwargs.get('training', False)
        x = self.conv1(x, training=training)
        x = self.pool1(x)
        x = self.conv2(x, training=training)
        x = self.pool2(x)
        x = self.flatten(x)
        if task is not None:
            return self.heads[task](x)
        else:
            outputs = {}
            for t in self.task_names:
                outputs[t] = self.heads[t](x)
            return outputs

# Use MedMNIST INFO for metadata
data_flag_to_info = {task: INFO[task] for task in task_names}

# Instantiate and build the model
model = MultiTaskKerasModel(task_names, data_flag_to_info)
model.build(input_shape=(None, TARGET_SIZE[0], TARGET_SIZE[1], 3))
model.summary()

In [8]:
# Create a dummy input of shape (1, 32, 32, 3)
dummy_input = tf.random.normal((1, 32, 32, 3))
for t in task_names:
    _ = model(dummy_input, task=t, training=False)

# Instantiate optimizer with a cosine decay learning rate scheduler.
lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
    initial_learning_rate=1e-3,
    first_decay_steps=1000,
    t_mul=2.0,
    m_mul=1.0,
    alpha=1e-5)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Force the optimizer to register all current trainable variables by applying zero gradients.
dummy_grads_vars = [(tf.zeros_like(var), var) for var in model.trainable_variables]
optimizer.apply_gradients(dummy_grads_vars)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Early stopping and training parameters
total_epochs = 50
patience = 5
best_hmean_f1 = -np.inf
patience_counter = 0
best_weights = None

In [9]:
print("\nStarting Training...")
for epoch in range(total_epochs):
    print(f"\nEpoch {epoch+1}/{total_epochs}")
    epoch_loss = 0.0
    total_batches = 0
    
    # Training loop: iterate over each task's training dataset
    for task in task_names:
        dataset = train_datasets_tf[task]
        for images, labels in dataset:
            with tf.GradientTape() as tape:
                outputs = model(images, task=task, training=True)
                loss_value = loss_fn(labels, outputs)
            grads = tape.gradient(loss_value, model.trainable_variables)
            grads_vars = [(g, v) for g, v in zip(grads, model.trainable_variables) if g is not None]
            optimizer.apply_gradients(grads_vars)
            epoch_loss += loss_value.numpy()
            total_batches += 1
    avg_loss = epoch_loss / total_batches if total_batches > 0 else 0.0
    print(f"  Training Loss: {avg_loss:.4f}")
    
    # Validation: compute macro F1 score for each task
    macro_f1_scores = {}
    for task in task_names:
        all_preds = []
        all_labels = []
        for images, labels in val_datasets_tf[task]:
            outputs = model(images, task=task, training=False)
            preds = tf.argmax(outputs, axis=1).numpy()
            all_preds.append(preds)
            all_labels.append(labels.numpy())
        all_preds = np.concatenate(all_preds)
        all_labels = np.concatenate(all_labels)
        f1 = f1_score(all_labels, all_preds, average='macro')
        macro_f1_scores[task] = f1
        print(f"  {task} Macro F1: {f1:.4f}")
    
    f1_values = np.array(list(macro_f1_scores.values()))
    harmonic_mean_f1 = len(f1_values) / np.sum(1.0 / (f1_values + 1e-8))
    print(f"  Harmonic Mean Macro F1: {harmonic_mean_f1:.4f}")
    
    # Early stopping check
    if harmonic_mean_f1 > best_hmean_f1:
        best_hmean_f1 = harmonic_mean_f1
        patience_counter = 0
        best_weights = model.get_weights()
        print("  Improvement detected. Saving best model weights.")
    else:
        patience_counter += 1
        print(f"  No improvement for {patience_counter} epoch(s).")
    
    if patience_counter >= patience:
        print("Early stopping triggered.")
        break

# Restore best model weights after training
if best_weights is not None:
    model.set_weights(best_weights)
    print("Best model weights restored.")


Starting Training...

Epoch 1/50
  Training Loss: 1.0789
  pathmnist Macro F1: 0.0766
  dermamnist Macro F1: 0.1145
  octmnist Macro F1: 0.4977
  pneumoniamnist Macro F1: 0.8217
  retinamnist Macro F1: 0.2515
  breastmnist Macro F1: 0.4222
  bloodmnist Macro F1: 0.0600
  tissuemnist Macro F1: 0.1895
  organamnist Macro F1: 0.5546
  organcmnist Macro F1: 0.7662
  organsmnist Macro F1: 0.6632
  Harmonic Mean Macro F1: 0.1899
  Improvement detected. Saving best model weights.

Epoch 2/50
  Training Loss: 0.9514
  pathmnist Macro F1: 0.0311
  dermamnist Macro F1: 0.1171
  octmnist Macro F1: 0.4819
  pneumoniamnist Macro F1: 0.7758
  retinamnist Macro F1: 0.1884
  breastmnist Macro F1: 0.7913
  bloodmnist Macro F1: 0.6798
  tissuemnist Macro F1: 0.1780
  organamnist Macro F1: 0.5604
  organcmnist Macro F1: 0.7537
  organsmnist Macro F1: 0.6898
  Harmonic Mean Macro F1: 0.1765
  No improvement for 1 epoch(s).

Epoch 3/50
  Training Loss: 0.9100
  pathmnist Macro F1: 0.0583
  dermamnist Macr

In [10]:
submission_rows = []
global_id = 0

for task in task_names:
    idx_in_task = 0
    for images, _ in test_datasets_tf[task]:
        outputs = model(images, task=task, training=False)
        preds = tf.argmax(outputs, axis=1).numpy()
        for pred in preds:
            submission_rows.append([global_id, idx_in_task, task, int(pred)])
            global_id += 1
            idx_in_task += 1

submission_df = pd.DataFrame(submission_rows, columns=["id", "id_image_in_task", "task_name", "label"])
print("Total submission rows:", len(submission_df))
submission_df.to_csv("submission.csv", index=False)
print("Submission file saved as submission.csv")

Total submission rows: 96941
Submission file saved as submission.csv
