<a href="https://colab.research.google.com/github/wvsvenkat/MyoPS2020/blob/main/Copy_of_nnUNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install nnunetv2 -q
!pip install nibabel pandas matplotlib -q

import os, shutil, json, re, glob, zipfile
import numpy as np
import nibabel as nib
from google.colab import drive

print("✓ nnU-Net packages installed")


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/211.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m112.6/211.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... 

In [9]:
drive.mount('/content/drive')

# UPDATE THESE PATHS
train_zip = "/content/drive/MyDrive/EMIDEC-DATA/emidec-dataset-1.0.1.zip"
test_zip = "/content/drive/MyDrive/EMIDEC-DATA/emidec-segmentation-testset-1.0.0.zip"

nnunet_root = "/content/nnUNet_data"
results_dir = "/content/drive/MyDrive/EMIDEC-DATA/nnunet_results"

os.makedirs(nnunet_root, exist_ok=True)
os.makedirs(results_dir, exist_ok=True)

# Set nnU-Net environment variables
os.environ['nnUNet_raw'] = f"{nnunet_root}/nnUNet_raw"
os.environ['nnUNet_preprocessed'] = f"{nnunet_root}/nnUNet_preprocessed"
os.environ['nnUNet_results'] = results_dir

print("✓ nnU-Net environment configured")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✓ nnU-Net environment configured


In [14]:
# ============================================
# 📦 Data Conversion for EMIDEC Structure (FIXED)
# ============================================
"""
EMIDEC dataset structure:
Training zips extract to separate folders:
  /emidec-dataset-1.0.1/
    ├── Case_N001/, Case_N002/, ...
    ├── Case_P001/, Case_P002/, ...

Test zip extracts separately:
  /emidec-segmentation-testset-1.0.0/
    ├── Case_101/, Case_102/, ...
"""

def extract_zip(zip_path, extract_to):
    if os.path.exists(zip_path):
        with zipfile.ZipFile(zip_path, 'r') as z:
            z.extractall(extract_to)
        print(f"✓ Extracted: {os.path.basename(zip_path)}")
        return True
    else:
        print(f"❌ Zip not found: {zip_path}")
        return False

# Extract data to SEPARATE directories
extract_base = "/content/emidec_extracted"
os.makedirs(extract_base, exist_ok=True)

train_zip = "/content/drive/MyDrive/EMIDEC-DATA/emidec-dataset-1.0.1.zip"
test_zip = "/content/drive/MyDrive/EMIDEC-DATA/emidec-segmentation-testset-1.0.0.zip"

train_extract = os.path.join(extract_base, "train_data")
test_extract = os.path.join(extract_base, "test_data")

os.makedirs(train_extract, exist_ok=True)
os.makedirs(test_extract, exist_ok=True)

print("Extracting training and test sets to separate folders...")
extract_zip(train_zip, train_extract)
extract_zip(test_zip, test_extract)

# ============================================
# Find the root EMIDEC directories
# ============================================
def find_emidec_root(root):
    """Find directory containing Case_* folders"""
    for dirpath, dirnames, _ in os.walk(root):
        if any(d.startswith('Case_') for d in dirnames):
            return dirpath
    return None

train_root = find_emidec_root(train_extract)
test_root = find_emidec_root(test_extract)

print(f"\nTraining root: {train_root}")
print(f"Test root: {test_root}")

if not train_root:
    print("❌ ERROR: Could not find training data with Case_* folders!")
    print(f"Contents of {train_extract}:")
    for item in os.listdir(train_extract):
        print(f"  - {item}")
    raise Exception("Training data not found in extracted files")

if not test_root:
    print("⚠️ WARNING: Could not find test data")

# ============================================
# Parse training cases
# ============================================
print(f"\nProcessing training data from: {train_root}")

train_case_folders = sorted([d for d in os.listdir(train_root) if d.startswith('Case_')])
print(f"Found {len(train_case_folders)} training cases")

train_cases = {}

for case_folder in train_case_folders:
    case_path = os.path.join(train_root, case_folder)

    # Match training format: Case_[NP]XXX
    match = re.match(r'Case_([NP])(\d+)', case_folder)
    if not match:
        print(f"⚠️ Skipping unknown case format: {case_folder}")
        continue

    case_type = match.group(1)  # 'N' or 'P'
    case_num = int(match.group(2))

    # Find Images and Contours files
    images_dir = os.path.join(case_path, 'Images')
    contours_dir = os.path.join(case_path, 'Contours')

    image_file = None
    label_file = None

    if os.path.exists(images_dir):
        images_list = glob.glob(os.path.join(images_dir, '*.nii.gz')) + \
                      glob.glob(os.path.join(images_dir, '*.nii'))
        if images_list:
            image_file = images_list[0]

    if os.path.exists(contours_dir):
        contours_list = glob.glob(os.path.join(contours_dir, '*.nii.gz')) + \
                        glob.glob(os.path.join(contours_dir, '*.nii'))
        if contours_list:
            label_file = contours_list[0]

    if image_file and label_file:
        train_cases[case_folder] = {
            'case_folder': case_folder,
            'case_type': case_type,
            'case_num': case_num,
            'image': image_file,
            'label': label_file
        }
    else:
        print(f"⚠️ Skipping {case_folder}: Missing image or label")

print(f"✓ Found {len(train_cases)} complete training cases")

# ============================================
# Parse test cases
# ============================================
print(f"\nProcessing test data from: {test_root}")

test_case_folders = sorted([d for d in os.listdir(test_root) if d.startswith('Case_')])
print(f"Found {len(test_case_folders)} test cases")

test_cases = {}

for case_folder in test_case_folders:
    case_path = os.path.join(test_root, case_folder)

    # Match test format: Case_XXX (no N/P prefix)
    match = re.match(r'Case_(\d+)', case_folder)
    if not match:
        print(f"⚠️ Skipping unknown case format: {case_folder}")
        continue

    case_num = int(match.group(1))
    case_type = 'U'  # Unknown

    # Find Images (no Contours in test set)
    images_dir = os.path.join(case_path, 'Images')

    image_file = None
    if os.path.exists(images_dir):
        images_list = glob.glob(os.path.join(images_dir, '*.nii.gz')) + \
                      glob.glob(os.path.join(images_dir, '*.nii'))
        if images_list:
            image_file = images_list[0]

    if image_file:
        test_cases[case_folder] = {
            'case_folder': case_folder,
            'case_type': case_type,
            'case_num': case_num,
            'image': image_file,
            'label': None
        }
    else:
        print(f"⚠️ Skipping {case_folder}: No image found")

print(f"✓ Found {len(test_cases)} test cases")

# Summary
print(f"\n{'='*60}")
print(f"📊 DATASET SUMMARY:")
print(f"{'='*60}")
train_normal = sum(1 for c in train_cases.values() if c['case_type'] == 'N')
train_path = sum(1 for c in train_cases.values() if c['case_type'] == 'P')
print(f"Training: {train_normal} Normal + {train_path} Pathologic = {len(train_cases)} total")
print(f"Test: {len(test_cases)} cases (labels unknown)")
print(f"Total: {len(train_cases) + len(test_cases)} cases")
print(f"{'='*60}\n")

if len(train_cases) == 0:
    print("❌ ERROR: No training cases found! Check zip file paths and structure.")
    raise Exception("Training dataset is empty")

# ============================================
# Setup nnU-Net structure
# ============================================
dataset_dir = os.path.join(os.environ['nnUNet_raw'], "Dataset001_EMIDEC")
os.makedirs(f"{dataset_dir}/imagesTr", exist_ok=True)
os.makedirs(f"{dataset_dir}/labelsTr", exist_ok=True)
os.makedirs(f"{dataset_dir}/imagesTs", exist_ok=True)

# ============================================
# Process training files
# ============================================
print("Copying training files...")
copied_train = 0
case_mapping = {}

for idx, (case_id, case_data) in enumerate(train_cases.items(), start=0):
    try:
        # Create new sequential ID
        new_case_id = f"{idx:04d}"

        # Copy image
        dest_img = f"{dataset_dir}/imagesTr/case_{new_case_id}_0000.nii.gz"
        shutil.copy(case_data['image'], dest_img)

        # Copy label
        dest_lbl = f"{dataset_dir}/labelsTr/case_{new_case_id}.nii.gz"
        shutil.copy(case_data['label'], dest_lbl)

        case_mapping[new_case_id] = {
            'original': case_id,
            'case_type': case_data['case_type'],
            'case_num': case_data['case_num'],
            'split': 'train'
        }

        copied_train += 1

        if (idx + 1) % 20 == 0:
            print(f"  ✓ Copied {idx + 1}/{len(train_cases)} training cases...")

    except Exception as e:
        print(f"❌ Error copying {case_id}: {str(e)}")

print(f"\n✓ Successfully copied {copied_train} training pairs")

# ============================================
# Process test files
# ============================================
print("Copying test files...")
copied_test = 0

for idx, (case_id, case_data) in enumerate(test_cases.items()):
    try:
        # Create new sequential ID for test (offset to avoid collision)
        new_case_id = f"{1000 + idx:04d}"

        # Copy image only
        dest_img = f"{dataset_dir}/imagesTs/case_{new_case_id}_0000.nii.gz"
        shutil.copy(case_data['image'], dest_img)

        case_mapping[new_case_id] = {
            'original': case_id,
            'case_type': case_data['case_type'],
            'case_num': case_data['case_num'],
            'split': 'test',
            'is_test': True
        }

        copied_test += 1

    except Exception as e:
        print(f"❌ Error copying test {case_id}: {str(e)}")

print(f"✓ Successfully copied {copied_test} test cases")

# Save case mapping
mapping_file = os.path.join(dataset_dir, 'case_mapping.json')
with open(mapping_file, 'w') as f:
    json.dump(case_mapping, f, indent=2)

print(f"\n💾 Case mapping saved to: {mapping_file}")
print(f"\n✅ Dataset preparation complete!")
print(f"   Training images: {copied_train}")
print(f"   Test images: {copied_test}")


Extracting training and test sets to separate folders...
✓ Extracted: emidec-dataset-1.0.1.zip
✓ Extracted: emidec-segmentation-testset-1.0.0.zip

Training root: /content/emidec_extracted/train_data/emidec-dataset-1.0.1
Test root: /content/emidec_extracted/test_data/emidec-segmentation-testset-1.0.0

Processing training data from: /content/emidec_extracted/train_data/emidec-dataset-1.0.1
Found 100 training cases
✓ Found 100 complete training cases

Processing test data from: /content/emidec_extracted/test_data/emidec-segmentation-testset-1.0.0
Found 50 test cases
✓ Found 50 test cases

📊 DATASET SUMMARY:
Training: 33 Normal + 67 Pathologic = 100 total
Test: 50 cases (labels unknown)
Total: 150 cases

Copying training files...
  ✓ Copied 20/100 training cases...
  ✓ Copied 40/100 training cases...
  ✓ Copied 60/100 training cases...
  ✓ Copied 80/100 training cases...
  ✓ Copied 100/100 training cases...

✓ Successfully copied 100 training pairs
Copying test files...
✓ Successfully copi

In [17]:
# ============================================
# 🔍 DEBUG: Check Dataset Structure
# ============================================
print("Checking dataset structure...")
print(f"\nDataset directory: {dataset_dir}")

# Check imagesTr
images_tr = glob.glob(f"{dataset_dir}/imagesTr/*.nii.gz")
print(f"\n📁 imagesTr: {len(images_tr)} files")
if images_tr:
    print(f"   Sample: {os.path.basename(images_tr[0])}")

# Check labelsTr
labels_tr = glob.glob(f"{dataset_dir}/labelsTr/*.nii.gz")
print(f"📁 labelsTr: {len(labels_tr)} files")
if labels_tr:
    print(f"   Sample: {os.path.basename(labels_tr[0])}")

# Check imagesTs
images_ts = glob.glob(f"{dataset_dir}/imagesTs/*.nii.gz")
print(f"📁 imagesTs: {len(images_ts)} files")
if images_ts:
    print(f"   Sample: {os.path.basename(images_ts[0])}")

# Check dataset.json
if os.path.exists(f"{dataset_dir}/dataset.json"):
    with open(f"{dataset_dir}/dataset.json", 'r') as f:
        dset_json = json.load(f)
    print(f"\n✓ dataset.json found")
    print(f"  Keys: {list(dset_json.keys())}")
    print(f"  Channel names: {dset_json.get('channel_names')}")
    print(f"  Labels: {dset_json.get('labels')}")
else:
    print(f"\n❌ dataset.json NOT found!")

# Check if copied correctly
if len(images_tr) > 0 and len(labels_tr) > 0:
    print(f"\n✅ Training data looks good: {len(images_tr)} images, {len(labels_tr)} labels")
elif len(images_tr) > 0 and len(labels_tr) == 0:
    print(f"\n❌ PROBLEM: Images copied but NO labels!")
else:
    print(f"\n❌ PROBLEM: Data not copied properly!")

    # ============================================
# 🔧 Fix dataset.json - Update numTraining
# ============================================
import json
from pathlib import Path

dataset_dir = Path(os.environ['nnUNet_raw']) / "Dataset001_EMIDEC"
json_file = dataset_dir / 'dataset.json'

print("Reading current dataset.json...")
with open(json_file, 'r') as f:
    dj = json.load(f)

print(f"Current numTraining: {dj['numTraining']}")

# Count actual training labels
labels_count = len(list((dataset_dir / 'labelsTr').glob('*.nii.gz')))
print(f"Actual training labels: {labels_count}")

# Update
dj['numTraining'] = labels_count

print(f"\nUpdating numTraining to: {labels_count}")

# Write back
with open(json_file, 'w') as f:
    json.dump(dj, f, indent=4)

# Verify
with open(json_file, 'r') as f:
    verify = json.load(f)

print(f"✓ Updated! New numTraining: {verify['numTraining']}")
print(f"\n✓ dataset.json fixed and ready for preprocessing")

# Show the complete dataset.json for reference
print(f"\nFull dataset.json content:")
print(json.dumps(verify, indent=2))



Checking dataset structure...

Dataset directory: /content/nnUNet_data/nnUNet_raw/Dataset001_EMIDEC

📁 imagesTr: 100 files
   Sample: case_0077_0000.nii.gz
📁 labelsTr: 100 files
   Sample: case_0019.nii.gz
📁 imagesTs: 51 files
   Sample: case_1044_0000.nii.gz

✓ dataset.json found
  Keys: ['channel_names', 'labels', 'numTraining', 'file_ending', 'name', 'description', 'reference', 'licence']
  Channel names: {'0': 'DE-MRI'}
  Labels: {'background': 0, 'cavity': 1, 'normal_myocardium': 2, 'myocardial_infarction': 3, 'no_reflow': 4}

✅ Training data looks good: 100 images, 100 labels
Reading current dataset.json...
Current numTraining: 0
Actual training labels: 100

Updating numTraining to: 100
✓ Updated! New numTraining: 100

✓ dataset.json fixed and ready for preprocessing

Full dataset.json content:
{
  "channel_names": {
    "0": "DE-MRI"
  },
  "labels": {
    "background": 0,
    "cavity": 1,
    "normal_myocardium": 2,
    "myocardial_infarction": 3,
    "no_reflow": 4
  },
  "num

In [None]:
# ============================================
# 🔧 nnU-Net Preprocessing (RETRY)
# ============================================
print("Starting nnU-Net preprocessing (attempt 2)...")
print("This will take 10-20 minutes\n")

!nnUNetv2_plan_and_preprocess -d 1 --verify_dataset_integrity

print("\n✅ Preprocessing complete!")


Starting nnU-Net preprocessing (attempt 2)...
This will take 10-20 minutes

Fingerprint extraction...
Dataset001_EMIDEC
NiftiImageIO (0x21d8e630): /content/nnUNet_data/nnUNet_raw/Dataset001_EMIDEC/imagesTr/case_0000_0000.nii.gz has unexpected scales in sform

NiftiImageIO (0x21d8e630): /content/nnUNet_data/nnUNet_raw/Dataset001_EMIDEC/imagesTr/case_0000_0000.nii.gz has unexpected scales in sform

Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> as reader/writer
NiftiImageIO (0x37b2cf50): /content/nnUNet_data/nnUNet_raw/Dataset001_EMIDEC/labelsTr/case_0000.nii.gz has unexpected scales in sform

NiftiImageIO (0x37b2cf50): /content/nnUNet_data/nnUNet_raw/Dataset001_EMIDEC/labelsTr/case_0000.nii.gz has unexpected scales in sform

NiftiImageIO (0x37b98580): /content/nnUNet_data/nnUNet_raw/Dataset001_EMIDEC/labelsTr/case_0001.nii.gz has unexpected scales in sform

NiftiImageIO (0x37b98580): /content/nnUNet_data/nnUNet_raw/Dataset001_EMIDEC/labelsTr/case_0001.nii.gz has un

In [None]:
print("🏋️ Training nnU-Net (this takes 6-8 hours)...")
print("Training with fold 0 for single-fold training")
!nnUNetv2_train 1 3d_fullres 0 --npz

# Optional: For ensemble results (takes 5x longer)
# !nnUNetv2_train 1 3d_fullres all --npz


In [None]:
print("🔮 Running inference on test set...")

test_output = f"{results_dir}/test_predictions"
os.makedirs(test_output, exist_ok=True)

!nnUNetv2_predict -i {dataset_dir}/imagesTs \
    -o {test_output} \
    -d 1 \
    -c 3d_fullres \
    -f 0

print(f"\n✅ Predictions saved to: {test_output}")

# Backup to Drive
backup = "/content/drive/MyDrive/EMIDEC-DATA/nnunet_backup"
os.makedirs(backup, exist_ok=True)
shutil.copytree(test_output, f"{backup}/predictions", dirs_exist_ok=True)

print("💾 Results backed up to Google Drive")
print("\n🎉 nnU-Net training complete!")
print("Expected performance: ~92% myocardium Dice, ~76% infarction Dice")
