In [1]:
import torch

In [2]:
print("Cuda available:", torch.cuda.is_available())

Cuda available: True


In [3]:
print("GPU", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU found")

GPU NVIDIA GeForce GTX 1650


In [6]:
"""
inspect_brats_dataset.py

Scan BraTS training dataset structure, verify all modalities, segmentation files,
and compute tumor slice statistics. Saves per-patient summary in CSV + JSON and
optionally saves 3-slice preview PNGs.

Usage:
    python scripts/inspect_brats_dataset.py
"""

import os
import glob
import csv
import json
import argparse
from tqdm import tqdm
import numpy as np
import nibabel as nib
import cv2

# -------------------------------
# ✅ CONFIGURATION (use raw strings on Windows)
# -------------------------------
BRA_TS_PATH = r"C:\Users\manav\Downloads\archive\BraTS2020_TrainingData\MICCAI_BraTS2020_TrainingData"
OUTPUT_DIR = r"outputs\inspect"
SAVE_PREVIEWS = True   # Set to False to skip image previews
MAX_PATIENTS = None    # e.g., 5 for quick test, or None for all

# -------------------------------
# 🔧 HELPER FUNCTIONS
# -------------------------------
def find_modality_file(patient_dir, pattern):
    matches = glob.glob(os.path.join(patient_dir, pattern))
    return matches[0] if matches else None

def normalize_uint8(img2d):
    img = np.nan_to_num(img2d)
    mn, mx = img.min(), img.max()
    if mx - mn < 1e-6:
        return (np.zeros_like(img) * 255).astype(np.uint8)
    norm = (img - mn) / (mx - mn)
    return (norm * 255).astype(np.uint8)

def make_preview(slice_list, outpath):
    imgs_rgb = []
    for s in slice_list:
        if s.ndim == 2:
            imgs_rgb.append(cv2.cvtColor(s, cv2.COLOR_GRAY2BGR))
        else:
            imgs_rgb.append(s)
    preview = np.hstack(imgs_rgb)
    cv2.imwrite(outpath, preview)

def inspect_patient(patient_dir, patient_name, save_previews=False, preview_outdir=None):
    patterns = {
        "t1": f"{patient_name}*t1.nii*",
        "t1ce": f"{patient_name}*t1ce.nii*",
        "t2": f"{patient_name}*t2.nii*",
        "flair": f"{patient_name}*flair.nii*",
        "seg": f"{patient_name}*seg.nii*",
    }

    info = {"patient": patient_name}
    for key, pat in patterns.items():
        f = find_modality_file(patient_dir, pat)
        info[f"{key}_file"] = f if f else ""
        info[f"{key}_present"] = bool(f)

    for key in ["t1", "t1ce", "t2", "flair", "seg"]:
        f = info.get(f"{key}_file")
        if f:
            try:
                nii = nib.load(f)
                shape = tuple(nii.header.get_data_shape())
                while len(shape) > 3 and shape[-1] == 1:
                    shape = shape[:-1]
                info[f"{key}_shape"] = shape
                info[f"{key}_slices"] = int(shape[2]) if len(shape) >= 3 else 0
            except Exception as e:
                info[f"{key}_shape"] = None
                info[f"{key}_slices"] = 0
                info[f"{key}_error"] = str(e)
        else:
            info[f"{key}_shape"] = None
            info[f"{key}_slices"] = 0

    # tumor slice computation
    seg_file = info.get("seg_file")
    if seg_file:
        try:
            seg_nii = nib.load(seg_file)
            seg_arr = seg_nii.get_fdata().astype(np.int16)
            if seg_arr.ndim >= 3:
                axis_slices = seg_arr.shape[2]
                tumor_mask_per_slice = np.any(seg_arr > 0, axis=(0, 1))
                tumor_slices = int(np.count_nonzero(tumor_mask_per_slice))
                tumor_pct = float(tumor_slices / max(1, axis_slices) * 100.0)
                info["tumor_slices"] = tumor_slices
                info["tumor_pct"] = tumor_pct
            else:
                info["tumor_slices"] = 0
                info["tumor_pct"] = 0.0
        except Exception as ex:
            info["tumor_slices"] = None
            info["tumor_pct"] = None
            info["seg_error"] = str(ex)
    else:
        info["tumor_slices"] = None
        info["tumor_pct"] = None

    if save_previews and preview_outdir:
        for pref in ["t1ce", "flair", "t1", "t2"]:
            f = info.get(f"{pref}_file")
            if f:
                try:
                    nii = nib.load(f)
                    arr = nii.get_fdata()
                    if arr.ndim < 3:
                        break
                    s0 = 0
                    s1 = arr.shape[2] // 2
                    s2 = max(0, arr.shape[2] - 1)
                    slices = [normalize_uint8(arr[:, :, s]) for s in [s0, s1, s2]]
                    os.makedirs(preview_outdir, exist_ok=True)
                    outname = os.path.join(preview_outdir, f"{patient_name}_{pref}_preview.png")
                    make_preview(slices, outname)
                    info["preview_saved"] = outname
                except Exception as e:
                    info["preview_saved"] = ""
                    info["preview_error"] = str(e)
                break

    return info

# -------------------------------
# 🧠 MAIN
# -------------------------------
if __name__ == "__main__":
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    preview_dir = os.path.join(OUTPUT_DIR, "previews")

    patients = [d for d in os.listdir(BRA_TS_PATH) if os.path.isdir(os.path.join(BRA_TS_PATH, d))]
    patients.sort()
    if MAX_PATIENTS:
        patients = patients[:MAX_PATIENTS]

    print(f"Found {len(patients)} patient folders in: {BRA_TS_PATH}")

    summary = []
    for patient in tqdm(patients, desc="Inspecting patients"):
        patient_path = os.path.join(BRA_TS_PATH, patient)
        info = inspect_patient(patient_path, patient, save_previews=SAVE_PREVIEWS, preview_outdir=preview_dir)
        summary.append(info)

    # write summary.csv
    csv_fields = sorted(list({k for d in summary for k in d.keys()}))
    csv_path = os.path.join(OUTPUT_DIR, "summary.csv")
    with open(csv_path, "w", newline="", encoding="utf-8") as cf:
        writer = csv.DictWriter(cf, fieldnames=csv_fields)
        writer.writeheader()
        for row in summary:
            writer.writerow(row)

    # write summary.json
    json_path = os.path.join(OUTPUT_DIR, "summary.json")
    with open(json_path, "w", encoding="utf-8") as jf:
        json.dump(summary, jf, indent=2)

    # print quick summary
    total = len(summary)
    missing_seg = sum(1 for s in summary if not s.get("seg_present"))
    avg_slices = np.mean([
        s.get("t1ce_slices") or s.get("t1_slices") or s.get("t2_slices") or s.get("flair_slices") or 0
        for s in summary
    ])
    avg_tumor_pct = np.nanmean([s["tumor_pct"] for s in summary if s.get("tumor_pct") not in (None,)])
    print("\nSUMMARY:")
    print(f"  Patients inspected: {total}")
    print(f"  Patients missing seg: {missing_seg}")
    print(f"  Avg slices per volume (approx): {avg_slices:.1f}")
    print(f"  Avg % of slices with tumor (for patients with seg): {avg_tumor_pct:.2f}%")
    print(f"\nCSV saved at: {csv_path}")
    print(f"JSON saved at: {json_path}")
    if SAVE_PREVIEWS:
        print(f"Previews saved at: {preview_dir}")


Found 369 patient folders in: C:\Users\manav\Downloads\archive\BraTS2020_TrainingData\MICCAI_BraTS2020_TrainingData


Inspecting patients: 100%|██████████| 369/369 [00:50<00:00,  7.37it/s]


SUMMARY:
  Patients inspected: 369
  Patients missing seg: 1
  Avg slices per volume (approx): 155.0
  Avg % of slices with tumor (for patients with seg): 42.70%

CSV saved at: outputs\inspect\summary.csv
JSON saved at: outputs\inspect\summary.json
Previews saved at: outputs\inspect\previews





In [8]:
"""
extract_slices_for_efficientnet_fixed.py
"""

import os, glob, random, shutil
import numpy as np
import nibabel as nib
import cv2
from tqdm import tqdm

# ------------------------------
# CONFIGURATION
# ------------------------------
DATASET_PATH = r"C:\Users\manav\Downloads\archive\BraTS2020_TrainingData\MICCAI_BraTS2020_TrainingData"
OUTPUT_PATH = r"dataset_slices"
MODALITIES = ["t1ce", "t2", "flair"]
IMG_SIZE = (224, 224)
VAL_SPLIT = 0.15
MAX_PATIENTS = None  # set small number to test, e.g., 5

# ------------------------------
def normalize(img):
    img = np.nan_to_num(img)
    mn, mx = np.percentile(img, (1, 99))
    if mx - mn < 1e-6:
        return np.zeros_like(img)
    img = np.clip((img - mn) / (mx - mn), 0, 1)
    return img

def ensure_dirs():
    for split in ["train", "val"]:
        for label in ["tumor", "normal"]:
            os.makedirs(os.path.join(OUTPUT_PATH, split, label), exist_ok=True)

def find_file(patient_path, patient_name, modality):
    # Finds nii OR nii.gz file for the given modality
    for ext in [".nii.gz", ".nii"]:
        files = glob.glob(os.path.join(patient_path, f"{patient_name}_{modality}{ext}"))
        if files:
            return files[0]
    # fallback: search more broadly if naming differs
    files = glob.glob(os.path.join(patient_path, f"*{modality}.nii*"))
    return files[0] if files else None

def main():
    ensure_dirs()

    patients = sorted([d for d in os.listdir(DATASET_PATH) if os.path.isdir(os.path.join(DATASET_PATH, d))])
    if MAX_PATIENTS:
        patients = patients[:MAX_PATIENTS]

    random.shuffle(patients)
    val_count = int(len(patients) * VAL_SPLIT)
    val_patients = set(patients[:val_count])

    total_slices = 0
    saved_slices = 0

    for patient in tqdm(patients, desc="Extracting slices"):
        pdir = os.path.join(DATASET_PATH, patient)
        seg_path = find_file(pdir, patient, "seg")
        if not seg_path:
            continue

        seg = nib.load(seg_path).get_fdata().astype(np.uint8)
        seg_bin = (seg > 0).astype(np.uint8)

        # Load all modalities
        channels = []
        for mod in MODALITIES:
            f = find_file(pdir, patient, mod)
            if f:
                channels.append(normalize(nib.load(f).get_fdata()))
        if len(channels) != 3:
            continue

        vol = np.stack(channels, axis=-1)  # (H, W, S, 3)
        slices = vol.shape[2]

        for z in range(slices):
            img = vol[:, :, z, :]
            mask = seg_bin[:, :, z]
            label = "tumor" if np.any(mask) else "normal"

            img_resized = cv2.resize((img * 255).astype(np.uint8), IMG_SIZE)
            split = "val" if patient in val_patients else "train"
            out_dir = os.path.join(OUTPUT_PATH, split, label)
            out_name = f"{patient}_z{z:03d}.png"
            cv2.imwrite(os.path.join(out_dir, out_name), img_resized)
            saved_slices += 1
        total_slices += slices

    print(f"\n✅ Slice extraction complete!")
    print(f"Total slices processed: {total_slices}")
    print(f"Total images saved: {saved_slices}")
    print(f"Output saved to: {OUTPUT_PATH}")

if __name__ == "__main__":
    main()


Extracting slices: 100%|██████████| 369/369 [10:58<00:00,  1.78s/it]


✅ Slice extraction complete!
Total slices processed: 57040
Total images saved: 57040
Output saved to: dataset_slices





In [2]:
"""
test_efficientnet_setup.py

Quick test for EfficientNetB0 + GPU + dataset loading.
"""

import torch
import torch.nn as nn
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import os

# -------------------------------
# CONFIGURATION
# -------------------------------
DATA_DIR = r"C:\Users\manav\Documents\dataset_slices"
BATCH_SIZE = 16
IMG_SIZE = 224
USE_GPU = torch.cuda.is_available()

print(f"✅ GPU Available: {USE_GPU}")
if USE_GPU:
    print(f"Using device: {torch.cuda.get_device_name(0)}")

# -------------------------------
# DATA TRANSFORMS & LOADERS
# -------------------------------
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

train_dir = os.path.join(DATA_DIR, "train")
val_dir = os.path.join(DATA_DIR, "val")

train_data = datasets.ImageFolder(train_dir, transform=transform)
val_data = datasets.ImageFolder(val_dir, transform=transform)

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

print(f"Train images: {len(train_data)} | Val images: {len(val_data)}")
print(f"Classes: {train_data.classes}")

# -------------------------------
# MODEL SETUP
# -------------------------------
model = models.efficientnet_b0(weights="IMAGENET1K_V1")
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 2)  # binary classification

if USE_GPU:
    model = model.cuda()

# -------------------------------
# QUICK TEST LOOP
# -------------------------------
images, labels = next(iter(train_loader))
if USE_GPU:
    images, labels = images.cuda(), labels.cuda()

with torch.no_grad():
    outputs = model(images)
    preds = torch.argmax(outputs, dim=1)

print(f"\n✅ Forward pass successful!")
print(f"Batch size: {images.shape[0]}")
print(f"Predictions: {preds[:8].cpu().numpy()}")
print(f"True labels: {labels[:8].cpu().numpy()}")


✅ GPU Available: True
Using device: NVIDIA GeForce GTX 1650
Train images: 48515 | Val images: 8525
Classes: ['normal', 'tumor']

✅ Forward pass successful!
Batch size: 16
Predictions: [0 0 0 1 0 0 1 1]
True labels: [0 1 1 1 0 1 1 1]


In [3]:
"""
train_efficientnet_b0.py

Full training script for binary tumor classification (EfficientNetB0).
"""

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import os, time, copy

# ------------------------------
# CONFIGURATION
# ------------------------------
DATA_DIR = r"C:\Users\manav\Documents\dataset_slices"
BATCH_SIZE = 32
EPOCHS = 10
IMG_SIZE = 224
LR = 1e-4
CHECKPOINT_PATH = "efficientnetb0_best.pth"

USE_GPU = torch.cuda.is_available()
device = torch.device("cuda" if USE_GPU else "cpu")
print(f"✅ Using device: {device}")

# ------------------------------
# DATA TRANSFORMS
# ------------------------------
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
}

image_datasets = {
    'train': datasets.ImageFolder(os.path.join(DATA_DIR, 'train'), data_transforms['train']),
    'val': datasets.ImageFolder(os.path.join(DATA_DIR, 'val'), data_transforms['val'])
}
dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=BATCH_SIZE, shuffle=True, num_workers=0),
    'val': DataLoader(image_datasets['val'], batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
print(f"Classes: {class_names} | Train: {dataset_sizes['train']} | Val: {dataset_sizes['val']}")

# ------------------------------
# MODEL SETUP
# ------------------------------
model = models.efficientnet_b0(weights="IMAGENET1K_V1")
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 2)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

# ------------------------------
# TRAINING LOOP
# ------------------------------
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
start_time = time.time()

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    print("-" * 30)

    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
        else:
            model.eval()

        running_loss, running_corrects = 0.0, 0

        for inputs, labels in dataloaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]

        print(f"{phase} Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}")

        # deep copy the model
        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, CHECKPOINT_PATH)
            print(f"💾 Saved best model (Acc: {best_acc:.4f})")

total_time = (time.time() - start_time) / 60
print(f"\nTraining complete in {total_time:.1f} min | Best val Acc: {best_acc:.4f}")
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), CHECKPOINT_PATH)
print(f"✅ Final model saved to: {CHECKPOINT_PATH}")


✅ Using device: cuda
Classes: ['normal', 'tumor'] | Train: 48515 | Val: 8525

Epoch 1/10
------------------------------
train Loss: 0.1746 | Acc: 0.9286
val Loss: 0.2010 | Acc: 0.9296
💾 Saved best model (Acc: 0.9296)

Epoch 2/10
------------------------------
train Loss: 0.1169 | Acc: 0.9549
val Loss: 0.1770 | Acc: 0.9348
💾 Saved best model (Acc: 0.9348)

Epoch 3/10
------------------------------
train Loss: 0.0989 | Acc: 0.9620
val Loss: 0.1887 | Acc: 0.9335

Epoch 4/10
------------------------------
train Loss: 0.0813 | Acc: 0.9693
val Loss: 0.2222 | Acc: 0.9360
💾 Saved best model (Acc: 0.9360)

Epoch 5/10
------------------------------
train Loss: 0.0708 | Acc: 0.9737
val Loss: 0.2139 | Acc: 0.9333

Epoch 6/10
------------------------------
train Loss: 0.0579 | Acc: 0.9778
val Loss: 0.2393 | Acc: 0.9299

Epoch 7/10
------------------------------
train Loss: 0.0505 | Acc: 0.9818
val Loss: 0.2403 | Acc: 0.9391
💾 Saved best model (Acc: 0.9391)

Epoch 8/10
------------------------------

In [2]:
import os

root = r"C:\Users\manav\Downloads\archive\BraTS2020_TrainingData\MICCAI_BraTS2020_TrainingData"
complete = 0
for pid in os.listdir(root):
    pdir = os.path.join(root, pid)
    if not os.path.isdir(pdir):
        continue
    needed = [f"{pid}_t1ce.nii.gz", f"{pid}_t2.nii.gz", f"{pid}_flair.nii.gz", f"{pid}_seg.nii.gz"]
    if all(os.path.exists(os.path.join(pdir, n)) for n in needed):
        complete += 1
print(f"Complete patients: {complete}")


Complete patients: 0
