In [32]:
import os
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from joblib import Parallel, delayed
from glob import glob
import subprocess
import shutil

# Cleaning BRATS

In [33]:
data_path = "/media/say26747/EC2426FA2426C782/Continual_learning_data/Brats_16_17/Task01_BrainTumour"

# Labels and images sit in sibling sub-folders; both listings are sorted so
# that entry i of one list lines up with entry i of the other.
seg_paths, img_paths = [
    sorted(glob(os.path.join(data_path, subdir, "*.nii.gz")))
    for subdir in ("labelsTr", "imagesTr")
]

# Quick sanity check: the counts should agree and the first pair should match.
print(len(seg_paths), len(img_paths))
print(seg_paths[0], img_paths[0])

484 484
/media/say26747/EC2426FA2426C782/Continual_learning_data/Brats_16_17/Task01_BrainTumour/labelsTr/BRATS_001.nii.gz /media/say26747/EC2426FA2426C782/Continual_learning_data/Brats_16_17/Task01_BrainTumour/imagesTr/BRATS_001.nii.gz


In [34]:
# Root folder that receives one sub-folder per BRATS case; exist_ok keeps the
# cell safe to re-run.
save_path = (
    "/media/say26747/EC2426FA2426C782/Continual_learning_data/Brats_16_17/BRATS_splitted"
)
os.makedirs(save_path, exist_ok=True)

In [35]:
def worker(img_path, seg_path, out_dir=None):
    """Split one BRATS case into per-modality volumes plus its segmentations.

    The following files are written into ``<out_dir>/<case>/`` where ``<case>``
    is the image file name up to its first dot:

    * ``<case>_FLR.nii.gz``, ``<case>_T1w.nii.gz``, ``<case>_T1c.nii.gz``,
      ``<case>_T2w.nii.gz`` -- slices 0-3 of the image's last axis, as float32.
    * ``<case>_Seg.nii.gz`` -- a copy of the original label file.
    * ``<case>_MERGED_Seg.nii.gz`` -- uint8 mask that is 1 wherever the label
      is non-zero.

    Parameters
    ----------
    img_path : str
        Path to the multi-channel image; must be 4-D with at least four
        entries on the last axis.
    seg_path : str
        Path to the label volume; must have the same file name as ``img_path``.
    out_dir : str, optional
        Destination root. Defaults to the notebook-level ``save_path`` at call
        time, which is what the function used before this parameter existed.

    Raises
    ------
    ValueError
        If the image and label file names differ, or if the image is not 4-D
        with at least four entries on its last axis.
    """
    modalities = ("FLR", "T1w", "T1c", "T2w")
    if out_dir is None:
        # Keep the historical behaviour; passing out_dir explicitly avoids
        # depending on whichever dataset cell last rebound `save_path`.
        out_dir = save_path

    # The caller pairs images and labels by sorted position only, so refuse a
    # pair whose file names disagree instead of writing a mislabeled case.
    img_name = os.path.basename(img_path)
    if img_name != os.path.basename(seg_path):
        raise ValueError(f"Image/label mismatch: {img_path} is paired with {seg_path}")

    img_data = nib.load(img_path)
    seg_data = nib.load(seg_path)

    # Check the shape from the header before reading voxel data: slicing the
    # last axis of a 3-D volume would silently cut through a spatial dimension.
    if len(img_data.shape) != 4 or img_data.shape[-1] < len(modalities):
        raise ValueError(
            f"Expected a 4-D image with at least {len(modalities)} channels, "
            f"got shape {img_data.shape} for {img_path}"
        )

    img = img_data.get_fdata().astype(np.float32)
    seg = seg_data.get_fdata().astype(np.uint8)

    patient_name = img_name.split(".")[0]
    case_dir = os.path.join(out_dir, patient_name)
    os.makedirs(case_dir, exist_ok=True)

    for index, modality in enumerate(modalities):
        nib.save(
            nib.Nifti1Image(img[..., index], img_data.affine),
            os.path.join(case_dir, f"{patient_name}_{modality}.nii.gz"),
        )

    seg_dest_path = os.path.join(case_dir, f"{patient_name}_Seg.nii.gz")
    try:
        # Prefer a byte-for-byte copy so the original label file is preserved.
        shutil.copy2(seg_path, seg_dest_path)
    except OSError as e:
        # A printed message alone would leave this case without a _Seg file,
        # so rebuild it from the labels already held in memory.
        print(f"Error: {e}")
        nib.save(nib.Nifti1Image(seg, seg_data.affine), seg_dest_path)

    # Collapse every non-zero label into a single foreground class.
    nib.save(
        nib.Nifti1Image((seg > 0).astype(np.uint8), seg_data.affine),
        os.path.join(case_dir, f"{patient_name}_MERGED_Seg.nii.gz"),
    )

In [36]:
# Run the per-case conversion on all available cores (n_jobs=-1); tqdm tracks
# how many (image, label) pairs have been handed to the pool.
results = Parallel(n_jobs=-1)(
    delayed(worker)(*pair)
    for pair in tqdm(zip(img_paths, seg_paths), total=len(img_paths))
)

  0%|          | 0/484 [00:00<?, ?it/s]

# Cleaning ISLES

In [45]:
data_path = "/media/say26747/EC2426FA2426C782/Continual_learning_data/ISLES_2015/SISS2015_Training"

# One sorted file list per modality, in the order DWI, Flair, T1, T2.
img_paths = [
    sorted(glob(os.path.join(data_path, "*", f"*MR_{modality}*", "*.nii")))
    for modality in ["DWI", "Flair", "T1", "T2"]
]
# Lesion masks live in the "*OT*" folder of each case.
seg_paths = sorted(glob(os.path.join(data_path, "*", "*OT*", "*.nii")))

# Sanity check: counts per modality, then the first file of every list.
print(len(seg_paths), len(img_paths))
print([len(modality_paths) for modality_paths in img_paths])
print(seg_paths[0], img_paths[0][0])
for modality_paths in img_paths[1:]:
    print(modality_paths[0])

28 4
[28, 28, 28, 28]
/media/say26747/EC2426FA2426C782/Continual_learning_data/ISLES_2015/SISS2015_Training/1/VSD.Brain.XX.O.OT.70618/VSD.Brain.XX.O.OT.70618.nii /media/say26747/EC2426FA2426C782/Continual_learning_data/ISLES_2015/SISS2015_Training/1/VSD.Brain.XX.O.MR_DWI.70613/VSD.Brain.XX.O.MR_DWI.70613.nii
/media/say26747/EC2426FA2426C782/Continual_learning_data/ISLES_2015/SISS2015_Training/1/VSD.Brain.XX.O.MR_Flair.70614/VSD.Brain.XX.O.MR_Flair.70614.nii
/media/say26747/EC2426FA2426C782/Continual_learning_data/ISLES_2015/SISS2015_Training/1/VSD.Brain.XX.O.MR_T1.70615/VSD.Brain.XX.O.MR_T1.70615.nii
/media/say26747/EC2426FA2426C782/Continual_learning_data/ISLES_2015/SISS2015_Training/1/VSD.Brain.XX.O.MR_T2.70616/VSD.Brain.XX.O.MR_T2.70616.nii


In [46]:
# Root folder that receives one sub-folder per ISLES patient; exist_ok keeps
# the cell safe to re-run.
save_path = "/media/say26747/EC2426FA2426C782/Continual_learning_data/ISLES_2015/ISLES_splitted"
os.makedirs(save_path, exist_ok=True)

In [47]:
# Convert every ISLES case into <save_path>/patient_<id>/ with one float32
# volume per modality plus the uint8 segmentation.
# Output tags are listed in the same order as the per-modality lists in
# `img_paths` (DWI, Flair, T1, T2).
modality_tags = ["DWI", "FLR", "T1w", "T2w"]

for case_index, seg_path in enumerate(seg_paths):
    img_path = [img_paths[i][case_index] for i in range(len(modality_tags))]

    # Masks and images were globbed independently and are paired only by
    # sorted position, so confirm they share a patient folder before writing.
    # Both globs place the file two levels below the patient folder.
    case_dir = os.path.dirname(os.path.dirname(seg_path))
    for path in img_path:
        if os.path.dirname(os.path.dirname(path)) != case_dir:
            raise ValueError(
                f"Image {path} is not in the same patient folder as {seg_path}"
            )

    # Take the ID from the patient folder itself rather than the first
    # all-digit component of the absolute path, which could be an ancestor.
    patient_id = os.path.basename(case_dir)
    if not patient_id.isdigit():
        raise ValueError("Patient name not found")
    patient_name = f"patient_{patient_id}"

    out_dir = os.path.join(save_path, patient_name)
    os.makedirs(out_dir, exist_ok=True)

    # Distinct loop names: the original reused `index` for both loops.
    for tag, path in zip(modality_tags, img_path):
        image = nib.load(path)
        nib.save(
            nib.Nifti1Image(image.get_fdata().astype(np.float32), image.affine),
            os.path.join(out_dir, f"{patient_name}_{tag}.nii.gz"),
        )

    seg_data = nib.load(seg_path)
    nib.save(
        nib.Nifti1Image(seg_data.get_fdata().astype(np.uint8), seg_data.affine),
        os.path.join(out_dir, f"{patient_name}_Seg.nii.gz"),
    )

# Cleaning WMH

In [51]:
root_path = "/media/say26747/EC2426FA2426C782/Continual_learning_data/WMH/dataverse_files/training"

# Every case folder sits three directory levels below the training root.
case_glob = os.path.join(root_path, "*", "*", "*")

# One sorted file list per modality, in the order FLAIR, T1.
img_paths = [
    sorted(glob(os.path.join(case_glob, "pre", f"{modality}.nii.gz")))
    for modality in ["FLAIR", "T1"]
]
seg_paths = sorted(glob(os.path.join(case_glob, "wmh.nii.gz")))

# Sanity check: counts per modality, then the first file of every list.
print(len(seg_paths), len(img_paths))
print([len(modality_paths) for modality_paths in img_paths])
print(seg_paths[0], img_paths[0][0])
print(img_paths[1][0])

60 2
[60, 60]
/media/say26747/EC2426FA2426C782/Continual_learning_data/WMH/dataverse_files/training/Amsterdam/GE3T/100/wmh.nii.gz /media/say26747/EC2426FA2426C782/Continual_learning_data/WMH/dataverse_files/training/Amsterdam/GE3T/100/pre/FLAIR.nii.gz
/media/say26747/EC2426FA2426C782/Continual_learning_data/WMH/dataverse_files/training/Amsterdam/GE3T/100/pre/T1.nii.gz


In [52]:
# Root folder that receives one sub-folder per WMH patient; exist_ok keeps the
# cell safe to re-run.
save_path = (
    "/media/say26747/EC2426FA2426C782/Continual_learning_data/WMH/WMH_splitted"
)
os.makedirs(save_path, exist_ok=True)

In [53]:
# Convert every WMH case into <save_path>/patient_<id>/ with one float32
# volume per modality plus a binary uint8 segmentation.
# Output tags are listed in the same order as the per-modality lists in
# `img_paths` (FLAIR, T1).
modality_tags = ["FLR", "T1w"]

for case_index, seg_path in enumerate(seg_paths):
    img_path = [img_paths[i][case_index] for i in range(len(modality_tags))]

    # Masks and images were globbed independently and are paired only by
    # sorted position, so confirm they share a patient folder before writing.
    # The mask sits directly in the patient folder; images sit in its "pre"
    # sub-folder, one level deeper.
    case_dir = os.path.dirname(seg_path)
    for path in img_path:
        if os.path.dirname(os.path.dirname(path)) != case_dir:
            raise ValueError(
                f"Image {path} is not in the same patient folder as {seg_path}"
            )

    # Take the ID from the patient folder itself rather than the first
    # all-digit component of the absolute path, which could be an ancestor.
    patient_id = os.path.basename(case_dir)
    if not patient_id.isdigit():
        raise ValueError("Patient name not found")
    patient_name = f"patient_{patient_id}"

    out_dir = os.path.join(save_path, patient_name)
    os.makedirs(out_dir, exist_ok=True)

    # Distinct loop names: the original reused `index` for both loops.
    for tag, path in zip(modality_tags, img_path):
        image = nib.load(path)
        nib.save(
            nib.Nifti1Image(image.get_fdata().astype(np.float32), image.affine),
            os.path.join(out_dir, f"{patient_name}_{tag}.nii.gz"),
        )

    seg_data = nib.load(seg_path)
    # NOTE(review): `> 0` folds every non-zero label into the foreground. If
    # these masks carry a separate label for non-WMH pathology, it is marked as
    # lesion here too -- confirm this is intended.
    nib.save(
        nib.Nifti1Image((seg_data.get_fdata() > 0).astype(np.uint8), seg_data.affine),
        os.path.join(out_dir, f"{patient_name}_Seg.nii.gz"),
    )

# Cleaning MSSEG-2016

In [55]:
root_path = "/media/say26747/EC2426FA2426C782/Continual_learning_data/MSSEG_2016"

# Shared glob prefix down to the patient folder: <root>/MSSEG*/T*/<center>/<patient>
patient_glob = os.path.join(root_path, "MSSEG*", "T*", "*", "*")

# One sorted file list per modality, in the order DP, GADO, FLAIR, T1, T2.
img_paths = [
    sorted(
        glob(
            os.path.join(
                patient_glob,
                "Preprocessed_Data",
                f"*{modality}_preprocessed.nii.gz",
            )
        )
    )
    for modality in ["DP", "GADO", "FLAIR", "T1", "T2"]
]
seg_paths = sorted(glob(os.path.join(patient_glob, "Masks", "Consensus.nii.gz")))

# Sanity check: counts per modality, then the first file of every list.
print(len(seg_paths), len(img_paths))
print([len(modality_paths) for modality_paths in img_paths])
print(seg_paths[0], img_paths[0][0])
for modality_paths in img_paths[1:]:
    print(modality_paths[0])

53 5
[53, 53, 53, 53, 53]
/media/say26747/EC2426FA2426C782/Continual_learning_data/MSSEG_2016/MSSEG-Testing/Testing/Center_01/Patient_01/Masks/Consensus.nii.gz /media/say26747/EC2426FA2426C782/Continual_learning_data/MSSEG_2016/MSSEG-Testing/Testing/Center_01/Patient_01/Preprocessed_Data/DP_preprocessed.nii.gz
/media/say26747/EC2426FA2426C782/Continual_learning_data/MSSEG_2016/MSSEG-Testing/Testing/Center_01/Patient_01/Preprocessed_Data/GADO_preprocessed.nii.gz
/media/say26747/EC2426FA2426C782/Continual_learning_data/MSSEG_2016/MSSEG-Testing/Testing/Center_01/Patient_01/Preprocessed_Data/FLAIR_preprocessed.nii.gz
/media/say26747/EC2426FA2426C782/Continual_learning_data/MSSEG_2016/MSSEG-Testing/Testing/Center_01/Patient_01/Preprocessed_Data/T1_preprocessed.nii.gz
/media/say26747/EC2426FA2426C782/Continual_learning_data/MSSEG_2016/MSSEG-Testing/Testing/Center_01/Patient_01/Preprocessed_Data/T2_preprocessed.nii.gz


In [56]:
# Root folder that receives one sub-folder per MSSEG patient; exist_ok keeps
# the cell safe to re-run.
save_path = "/media/say26747/EC2426FA2426C782/Continual_learning_data/MSSEG_2016/MSSEG_splitted"
os.makedirs(save_path, exist_ok=True)

In [57]:
# Convert every MSSEG case into
# <save_path>/<train|test>_center_<c>_patient_<p>/ with one float32 volume per
# modality plus a binary uint8 segmentation.
# Output tags are listed in the same order as the per-modality lists in
# `img_paths` (DP, GADO, FLAIR, T1, T2).
modality_tags = ["PDw", "T1c", "FLR", "T1w", "T2w"]

for case_index, seg_path in tqdm(enumerate(seg_paths), total=len(seg_paths)):
    img_path = [img_paths[i][case_index] for i in range(len(modality_tags))]

    # Masks and images were globbed independently and are paired only by
    # sorted position, so confirm they share a patient folder before writing.
    # Both globs place the file two levels below the patient folder.
    case_dir = os.path.dirname(os.path.dirname(seg_path))
    for path in img_path:
        if os.path.dirname(os.path.dirname(path)) != case_dir:
            raise ValueError(
                f"Image {path} is not in the same patient folder as {seg_path}"
            )

    parts = seg_path.split(os.sep)
    test_or_train = "train" if "Training" in parts else "test"
    center_number = parts[-4].split("_")[-1]  # Extract '01' from 'Center_01'
    patient_number = parts[-3].split("_")[-1]  # Extract '01' from 'Patient_01'
    patient_name = f"{test_or_train}_center_{center_number}_patient_{patient_number}"

    out_dir = os.path.join(save_path, patient_name)
    os.makedirs(out_dir, exist_ok=True)

    # Distinct loop names: the original reused `index` for both loops.
    for tag, path in zip(modality_tags, img_path):
        image = nib.load(path)
        nib.save(
            nib.Nifti1Image(image.get_fdata().astype(np.float32), image.affine),
            os.path.join(out_dir, f"{patient_name}_{tag}.nii.gz"),
        )

    seg_data = nib.load(seg_path)
    # Any non-zero voxel of the consensus mask becomes foreground (1).
    nib.save(
        nib.Nifti1Image((seg_data.get_fdata() > 0).astype(np.uint8), seg_data.affine),
        os.path.join(out_dir, f"{patient_name}_Seg.nii.gz"),
    )

  0%|          | 0/53 [00:00<?, ?it/s]