In [None]:
# Merge labels: for every organ (1-13) that is annotated in the original label, discard the pseudo-label for that organ and use the original annotation instead.
import os
from tqdm import tqdm
import numpy as np
import nibabel as nib

pseudo_label_path = "./blackbean-pseudo-labels-FLARE23/"
origin_label_path = "./labelsTr2200/"
save_path = "./labelsJoin2200_withoutTumor/"

# Make sure the output directory exists before the first volume is written.
os.makedirs(save_path, exist_ok=True)

for i in tqdm(range(1, 2201)):
    # Pseudo-label, original label and merged output all share the same file name.
    case_name = "FLARE23_{:0>4}.nii.gz".format(i)

    pseudo_img = nib.load(os.path.join(pseudo_label_path, case_name))
    pseudo_label_affine = pseudo_img.affine
    pseudo_label = np.array(pseudo_img.get_fdata()).astype(np.int16)

    origin_img = nib.load(os.path.join(origin_label_path, case_name))
    origin_label = np.array(origin_img.get_fdata()).astype(np.int16)

    for j in range(1, 14):
        origin_organ = origin_label == j
        # A vectorised presence test: summing the voxels one by one with the
        # builtin sum() is extremely slow and can wrap around in int16.
        if origin_organ.any():
            pseudo_label[pseudo_label == j] = 0  # Drop the pseudo-label of the j-th organ.
            pseudo_label[origin_organ] = j       # Paint the original annotation of the j-th organ.

    # The merged volume is written with the affine of the pseudo-label file.
    nib.Nifti1Image(pseudo_label, pseudo_label_affine).to_filename(os.path.join(save_path, case_name))


In [None]:
# Partition the data based on organs containing tumors.
import os
from tqdm import tqdm
import numpy as np
import nibabel as nib
from skimage import measure

def crop_by_tumor(mask, tumor):
    """Crop ``mask`` to the bounding box of the non-zero voxels of ``tumor``.

    Args:
        mask: 3-D array to crop.
        tumor: 3-D array; its non-zero entries define the bounding box.
            Presumably has the same shape as ``mask`` -- verify against caller.

    Returns:
        A tuple ``(cropped_mask, bbox)`` where ``bbox`` is
        ``[minA, maxA, minB, maxB, minC, maxC]`` holding the smallest and
        largest non-zero index along each axis (both inclusive).

    Raises:
        ValueError: if ``tumor`` contains no non-zero voxel.
    """
    coords = np.nonzero(tumor)
    if coords[0].size == 0:
        raise ValueError("crop_by_tumor: `tumor` has no non-zero voxels")

    minA, maxA = coords[0].min(), coords[0].max()
    minB, maxB = coords[1].min(), coords[1].max()
    minC, maxC = coords[2].min(), coords[2].max()

    # Slice ends are exclusive, so +1 is needed to keep the last tumor plane.
    # Without it a tumor that is one voxel thick along any axis yields an
    # empty crop.
    mask = mask[minA:maxA + 1, minB:maxB + 1, minC:maxC + 1]
    bbox = [minA, maxA, minB, maxB, minC, maxC]

    return mask, bbox

def _snap_axis_to_multiple(lo, hi, dim, multiple=8):
    """Grow the half-open range ``[lo, hi)`` so its length is a multiple of ``multiple``.

    The range is widened roughly symmetrically and kept inside ``[0, dim]``.
    If the axis is shorter than the rounded-up length, the range is clamped to
    the axis and the length is then not a multiple of ``multiple``.
    """
    extent = hi - lo
    if extent % multiple == 0:
        return lo, hi

    target = multiple * (extent // multiple + 1)
    gap = (target - extent) // 2
    lo = max(0, lo - gap)
    hi = min(dim, lo + target)
    if hi == dim:
        # Shift the window back from the far edge; never let the start go
        # negative, because a negative slice start would wrap around.
        lo = max(0, hi - target)
    return lo, hi


def crop_by_organ(image, mask, organ, MARGIN):
    """Crop ``image`` and ``mask`` around the non-zero voxels of ``organ``.

    The bounding box of ``organ`` is padded by a fixed 5 voxels along axis 0
    and by ``MARGIN`` voxels along axes 1 and 2, clipped to the shape of
    ``mask``, and then each axis is widened so that its length is a multiple
    of 8 whenever the volume is large enough.

    Note that the upper bounds are used as exclusive slice ends, so the padding
    actually kept above the organ is one voxel smaller than below it.

    Args:
        image: 3-D array cropped with the same window as ``mask``.
            Presumably has the same shape as ``mask`` -- verify against caller.
        mask: 3-D array whose shape defines the valid index range.
        organ: 3-D array; its non-zero entries define the region of interest.
        MARGIN: padding in voxels applied along axes 1 and 2.

    Returns:
        A tuple ``(cropped_image, cropped_mask, bbox)`` with
        ``bbox = [minA, maxA, minB, maxB, minC, maxC]`` as half-open slice
        bounds.

    Raises:
        ValueError: if ``organ`` contains no non-zero voxel.
    """
    coords = np.nonzero(organ)
    if coords[0].size == 0:
        raise ValueError("crop_by_organ: `organ` has no non-zero voxels")

    dimA, dimB, dimC = len(mask), mask.shape[1], mask.shape[2]

    minA = max(0, coords[0].min() - 5)
    maxA = min(dimA, coords[0].max() + 5)
    minB = max(0, coords[1].min() - MARGIN)
    maxB = min(dimB, coords[1].max() + MARGIN)
    minC = max(0, coords[2].min() - MARGIN)
    maxC = min(dimC, coords[2].max() + MARGIN)

    minA, maxA = _snap_axis_to_multiple(minA, maxA, dimA)
    minB, maxB = _snap_axis_to_multiple(minB, maxB, dimB)
    minC, maxC = _snap_axis_to_multiple(minC, maxC, dimC)

    image, mask = (
        image[minA:maxA, minB:maxB, minC:maxC],
        mask[minA:maxA, minB:maxB, minC:maxC],
    )

    bbox = [minA, maxA, minB, maxB, minC, maxC]

    return image, mask, bbox

label_path = "./labelsJoin2200/"
image_path_1 = "./1-500/"
image_path_2 = "./501-1000/"
image_path_3 = "./1001-1500/"
image_path_4 = "./1501-2000/"
image_path_5 = "./2001-2200/"
save_path = "./Your_Save_Path/"
count_data = np.ones(13).astype(np.int16)  # Next running file index for each organ label 1..13.

TUMOR_LABEL = 14           # Label value that marks tumor voxels.
MAX_TUMOR_COMPONENTS = 6   # Cases with more connected tumor regions are discarded.

# (exclusive upper case index, image directory) pairs, in ascending order.
image_dirs = [
    (501, image_path_1),
    (1001, image_path_2),
    (1501, image_path_3),
    (2001, image_path_4),
    (2201, image_path_5),
]

for i in tqdm(range(1, 2201)):
    tmp_label = nib.load(os.path.join(label_path, "FLARE23_{:0>4}.nii.gz".format(i)))
    tmp_label_affine = tmp_label.affine
    tmp_label = np.array(tmp_label.get_fdata()).astype(np.int16)

    # Remove data without tumor labels. This is checked before the image is
    # read so that the image volume is never loaded for a discarded case.
    tumor_mask = tmp_label == TUMOR_LABEL
    if not tumor_mask.any(): continue

    # Remove tumor data with more than 6 connected regions, as such data is likely invalid, including lung tumor data.
    label_connect, num_connect = measure.label(tumor_mask.astype(np.uint8), connectivity=3, background=0, return_num=True)
    if num_connect > MAX_TUMOR_COMPONENTS: continue

    image_path = next(path for upper, path in image_dirs if i < upper)
    try:
        tmp_image = nib.load(os.path.join(image_path, "FLARE23_{:0>4}_0000.nii.gz".format(i)))
        tmp_image_affine = tmp_image.affine
        tmp_image = tmp_image.get_fdata().astype(float)
    except Exception as exc:
        # Best effort: a missing or unreadable image skips the case, but the
        # reason is reported and Ctrl-C is no longer swallowed.
        tqdm.write("Skipping case %s: cannot load image (%s)" % (i, exc))
        continue

    # Identify organs containing tumors based on the connected components of tumors.
    organ_num = []
    organ_tumor_label = tmp_label.copy()
    for j in range(1, num_connect + 1):
        component = label_connect == j
        box_label, bbox = crop_by_tumor(tmp_label, component)
        # minlength keeps indices 0 and TUMOR_LABEL valid even when the box is
        # empty or holds only small label values.
        counts = np.bincount(box_label.astype(np.int16).flatten(), minlength=TUMOR_LABEL + 1)
        counts[0] = 0; counts[TUMOR_LABEL] = 0
        host_organ = np.argmax(counts)  # Most frequent organ label inside the tumor's bounding box.
        if host_organ != 0:
            organ_num.append(host_organ)
            # Relabel the tumor component as its host organ so that the organ
            # crop below also covers the tumor.
            organ_tumor_label[component] = host_organ

    organ_num = np.unique(organ_num)
    for j in organ_num:
        organ_label = organ_tumor_label.copy()
        organ_label[organ_label != j] = 0
        crop_image, crop_label, crop_box = crop_by_organ(tmp_image, tmp_label, organ_label, 10)

        # Create both output folders independently; one may already exist.
        os.makedirs(os.path.join(save_path, "%s"% j, "labels"), exist_ok=True)
        os.makedirs(os.path.join(save_path, "%s"% j, "images"), exist_ok=True)

        # NOTE(review): the crops are saved with the affine of the full volume,
        # so the crop offset (crop_box) is not reflected in world coordinates --
        # confirm that downstream code does not rely on them.
        nib.Nifti1Image(crop_label, tmp_label_affine).to_filename(os.path.join(save_path, "%s"% j, "labels", "Tumor%s_Label_%s.nii.gz"% (j,count_data[j-1])))
        nib.Nifti1Image(crop_image, tmp_image_affine).to_filename(os.path.join(save_path, "%s"% j, "images", "Tumor%s_Image_%s.nii.gz"% (j,count_data[j-1])))
        count_data[j-1] += 1

In [None]:
# Repair data that cannot be read by SimpleITK
import os
import SimpleITK as sitk
from tqdm import tqdm
import nibabel as nib


# Directory whose files are checked; replace with the real label or image folder.
repair_path = "label_or_data_path"

for root, _dirs, files in os.walk(repair_path):
    for file_name in tqdm(files):
        # Build the path from the directory currently being walked, so the cell
        # does not depend on a variable defined in another cell and files in
        # sub-directories are addressed correctly.
        file_path = os.path.join(root, file_name)
        try:
            sitk.ReadImage(file_path)
        except Exception:
            print(f'cosines problem occures, try to fix it...')
            # Re-writing the qform/sform that nibabel reads back and saving the
            # file in place is the repair attempted here.
            img = nib.load(file_path)
            img.set_qform(img.get_qform())
            img.set_sform(img.get_sform())
            nib.save(img, file_path)
