The training and validation sets used in this article both come from the BraTS 2018 training set (HGG: 210 patients, LGG: 75 patients).

However, BraTS only publishes the training set data, so there is no test set data. If part of the training set is held out as a test set, much less data is left for training. When the training data is too small, overfitting is likely to occur: the performance on the training set is good, but the performance on the test set is poor, which means the network's ability to generalize has deteriorated. To work around this lack of data, I came up with the following approach.

The BraTS 2019 training set extends the BraTS 2018 training set: 49 cases were added to HGG and 1 case was added to LGG. I therefore use these newly added cases as my test set.

In [None]:
# Install SimpleITK, which the import cell below needs.
# NOTE(review): a bare `pip install` line presumably relies on IPython/Jupyter
# automagic; it is not valid in a plain Python script -- confirm.
pip install SimpleITK

In [1]:
import os
import numpy as np
import SimpleITK as sitk

In [2]:
# File-name suffixes. The processing loops build each input path as
# <case directory>/<case name><suffix>.
flair_name = "_flair.nii.gz"  # FLAIR modality
t1_name = "_t1.nii.gz"  # T1 modality
t1ce_name = "_t1ce.nii.gz"  # T1ce modality
t2_name = "_t2.nii.gz"  # T2 modality
mask_name = "_seg.nii.gz"  # segmentation mask

In [3]:
# Root directory whose sub-directories are the HGG cases.
# Machine-specific absolute path: adjust it to your local copy of the data.
bratshgg_path = r"/Users/wushu/Desktop/2-MICCAI_BraTS_2018/MICCAI_BraTS_2018_Data_Training/HGG"

In [4]:
# Root directory whose sub-directories are the LGG cases.
# Machine-specific absolute path: adjust it to your local copy of the data.
bratslgg_path = r"/Users/wushu/Desktop/2-MICCAI_BraTS_2018/MICCAI_BraTS_2018_Data_Training/LGG"

In [5]:
# Output directory for the per-slice image arrays (.npy) saved by the
# processing loops; relative to the current working directory.
outputImg_path = r"./trainImage"

In [6]:
# Output directory for the per-slice mask arrays (.npy) saved by the
# processing loops; relative to the current working directory.
outputMask_path = r"./trainMask"

In [7]:
# Create both output directories up front. makedirs(..., exist_ok=True) is a
# single call with no check-then-create race, creates missing parent
# directories, and does nothing when the directory already exists.
os.makedirs(outputImg_path, exist_ok=True)
os.makedirs(outputMask_path, exist_ok=True)

In [8]:
def file_name_path(file_dir, dir=True, file=False):
    """
    Walk file_dir top-down and return the first non-empty listing requested.

    For each directory visited by os.walk, the sub-directory names are
    returned (and printed) when ``dir`` is true and there are any; otherwise
    the file names are returned (and printed) when ``file`` is true and there
    are any. Sub-directories take precedence when both flags are set.

    :param file_dir: directory at which the walk starts
    :param dir: whether a non-empty list of sub-directory names may be returned
    :param file: whether a non-empty list of file names may be returned
    :return: list of names, or None when the walk finds nothing to return
    """
    for _root, sub_dirs, sub_files in os.walk(file_dir):
        if dir and sub_dirs:
            print("sub_dirs:", sub_dirs)
            return sub_dirs
        if file and sub_files:
            print("files:", sub_files)
            return sub_files
    # Nothing matched anywhere in the tree (or file_dir does not exist).
    return None

In [9]:
# List the sub-directory names (one per case) directly under the HGG and LGG
# roots; the processing loops iterate over these lists.
pathhgg_list = file_name_path(bratshgg_path)
pathlgg_list = file_name_path(bratslgg_path)

sub_dirs: ['Brats18_CBICA_AOO_1', 'Brats18_TCIA02_471_1', 'Brats18_CBICA_ARW_1', 'Brats18_CBICA_ASK_1', 'Brats18_TCIA08_105_1', 'Brats18_CBICA_AAB_1', 'Brats18_CBICA_ATV_1', 'Brats18_CBICA_ASA_1', 'Brats18_CBICA_AQN_1', 'Brats18_TCIA04_192_1', 'Brats18_2013_20_1', 'Brats18_TCIA01_147_1', 'Brats18_CBICA_APR_1', 'Brats18_TCIA02_321_1', 'Brats18_CBICA_AQD_1', 'Brats18_CBICA_ALX_1', 'Brats18_TCIA08_205_1', 'Brats18_CBICA_AQJ_1', 'Brats18_TCIA01_203_1', 'Brats18_2013_2_1', 'Brats18_CBICA_AUN_1', 'Brats18_TCIA02_300_1', 'Brats18_CBICA_ASE_1', 'Brats18_CBICA_ASO_1', 'Brats18_CBICA_ATX_1', 'Brats18_CBICA_AAL_1', 'Brats18_TCIA03_419_1', 'Brats18_CBICA_AXL_1', 'Brats18_CBICA_AQV_1', 'Brats18_CBICA_ALN_1', 'Brats18_TCIA06_165_1', 'Brats18_TCIA03_338_1', 'Brats18_TCIA01_378_1', 'Brats18_CBICA_ASY_1', 'Brats18_CBICA_AUR_1', 'Brats18_TCIA02_198_1', 'Brats18_2013_11_1', 'Brats18_CBICA_AAP_1', 'Brats18_CBICA_ATD_1', 'Brats18_TCIA02_171_1', 'Brats18_CBICA_ASW_1', 'Brats18_TCIA08_234_1', 'Brats18_TCIA03

In [10]:
def normalize(slice, bottom=99, down=1):
    """
    Clip an array to a percentile window, then z-score it with the statistics
    of its non-zero elements.

    :param slice: numeric array to normalize; it is not modified in place
    :param bottom: percentile used as the UPPER clipping bound
    :param down: percentile used as the LOWER clipping bound
    :return: the clipped array as-is when it (or its non-zero part) has zero
             standard deviation; otherwise a new float array equal to
             (clipped - mean_nonzero) / std_nonzero in which every element
             equal to the minimum has been replaced by -9
    """
    # Percentile window (note the naming: `bottom` is the high end).
    upper = np.percentile(slice, bottom)
    lower = np.percentile(slice, down)
    clipped = np.clip(slice, lower, upper)

    # A constant array cannot be standardized; hand back the clipped data.
    if np.std(clipped) == 0:
        return clipped

    foreground = clipped[np.nonzero(clipped)]
    sigma = np.std(foreground)
    if sigma == 0:
        return clipped

    scaled = (clipped - np.mean(foreground)) / sigma
    # The smallest normalized value is assumed to be the zero-intensity
    # background; tag it with the sentinel -9 so it can be recognised (and
    # discarded) later, e.g. when sampling patches.
    scaled[scaled == scaled.min()] = -9
    return scaled

In [11]:
def crop_ceter(img, croph, cropw):
    """
    Center-crop axes 1 and 2 of an array to (croph, cropw).

    :param img: array indexed as (slice, height, width); axis 0 is kept whole
    :param croph: height of the crop window
    :param cropw: width of the crop window
    :return: img[:, starth:starth + croph, startw:startw + cropw], where the
             window is centered on the height/width axes
    :raises ValueError: if a crop size is negative or larger than the
             corresponding image dimension
    """
    height, width = img.shape[1:3]
    # Without this check an oversized crop produces a negative start index,
    # which slicing interprets as "from the end" and silently returns the
    # wrong region with the wrong shape.
    if not (0 <= croph <= height and 0 <= cropw <= width):
        raise ValueError(
            "crop size (%d, %d) does not fit image size (%d, %d)"
            % (croph, cropw, height, width)
        )
    starth = height // 2 - croph // 2
    startw = width // 2 - cropw // 2
    return img[:, starth:starth + croph, startw:startw + cropw]

In [12]:
# Convert every HGG case into per-slice .npy files: for each slice whose mask
# contains at least one non-zero label, save a 4-channel image array and the
# matching mask slice.
for case_name in pathhgg_list:
    # All files of a case live in <root>/<case>/ and are named <case><suffix>.
    case_prefix = os.path.join(bratshgg_path, case_name, case_name)

    # Load each modality as int16, normalize it on its own (the modalities
    # have different contrasts) and center-crop it to 160x160.
    # Channel order: 0 = FLAIR, 1 = T1, 2 = T1ce, 3 = T2.
    modality_crops = []
    for suffix in (flair_name, t1_name, t1ce_name, t2_name):
        volume = sitk.GetArrayFromImage(
            sitk.ReadImage(case_prefix + suffix, sitk.sitkInt16)
        )
        modality_crops.append(crop_ceter(normalize(volume), 160, 160))

    # The segmentation is read as uint8 and only cropped, never normalized.
    mask_volume = sitk.GetArrayFromImage(
        sitk.ReadImage(case_prefix + mask_name, sitk.sitkUInt8)
    )
    mask_crop = crop_ceter(mask_volume, 160, 160)
    print(case_name)

    for n_slice in range(modality_crops[0].shape[0]):
        mask_slice = mask_crop[n_slice, :, :]
        # Skip slices without lesions (mask entirely zero).
        if np.max(mask_slice) == 0:
            continue

        # Stack the four modalities along a trailing channel axis. np.float64
        # replaces the np.float alias, which was removed in NumPy 1.24 and
        # now raises AttributeError.
        four_channel = np.stack(
            [crop[n_slice, :, :] for crop in modality_crops], axis=-1
        ).astype(np.float64)

        slice_file = f"{case_name}_{n_slice}.npy"
        # Image: (160, 160, 4) float64. Mask: (160, 160) uint8; label values
        # are 0/1/2/4 per the original author's note.
        np.save(os.path.join(outputImg_path, slice_file), four_channel)
        np.save(os.path.join(outputMask_path, slice_file), mask_slice)
print("Done！")
        
    

Brats18_CBICA_AOO_1
Brats18_TCIA02_471_1
Brats18_CBICA_ARW_1
Brats18_CBICA_ASK_1
Brats18_TCIA08_105_1
Brats18_CBICA_AAB_1
Brats18_CBICA_ATV_1
Brats18_CBICA_ASA_1
Brats18_CBICA_AQN_1
Brats18_TCIA04_192_1
Brats18_2013_20_1
Brats18_TCIA01_147_1
Brats18_CBICA_APR_1
Brats18_TCIA02_321_1
Brats18_CBICA_AQD_1
Brats18_CBICA_ALX_1
Brats18_TCIA08_205_1
Brats18_CBICA_AQJ_1
Brats18_TCIA01_203_1
Brats18_2013_2_1
Brats18_CBICA_AUN_1
Brats18_TCIA02_300_1
Brats18_CBICA_ASE_1
Brats18_CBICA_ASO_1
Brats18_CBICA_ATX_1
Brats18_CBICA_AAL_1
Brats18_TCIA03_419_1
Brats18_CBICA_AXL_1
Brats18_CBICA_AQV_1
Brats18_CBICA_ALN_1
Brats18_TCIA06_165_1
Brats18_TCIA03_338_1
Brats18_TCIA01_378_1
Brats18_CBICA_ASY_1
Brats18_CBICA_AUR_1
Brats18_TCIA02_198_1
Brats18_2013_11_1
Brats18_CBICA_AAP_1
Brats18_CBICA_ATD_1
Brats18_TCIA02_171_1
Brats18_CBICA_ASW_1
Brats18_TCIA08_234_1
Brats18_TCIA03_257_1
Brats18_TCIA02_314_1
Brats18_CBICA_ABM_1
Brats18_TCIA03_138_1
Brats18_TCIA04_361_1
Brats18_CBICA_AQR_1
Brats18_TCIA05_478_1
Brats18

In [13]:
# Convert every LGG case into per-slice .npy files: for each slice whose mask
# contains at least one non-zero label, save a 4-channel image array and the
# matching mask slice.
for case_name in pathlgg_list:
    # All files of a case live in <root>/<case>/ and are named <case><suffix>.
    case_prefix = os.path.join(bratslgg_path, case_name, case_name)

    # Load each modality as int16, normalize it on its own (the modalities
    # have different contrasts) and center-crop it to 160x160.
    # Channel order: 0 = FLAIR, 1 = T1, 2 = T1ce, 3 = T2.
    modality_crops = []
    for suffix in (flair_name, t1_name, t1ce_name, t2_name):
        volume = sitk.GetArrayFromImage(
            sitk.ReadImage(case_prefix + suffix, sitk.sitkInt16)
        )
        modality_crops.append(crop_ceter(normalize(volume), 160, 160))

    # The segmentation is read as uint8 and only cropped, never normalized.
    mask_volume = sitk.GetArrayFromImage(
        sitk.ReadImage(case_prefix + mask_name, sitk.sitkUInt8)
    )
    mask_crop = crop_ceter(mask_volume, 160, 160)
    print(case_name)

    for n_slice in range(modality_crops[0].shape[0]):
        mask_slice = mask_crop[n_slice, :, :]
        # Skip slices without lesions (mask entirely zero).
        if np.max(mask_slice) == 0:
            continue

        # Stack the four modalities along a trailing channel axis. np.float64
        # replaces the np.float alias, which was removed in NumPy 1.24 and
        # now raises AttributeError.
        four_channel = np.stack(
            [crop[n_slice, :, :] for crop in modality_crops], axis=-1
        ).astype(np.float64)

        slice_file = f"{case_name}_{n_slice}.npy"
        # Image: (160, 160, 4) float64. Mask: (160, 160) uint8; label values
        # are 0/1/2/4 per the original author's note.
        np.save(os.path.join(outputImg_path, slice_file), four_channel)
        np.save(os.path.join(outputMask_path, slice_file), mask_slice)
print("Done!")

Brats18_TCIA10_639_1
Brats18_TCIA13_630_1
Brats18_2013_6_1
Brats18_TCIA13_615_1
Brats18_2013_8_1
Brats18_2013_24_1
Brats18_TCIA10_490_1
Brats18_TCIA10_637_1
Brats18_TCIA13_634_1
Brats18_TCIA10_346_1
Brats18_TCIA10_202_1
Brats18_TCIA09_312_1
Brats18_TCIA13_624_1
Brats18_TCIA10_442_1
Brats18_TCIA10_152_1
Brats18_TCIA13_645_1
Brats18_TCIA09_177_1
Brats18_TCIA10_629_1
Brats18_2013_15_1
Brats18_TCIA09_402_1
Brats18_TCIA10_408_1
Brats18_TCIA12_480_1
Brats18_2013_29_1
Brats18_TCIA10_241_1
Brats18_TCIA13_633_1
Brats18_TCIA09_493_1
Brats18_TCIA12_101_1
Brats18_TCIA12_470_1
Brats18_TCIA13_618_1
Brats18_TCIA09_451_1
Brats18_TCIA10_387_1
Brats18_TCIA09_141_1
Brats18_2013_1_1
Brats18_TCIA09_255_1
Brats18_TCIA10_130_1
Brats18_TCIA10_420_1
Brats18_TCIA10_393_1
Brats18_TCIA09_620_1
Brats18_TCIA10_351_1
Brats18_TCIA10_299_1
Brats18_TCIA13_642_1
Brats18_2013_16_1
Brats18_TCIA10_330_1
Brats18_TCIA13_623_1
Brats18_2013_28_1
Brats18_TCIA10_410_1
Brats18_TCIA10_282_1
Brats18_TCIA13_653_1
Brats18_TCIA10_261_