The training and validation sets used in this article are both drawn from the BraTS 2018 training set (HGG: 210 patients, LGG: 75 patients).

However, since BraTS only publishes the training set data, no test set data is available. If part of the training set is held out as a test set, the amount of data left for training shrinks considerably. When the training data is too small, overfitting is likely to occur: the model performs well on the training set but poorly on the test set, which means that the network's generalization ability has deteriorated. To work around this lack of data, I came up with the following approach.

The BraTS 2019 training set extends the BraTS 2018 training set: 49 cases were added to HGG and 1 case was added to LGG. I therefore use these newly added cases as my test set.

In [24]:
import pandas as pd

In [25]:
import numpy as np

In [26]:
def compare_site_names(one, two):
    """Return the names listed in CSV `two` that do not appear in CSV `one`.

    Both files are read without a header row and only their first column is
    compared.

    Args:
        one: Path of the reference CSV file.
        two: Path of the CSV file that is checked for additional names.

    Returns:
        A list of the distinct first-column values of `two` that are missing
        from `one`, in the order in which they first appear in `two`.
    """
    known = set(pd.read_csv(one, header=None)[0].tolist())
    candidates = pd.read_csv(two, header=None)[0].tolist()
    # dict.fromkeys de-duplicates while keeping file order, so the result is
    # reproducible across runs (iteration order of a plain set of strings
    # is not).
    return [name for name in dict.fromkeys(candidates) if name not in known]

In [27]:
# Names listed in 19hgg.csv that do not appear in 18hgg.csv.
hgg_diff = compare_site_names("18hgg.csv","19hgg.csv")

In [28]:
len(hgg_diff)

49

In [29]:
# Names listed in 19lgg.csv that do not appear in 18lgg.csv.
lgg_diff = compare_site_names("18lgg.csv","19lgg.csv")

In [30]:
len(lgg_diff)

1

In [31]:
# File-name suffixes that are appended to a case name to locate each volume
# of that case (four image modalities plus the segmentation mask).
flair_name = "_flair.nii.gz"
t1_name = "_t1.nii.gz"
t1ce_name = "_t1ce.nii.gz"
t2_name = "_t2.nii.gz"
mask_name = "_seg.nii.gz"  # segmentation mask

In [32]:
# Directory containing one sub-directory per HGG case.
# NOTE(review): machine-specific absolute path - adjust to the local dataset location.
bratshgg_path = r"/Users/wushu/Desktop/BraTs2019/MICCAI_BraTS_2019_Data_Training/HGG"

In [33]:
# Directory containing one sub-directory per LGG case.
# NOTE(review): machine-specific absolute path - adjust to the local dataset location.
bratslgg_path = r"/Users/wushu/Desktop/BraTs2019/MICCAI_BraTS_2019_Data_Training/LGG"

In [34]:
# Output directory for the per-slice image arrays (.npy), relative to the working directory.
outputImg_path = r"./testImage"

In [35]:
# Output directory for the per-slice mask arrays (.npy), relative to the working directory.
outputMask_path = r"./testMask"

In [36]:
import os

In [37]:
# Create the output directories if they do not exist yet.
# makedirs(exist_ok=True) avoids the check-then-create race of
# "if not exists: mkdir" and also creates missing parent directories.
os.makedirs(outputImg_path, exist_ok=True)
os.makedirs(outputMask_path, exist_ok=True)

In [38]:
# Case directory names of the test cases; filled by the loops over
# hgg_diff and lgg_diff further down.
pathhgg_list = []
pathlgg_list = []

In [39]:
# Prefix every new HGG name with "BraTS19" to obtain its case directory name.
pathhgg_list.extend("BraTS19" + suffix for suffix in hgg_diff)

In [40]:
pathhgg_list

['BraTS19_TMC_11964_1',
 'BraTS19_CBICA_BAX_1',
 'BraTS19_CBICA_AUX_1',
 'BraTS19_CBICA_BGE_1',
 'BraTS19_TMC_21360_1',
 'BraTS19_CBICA_BEM_1',
 'BraTS19_CBICA_BGN_1',
 'BraTS19_CBICA_BAP_1',
 'BraTS19_CBICA_AYG_1',
 'BraTS19_CBICA_AWX_1',
 'BraTS19_CBICA_APK_1',
 'BraTS19_CBICA_ASF_1',
 'BraTS19_CBICA_BJY_1',
 'BraTS19_CBICA_AVF_1',
 'BraTS19_CBICA_AUA_1',
 'BraTS19_CBICA_BGO_1',
 'BraTS19_CBICA_AOS_1',
 'BraTS19_CBICA_BGR_1',
 'BraTS19_CBICA_BLJ_1',
 'BraTS19_TMC_15477_1',
 'BraTS19_CBICA_ASR_1',
 'BraTS19_CBICA_BAN_1',
 'BraTS19_CBICA_BBG_1',
 'BraTS19_CBICA_ANV_1',
 'BraTS19_CBICA_BNR_1',
 'BraTS19_CBICA_BHZ_1',
 'BraTS19_CBICA_BGG_1',
 'BraTS19_CBICA_ATN_1',
 'BraTS19_CBICA_BCF_1',
 'BraTS19_TMC_30014_1',
 'BraTS19_CBICA_AVB_1',
 'BraTS19_CBICA_BDK_1',
 'BraTS19_CBICA_BCL_1',
 'BraTS19_CBICA_BGT_1',
 'BraTS19_CBICA_BIC_1',
 'BraTS19_TMC_06290_1',
 'BraTS19_TMC_27374_1',
 'BraTS19_CBICA_AYC_1',
 'BraTS19_CBICA_BHV_1',
 'BraTS19_CBICA_AWV_1',
 'BraTS19_CBICA_BHQ_1',
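 'BraTS19_TMC_12866_1',
 'BraTS19_CBICA_BKV_1',
 'BraTS19_TMC_06643_1',
 'BraTS19_CBICA_AUW_1',
 'BraTS19_CBICA_AVT_1',
 'BraTS19_CBICA_BGW_1',
 'BraTS19_CBICA_BGX_1',
 'BraTS19_CBICA_AOC_1']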

In [41]:
# Prefix every new LGG name with "BraTS19" to obtain its case directory name.
pathlgg_list.extend("BraTS19" + suffix for suffix in lgg_diff)

In [42]:
pathlgg_list

['BraTS19_TMC_09043_1']

The lists printed above are the cases that were newly added to the BraTS19 training set. We use them as the test set.

In [43]:
import SimpleITK as sitk

In [44]:
def normalize(slice, bottom=99, down=1):
    """Clip an intensity array to a percentile window and z-score it.

    Args:
        slice: Array of intensities (any shape accepted by ``np.percentile``).
        bottom: Percentile used as the UPPER clipping bound (default 99).
        down: Percentile used as the LOWER clipping bound (default 1).

    Returns:
        The clipped array unchanged if it is constant or if its non-zero
        values are constant. Otherwise the clipped array standardized with
        the mean and standard deviation of its non-zero values, with every
        element equal to the minimum of the result set to -9.
    """
    upper = np.percentile(slice, bottom)
    lower = np.percentile(slice, down)
    clipped = np.clip(slice, lower, upper)

    # Statistics are taken over non-zero values only, so the black
    # background does not influence the mean / standard deviation.
    foreground = clipped[np.nonzero(clipped)]
    if np.std(clipped) == 0:
        return clipped
    sigma = np.std(foreground)
    if sigma == 0:
        return clipped

    standardized = (clipped - np.mean(foreground)) / sigma
    # The minimum of the standardized array is taken to be the (zero
    # intensity) background; it is overwritten with the marker value -9 so
    # those positions can be recognised and discarded later, e.g. when
    # sampling random patches.
    standardized[standardized == standardized.min()] = -9
    return standardized

In [45]:
def crop_ceter(img,croph,cropw):
    """Cut a centred window of croph x cropw out of every slice of a volume.

    Args:
        img: Array indexed as ``img[slice, row, column]``.
        croph: Height (number of rows) of the window.
        cropw: Width (number of columns) of the window.

    Returns:
        ``img`` restricted to the centred window on its last two axes; the
        first axis is left untouched.
    """
    rows, cols = img[0].shape
    top = rows // 2 - croph // 2
    left = cols // 2 - cropw // 2
    row_window = slice(top, top + croph)
    col_window = slice(left, left + cropw)
    return img[:, row_window, col_window]

In [46]:
# Export every HGG test case as per-slice .npy files: one 4-channel image
# array and one mask array for each slice that contains lesion labels.
for case_name in pathhgg_list:
    case_dir = os.path.join(bratshgg_path, case_name)
    # Each case directory holds the four modalities and the mask, all named
    # "<case name><suffix>".
    modality_paths = [
        os.path.join(case_dir, case_name + suffix)
        for suffix in (flair_name, t1_name, t1ce_name, t2_name)
    ]
    mask_path = os.path.join(case_dir, case_name + mask_name)

    # Read each modality, normalize it on its own (the modalities have
    # different contrasts) and centre-crop every slice to 160x160.
    # GetArrayFromImage() converts the SimpleITK image to an ndarray.
    modality_crops = []
    for modality_path in modality_paths:
        volume = sitk.GetArrayFromImage(sitk.ReadImage(modality_path, sitk.sitkInt16))
        modality_crops.append(crop_ceter(normalize(volume), 160, 160))
    mask_volume = sitk.GetArrayFromImage(sitk.ReadImage(mask_path, sitk.sitkUInt8))
    mask_crop = crop_ceter(mask_volume, 160, 160)
    print(case_name)

    for n_slice in range(modality_crops[0].shape[0]):
        maskImg = mask_crop[n_slice]
        # Skip slices without lesions (mask is all zeros).
        if np.max(maskImg) == 0:
            continue
        # Channel order: flair, t1, t1ce, t2 (last axis).
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        FourModelImageArray = np.stack(
            [crop[n_slice] for crop in modality_crops], axis=-1
        ).astype(np.float64)

        slice_file = case_name + "_" + str(n_slice) + ".npy"
        # Image: (rows, cols, 4) float64; mask: (rows, cols) uint8.
        np.save(os.path.join(outputImg_path, slice_file), FourModelImageArray)
        np.save(os.path.join(outputMask_path, slice_file), maskImg)
print("Done！")

BraTS19_TMC_11964_1
BraTS19_CBICA_BAX_1
BraTS19_CBICA_AUX_1
BraTS19_CBICA_BGE_1
BraTS19_TMC_21360_1
BraTS19_CBICA_BEM_1
BraTS19_CBICA_BGN_1
BraTS19_CBICA_BAP_1
BraTS19_CBICA_AYG_1
BraTS19_CBICA_AWX_1
BraTS19_CBICA_APK_1
BraTS19_CBICA_ASF_1
BraTS19_CBICA_BJY_1
BraTS19_CBICA_AVF_1
BraTS19_CBICA_AUA_1
BraTS19_CBICA_BGO_1
BraTS19_CBICA_AOS_1
BraTS19_CBICA_BGR_1
BraTS19_CBICA_BLJ_1
BraTS19_TMC_15477_1
BraTS19_CBICA_ASR_1
BraTS19_CBICA_BAN_1
BraTS19_CBICA_BBG_1
BraTS19_CBICA_ANV_1
BraTS19_CBICA_BNR_1
BraTS19_CBICA_BHZ_1
BraTS19_CBICA_BGG_1
BraTS19_CBICA_ATN_1
BraTS19_CBICA_BCF_1
BraTS19_TMC_30014_1
BraTS19_CBICA_AVB_1
BraTS19_CBICA_BDK_1
BraTS19_CBICA_BCL_1
BraTS19_CBICA_BGT_1
BraTS19_CBICA_BIC_1
BraTS19_TMC_06290_1
BraTS19_TMC_27374_1
BraTS19_CBICA_AYC_1
BraTS19_CBICA_BHV_1
BraTS19_CBICA_AWV_1
BraTS19_CBICA_BHQ_1
BraTS19_TMC_12866_1
BraTS19_CBICA_BKV_1
BraTS19_TMC_06643_1
BraTS19_CBICA_AUW_1
BraTS19_CBICA_AVT_1
BraTS19_CBICA_BGW_1
BraTS19_CBICA_BGX_1
BraTS19_CBICA_AOC_1
Done！


In [47]:
# Export every LGG test case as per-slice .npy files: one 4-channel image
# array and one mask array for each slice that contains lesion labels.
for case_name in pathlgg_list:
    case_dir = os.path.join(bratslgg_path, case_name)
    # Each case directory holds the four modalities and the mask, all named
    # "<case name><suffix>".
    modality_paths = [
        os.path.join(case_dir, case_name + suffix)
        for suffix in (flair_name, t1_name, t1ce_name, t2_name)
    ]
    mask_path = os.path.join(case_dir, case_name + mask_name)

    # Read each modality, normalize it on its own (the modalities have
    # different contrasts) and centre-crop every slice to 160x160.
    # GetArrayFromImage() converts the SimpleITK image to an ndarray.
    modality_crops = []
    for modality_path in modality_paths:
        volume = sitk.GetArrayFromImage(sitk.ReadImage(modality_path, sitk.sitkInt16))
        modality_crops.append(crop_ceter(normalize(volume), 160, 160))
    mask_volume = sitk.GetArrayFromImage(sitk.ReadImage(mask_path, sitk.sitkUInt8))
    mask_crop = crop_ceter(mask_volume, 160, 160)
    print(case_name)

    for n_slice in range(modality_crops[0].shape[0]):
        maskImg = mask_crop[n_slice]
        # Skip slices without lesions (mask is all zeros).
        if np.max(maskImg) == 0:
            continue
        # Channel order: flair, t1, t1ce, t2 (last axis).
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        FourModelImageArray = np.stack(
            [crop[n_slice] for crop in modality_crops], axis=-1
        ).astype(np.float64)

        slice_file = case_name + "_" + str(n_slice) + ".npy"
        # Image: (rows, cols, 4) float64; mask: (rows, cols) uint8.
        np.save(os.path.join(outputImg_path, slice_file), FourModelImageArray)
        np.save(os.path.join(outputMask_path, slice_file), maskImg)
print("Done！")

BraTS19_TMC_09043_1
Done！
