# Create zero-mean, unit-variance data and save it to HDF5 (with sliding window)

In [12]:
import os
import numpy as np
import nibabel as nib
from sklearn import preprocessing 
import h5py
from scipy.misc import imresize
import pandas as pd


def sliding_window(image, step, windowSize):
    """Yield every window of ``image`` on a regular grid.

    Args:
        image: 2-D array-like; it is converted with ``np.asarray``.
        step: Distance between consecutive window origins, used for both
            the row and the column direction.
        windowSize: ``(width, height)`` of a window.

    Yields:
        ``(x, y, patch)`` tuples, where ``x`` is the column and ``y`` the row
        of the window's top-left corner and ``patch`` is
        ``image[y:y + height, x:x + width]``. Rows are walked in the outer
        loop, columns in the inner loop. Windows that reach past the image
        border are cut off by the slicing, so they can be smaller than
        ``windowSize``.
    """
    grid = np.asarray(image)
    for top in range(0, grid.shape[0], step):
        # Take the band of rows once, then cut the columns out of it.
        band = grid[top:top + windowSize[1]]
        for left in range(0, grid.shape[1], step):
            yield (left, top, band[:, left:left + windowSize[0]])
            
            
# Build the sliding-window patch dataset: for every retained volume, walk the
# slices along the third axis, standardize each slice, cut it into windowSize
# patches and keep the patches whose ground-truth sum is large enough. The
# patches, their ground truth and their names are written to data.h5.
directory = '/run/user/1000/gvfs/smb-share:server=192.168.200.1,share=mri'
step = 20
windowSize = (64, 64)  # (width, height), the order sliding_window() indexes it
patchShape = (windowSize[1], windowSize[0])  # (rows, cols) of a full window
minPatchSum = 200  # a patch is kept only if its ground-truth sum exceeds this

mri_df = pd.read_csv(os.path.join(directory, 'MRI.csv'))
imgList = mri_df.image.tolist()
gtList = mri_df.target.tolist()
filenames = []

# Patches are collected in lists and concatenated once at the end; growing an
# array with np.concatenate on every patch copies everything collected so far.
# The zero-length seed arrays keep the result dtype the same as concatenating
# onto a float / int array, and give a correctly shaped (0, rows, cols) result
# when no patch qualifies.
imagePatches = [np.empty((0,) + patchShape)]
targetPatches = [np.empty((0,) + patchShape, dtype=int)]

# Indices to skip for the 20180416 data (the chest volumes are in delList).
delList = [1, 5, 8, 14, 20, 21]
for i, f in enumerate(imgList):
    if i in delList:
        continue

    # Load the image volume and its ground truth. np.asanyarray(img.dataobj)
    # is nibabel's documented replacement for the deprecated get_data().
    img_data = np.asanyarray(nib.load(os.path.join(directory, 'MRI', f)).dataobj)
    gt_data = np.asanyarray(nib.load(os.path.join(directory, 'MRI', gtList[i])).dataobj)

    # Walk the volume slice by slice along its third axis.
    for d in range(gt_data.shape[2]):
        target = gt_data[:, :, d]
        if not target.sum() > 0:
            continue  # no ground truth in this slice, nothing to extract

        # Standardize only the slices that are actually used.
        # NOTE(review): StandardScaler standardizes each column independently,
        # so this is a per-column (not per-slice) normalization -- confirm
        # that this is what is intended.
        image = img_data[:, :, d]
        scaler = preprocessing.StandardScaler().fit(image)
        image_norm = scaler.transform(image)

        windowCount = 0
        for (x, y, gt_patch) in sliding_window(target, step=step, windowSize=windowSize):
            # Windows that reach past the border come back truncated and
            # cannot be stacked with full-size patches, so they are skipped.
            if gt_patch.shape != patchShape:
                continue
            if gt_patch.sum() > minPatchSum:
                window_filename = '{:0>4d}_{:0>3d}_{:0>5d}'.format(i, d, windowCount)
                filenames.append(window_filename)
                img_patch = image_norm[y:y+windowSize[1], x:x+windowSize[0]]
                # Copy so the lists hold only the patches, not views that keep
                # the whole slice / volume alive.
                imagePatches.append(img_patch[np.newaxis].copy())
                targetPatches.append(gt_patch[np.newaxis].copy())
                windowCount += 1

images = np.concatenate(imagePatches, axis=0)
targets = np.concatenate(targetPatches, axis=0)
filenames = [name.encode('utf8') for name in filenames]

# The context manager closes the file even if a write fails.
with h5py.File('data.h5', 'w') as hf:
    hf.create_dataset('filenames', data=filenames)
    hf.create_dataset('images', data=images)
    hf.create_dataset('targets', data=targets)



# Create zero-mean, unit-variance data and save it to HDF5 (without sliding window)

In [2]:
import os
import numpy as np
import nibabel as nib
from sklearn import preprocessing 
import h5py
from scipy.misc import imresize
import pandas as pd
          
# Build the full-slice dataset: for every retained volume, resize each slice
# along the third axis to imSize, standardize it, and store the resized image,
# the standardized image, the resized ground truth and a slice name in
# data/data.h5.
directory = '/run/user/1000/gvfs/smb-share:server=192.168.200.1,share=mri'
outputPath = 'data/data.h5'

# Create the output folder up front so that a missing directory is not
# discovered only after every volume has been processed.
outputDir = os.path.dirname(outputPath)
if not os.path.isdir(outputDir):
    os.makedirs(outputDir)

mri_df = pd.read_csv(os.path.join(directory, 'MRI.csv'))
imgList = mri_df.image.tolist()
gtList = mri_df.target.tolist()
filenames = []
imSize = (256, 256)

# Slices are collected in lists and concatenated once at the end; growing an
# array with np.concatenate on every slice copies everything collected so far.
# The zero-length seed arrays keep the result dtype the same as concatenating
# onto a float / int array, and give a correctly shaped empty result when no
# slice is processed.
imageSlices = [np.empty((0, imSize[0], imSize[1]))]
normSlices = [np.empty((0, imSize[0], imSize[1]))]
targetSlices = [np.empty((0, imSize[0], imSize[1]), dtype=int)]

# Indices to skip for the 20180416 data (the chest volumes are in delList).
delList = [1, 5, 8, 14, 20, 21]
for i, f in enumerate(imgList):
    if i in delList:
        continue
    print(f)

    # Load the image volume and its ground truth. np.asanyarray(img.dataobj)
    # is nibabel's documented replacement for the deprecated get_data().
    img_data = np.asanyarray(nib.load(os.path.join(directory, 'MRI', f)).dataobj)
    gt_data = np.asanyarray(nib.load(os.path.join(directory, 'MRI', gtList[i])).dataobj)

    # Walk the volume slice by slice along its third axis.
    for d in range(gt_data.shape[2]):
        # Read the slice and resize it to imSize.
        # NOTE: scipy.misc.imresize is deprecated since SciPy 1.0.0 and is
        # announced for removal (see the warning printed by this cell). It is
        # kept here because the stored values depend on its behaviour.
        target = gt_data[:, :, d]
        image = img_data[:, :, d]
        image = imresize(image, imSize)
        target = imresize(target, imSize, 'nearest', mode='F')

        # Remove the mean and scale to unit variance.
        # NOTE(review): StandardScaler standardizes each column independently,
        # so this is a per-column (not per-slice) normalization -- confirm
        # that this is what is intended.
        scaler = preprocessing.StandardScaler().fit(image)
        image_norm = scaler.transform(image)

        # Queue the slice with a leading axis so it can be concatenated later.
        imageSlices.append(image[np.newaxis])
        normSlices.append(image_norm[np.newaxis])
        targetSlices.append(target[np.newaxis])

        # Record the slice name: <volume index>_<slice index>.
        im_filename = '{:0>4d}_{:0>3d}'.format(i, d)
        filenames.append(im_filename)

images = np.concatenate(imageSlices, axis=0)
image_norms = np.concatenate(normSlices, axis=0)
targets = np.concatenate(targetSlices, axis=0)
filenames = [name.encode('utf8') for name in filenames]

# The context manager closes the file even if a write fails.
with h5py.File(outputPath, 'w') as hf:
    hf.create_dataset('filenames', data=filenames)
    hf.create_dataset('images', data=images)
    hf.create_dataset('image_norms', data=image_norms)
    hf.create_dataset('targets', data=targets)

case2015.03.30.13.36.40_CHIU_20150330_11527_002_1_21_CYBER_KNIFE.nii


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.


case2016.11.04.13.54.03_CHANG_20161104_1631_005_1_21_CYBER_KNIFE_C+.nii
case2011.04.18.13.08.02_YANG_20110418_4663_002_1_21_CYBER_KNIFE.nii
case2015.06.16.13.40.29_TSAI_20150616_16261_004_1_21_CYBER_KNIFE.nii
case2010.07.26.13.18.50_SOUNG_20100726_46207_005_1_21_CYBER_KNIFE_C+.nii
case2011.01.18.13.31.24_CHEN_20110118_1902_004_1_21_CYBER_KNIFE_C+.nii
case2010.02.08.11.30.30_JANG_20100208_28496_003_CYBERKNIFE__2_AX_T1-2MM+C.nii
case2011.01.18.13.03.31_CHEN_20110118_34619_004_0517349__AX_T1_+C.nii
case2015.03.30.13.37.46_CHIU_20150330_11527_004_1_21_CYBER_KNIFE.nii
case2010.02.08.11.10.59_JANG_20100208_28496_002_CYBERKNIFE_4_3D_FIESTA-AX.nii
case2011.01.18.13.27.12_CHEN_20110118_1902_002_1_21_CYBER_KNIFE.nii
case2011.04.18.12.31.23_YANG_20110418_36224_002_CYBERKNIFE__3D_FIESTA-C.nii
case2016.11.04.13.52.45_CHANG_20161104_1631_002_1_21_CYBER_KNIFE.nii
case2009.03.04.13.18.10_LO_20090304_29218_002_1_21_CYBER_KNIFE_C+.nii
case2011.01.18.12.58.13_CHEN_20110118_34619_003_0517349__AX_T1.nii
ca

# Use 1/5 of the data for validation and 4/5 for training

In [7]:
import h5py


# Split data/data.h5 into a validation file (the first fifth of the entries)
# and a training file (the remaining entries). The split follows the stored
# order; nothing is shuffled.
# NOTE(review): entries are stored volume by volume, so a single volume may end
# up partly in each split -- confirm that this is acceptable.
datasetKeys = ('filenames', 'images', 'image_norms', 'targets')

# Context managers close every file even if a read or write fails part-way.
with h5py.File('data/data.h5', 'r') as hf:
    # hf[key] raises a KeyError naming the missing dataset, whereas hf.get(key)
    # would silently return None.
    valNum = len(hf['filenames']) // 5

    # (output file, rows taken from every dataset); training is written first.
    splits = (
        ('data/data-train.h5', slice(valNum, None)),
        ('data/data-val.h5', slice(None, valNum)),
    )
    for splitPath, rows in splits:
        with h5py.File(splitPath, 'w') as hf_split:
            for key in datasetKeys:
                hf_split.create_dataset(key, data=hf[key][rows])

In [10]:
import h5py
import os

# Print the shape of the 'targets' dataset of each split and the total number
# of entries across both splits.
num = 0
split = ['train', 'val']
for s in split:
    splitPath = os.path.join(os.getcwd(), 'data', 'data-' + s + '.h5')
    # Only the shape is needed, so read it from the dataset metadata instead of
    # loading the whole array; the context manager also closes the file.
    with h5py.File(splitPath, 'r') as hf:
        targetShape = hf['targets'].shape
    num += targetShape[0]
    print('Size of {} data: {}'.format(s, targetShape))

print('Total num: {}'.format(num))

Size of train data: (2492, 256, 256)
Size of val data: (622, 256, 256)
Total num: 3114


In [None]:
# # ===========================            
# directory = '/run/user/1000/gvfs/smb-share:server=192.168.200.1,share=mri'
# file_name = 'case2009.03.04.13.18.10_LO_20090304_29218_002_1_21_CYBER_KNIFE_C+.nii'
# nii_file = os.path.join(directory, 'MRI', file_name)
# img = nib.load(nii_file)
# # print (img.header)
# img_data = img.get_data()[:, :, 128]
# scaler = preprocessing.StandardScaler().fit(img_data)
# # print (scaler.mean_)
# # print (scaler.scale_)
# img_data_norm = scaler.transform(img_data)
# img_data = Image.fromarray(img_data).convert('L')
# img_data.save(os.path.join(directory, 'test.png'))

# # print (img_data_norm.mean())
# # print (img_data_norm.std())
# print (img_data_norm)
# img_data_norm = Image.fromarray(img_data_norm).convert('L')
# img_data_norm.save(os.path.join(directory, 'test_norm.png'))
# ppimage = Image.open(os.path.join(directory, 'test_norm.png'))
# print (ppimage.getdata())