In [None]:
import os
import pandas as pd
import pydicom
import h5py
import numpy as np
from scipy import ndimage
import math
import matplotlib.pyplot as plt 

from Functions.data_import import *

### Import File with paths to images

In [None]:
# CSV listing the image file paths per patient; read with pd.read_csv in the next cell.
PATH_IMAGE_INFO = '~/file_paths_tof.csv'

In [None]:
# Column names imposed on the image-path table, replacing whatever the CSV provides.
names = ['pat_id', 'path', 'sequence', 'thickness']

# NOTE(review): read_csv is called with default options, so the first CSV row is
# presumably consumed as a header before being renamed -- confirm the file has one.
data_info = pd.read_csv(PATH_IMAGE_INFO)
data_info.columns = names

# Patient ids are cast to int.
data_info = data_info.astype({'pat_id': int})
data_info.head()

In [None]:
# Distinct patient ids found in the image-path table (a set, so the order is not meaningful).
# NOTE: `patient_list` is rebound from `patids_outcome.p_id` in the HDF5-writing cell.
patient_list = list(set(data_info.pat_id))
len(patient_list) # number of distinct patients with image paths

### Import Labels for patients

In [None]:
# Excel sheet with the outcome labels; later cells read its `p_id` and
# `outcome_groups` columns.
# NOTE: the name `path` is reused for other values further down the notebook.
path = "~/patids_outcome.xlsx"
patids_outcome = pd.read_excel(path)
patids_outcome.head()

In [None]:
# Number of rows per outcome group (class balance of the labels).
patids_outcome['outcome_groups'].value_counts()

In [None]:
# Number of rows in the outcome table (length of its `p_id` column).
len(patids_outcome.p_id)

### Create HDF5 File

In [None]:
# Prefix of the per-patient folders; the patient id is appended directly to it.
# The backslash is escaped explicitly: "\P" is not a valid escape sequence and
# newer Python versions warn about it.
root_dir = "D:\\Pat_"


def get_paths_tof(pat_id):
    r"""Return the directory three levels below a patient's TOF folder.

    The search starts at ``<root_dir><pat_id>\Pat<pat_id>_TOF`` and descends
    three times, each time into the first entry reported by ``os.listdir``.
    The result is built with explicit backslashes, so it is a Windows-style
    path regardless of the platform the notebook runs on.

    Parameters
    ----------
    pat_id : int or str
        Patient identifier; it is inserted into the path via ``str(pat_id)``.

    Returns
    -------
    str
        Path of the directory reached after the three descents.

    Raises
    ------
    IndexError
        If one of the visited directories has no entries.
    OSError
        Propagated from ``os.listdir`` when a directory cannot be listed.
    """
    current_dir = root_dir + str(pat_id) + "\\Pat" + str(pat_id) + "_TOF"

    # Same step three times: take the first listed entry and step into it.
    for _ in range(3):
        entries = os.listdir(current_dir)
        if not entries:
            # Same exception type as indexing an empty list, but naming the directory.
            raise IndexError("no entries found in " + repr(current_dir))
        current_dir = current_dir + "\\" + entries[0]

    return current_dir

In [None]:
# Target shape handed to scale_array_3D for every volume; it also fixes the
# per-sample shape of the 'X' dataset in the HDF5 file.
IMAGE_DIMENSIONS_TOF = (128, 112, 40)
# Output HDF5 file; an existing file at this path is replaced when the file is rebuilt.
PATH_3D_H5 = 'D:\\tof_data_128_112_40_outcome.hdf5'

In [None]:
# Build the HDF5 file from scratch, appending one sample per patient.
first_patient = True

# One entry is written for every patient id listed in the outcome table.
patient_list = list(patids_outcome.p_id)

# Resolve all id strings and image directories before writing anything.
# The 'pat' and 'path' datasets hold fixed-length byte strings whose width is set
# when the dataset is created; sizing them to the longest value up front means a
# later, longer id or path still fits instead of being cut to the first
# patient's length.
pat_ids = [format(patient_number, '03d') for patient_number in patient_list]
directories = [get_paths_tof(patient_number) for patient_number in patient_list]
pat_dtype = 'S' + str(max((len(s) for s in pat_ids), default=1))
path_dtype = 'S' + str(max((len(s) for s in directories), default=1))

# 'X' may grow without bound along axis 0 (patients); the image axes are fixed.
ms = (None,) + tuple(IMAGE_DIMENSIONS_TOF)

# Mode 'w' creates the file and truncates an existing one, so no separate
# delete step is needed before writing.
with h5py.File(PATH_3D_H5, 'w') as f:
    for patient_number, pat_id, directory in zip(patient_list, pat_ids, directories):
        # image to array
        raw_3d_image = image_to_array(patient_number)

        # Optional removal of z-axis slices (remove_slices_zaxis) is currently disabled.

        # resize image to the common target shape
        scaled_3d_image = scale_array_3D(raw_3d_image, IMAGE_DIMENSIONS_TOF)

        # Image matrix with a leading sample axis of length 1
        X = scaled_3d_image[np.newaxis, :, :, :]

        # Patient id and image directory as one-element byte-string arrays
        # (np.string_ was removed in NumPy 2.0; an explicit 'S' dtype works on both).
        pat = np.array([pat_id], dtype=pat_dtype)
        path = np.array([directory], dtype=path_dtype)

        # label 0 = good outcome, label 1 = bad outcome
        Y_pat = patids_outcome.outcome_groups.loc[patids_outcome.p_id == patient_number].values
        if Y_pat.shape[0] != 1:
            # Zero or several labels would leave 'stroke' out of step with 'X'.
            raise ValueError(
                "expected exactly one outcome label for patient {}, found {}".format(
                    patient_number, Y_pat.shape[0]
                )
            )

        if first_patient:  # initialize the resizable datasets from the first sample
            f.create_dataset('X', data=X, maxshape=ms, chunks=True)
            f.create_dataset('stroke', data=Y_pat, maxshape=(None,), chunks=True)
            f.create_dataset('pat', data=pat, maxshape=(None,), chunks=True)
            f.create_dataset('path', data=path, maxshape=(None,), chunks=True)
            first_patient = False
        else:  # grow each dataset along axis 0 and fill the new tail
            for dataset_name, values in (('X', X), ('stroke', Y_pat), ('pat', pat), ('path', path)):
                dataset = f[dataset_name]
                dataset.resize(dataset.shape[0] + values.shape[0], axis=0)
                dataset[-values.shape[0]:] = values

In [None]:
# Redundant: `f` was opened in a `with` block in the previous cell, which already
# closed it on exit. NOTE(review): closing an already-closed h5py file is
# presumably a no-op -- confirm, or drop this cell.
f.close()

### Checking HDF5 File

In [None]:
# Reopen the written file read-only and list its top-level dataset names.
# The handle stays open across the following cells and is closed in the last one.
dd = h5py.File(PATH_3D_H5, 'r')
list(dd.keys())

In [None]:
# Handle to the stored 'X' dataset (rebinds `X`); show the maximum over all of its values.
X = dd['X']
np.max(X)

In [None]:
# Stored patient ids, decoded from bytes to str; the length is the number of stored entries.
patients = [p.decode() for p in dd['pat']]
len(patients)

In [None]:
# Stored image directories, left undecoded (rebinds `path`); preview the first ten.
path = [p for p in dd['path']]
path[0:10]

In [None]:
# All stored outcome labels from the 'stroke' dataset, in file order.
labels = [p for p in dd['stroke']]
labels

In [None]:
# Release the read-only handle opened for the checks above.
dd.close()