In [11]:
import sys
sys.path.append('/host/d/Github')
import os
from dipy.align.reslice import reslice
import numpy as np
import nibabel as nb
import pandas as pd
import CT_registration_diffusion.Data_processing as Data_processing
import CT_registration_diffusion.functions_collection as ff

## Step 1: rename original files

In [15]:
# Choose the dataset to work on and derive its root folder.
dataset = 'Popi'
data_path = os.path.join('/host/d/Data/4DCT', dataset)
print('data path: ', data_path)

# Collect everything under the dataset root that matches 'C*', then let
# ff.sort_timeframe order the matches (presumably by the trailing case
# number -- TODO confirm against functions_collection).
matched_case_folders = ff.find_all_target_files(['C*'], data_path)
case_list = ff.sort_timeframe(matched_case_folders, 0, 'e')
print('case_list:', case_list, 'how many cases:', len(case_list))

data path:  /host/d/Data/4DCT/Popi
case_list: ['/host/d/Data/4DCT/Popi/Case1' '/host/d/Data/4DCT/Popi/Case2'
 '/host/d/Data/4DCT/Popi/Case3' '/host/d/Data/4DCT/Popi/Case4'
 '/host/d/Data/4DCT/Popi/Case5' '/host/d/Data/4DCT/Popi/Case6'] how many cases: 6


In [16]:
# Rename each raw Popi frame 'img_<t>0.nii.gz' to 'img_<t>.nii.gz' inside every
# case's 'original_image' folder. Frames that already carry the new name are
# skipped, so the cell can be re-run safely.
# Uses `case_list` built in the previous cell.
for case_path in case_list:
    print('processing case: ', case_path)

    # Value returned by ff.find_timeframe for this folder path. It is only printed
    # here, but the DIR-LAB file-name pattern below needs it. It is deliberately a
    # different name from the loop variable so the loop state is never overwritten.
    case_num = ff.find_timeframe(case_path,0,'e')
    print('this is case number: ', case_num)

    original_image_folder = os.path.join(case_path, 'original_image')

    # total_timeframes = len(ff.find_all_target_files(['ca*'],original_image_folder)) # this is for DIR-LAB
    total_timeframes = len(ff.find_all_target_files(['img_*'],original_image_folder)) # this is for Popi
    print('total timeframes: ', total_timeframes)

    for t in range(total_timeframes):
        # print('processing timeframe: ', t)
        new_file_name = 'img_'+str(t)+'.nii.gz'
        new_file_path = os.path.join(original_image_folder, new_file_name)

        # Already renamed on a previous run: nothing to do for this frame.
        if os.path.isfile(new_file_path):
            print('file img_'+str(t)+'.nii.gz already exists, skip renaming')
            continue

        # original_file_name = 'case'+str(case_num)+'_T'+str(t)+'0_s.nii.gz' # this is for DIR-LAB
        original_file_name = 'img_'+str(t)+'0.nii.gz'  # this is for Popi
        # print('renaming file: ', original_file_name, ' to ', new_file_name)

        # rename (os.rename raises FileNotFoundError if the source frame is missing)
        os.rename(os.path.join(original_image_folder, original_file_name),
                  new_file_path)


processing case:  /host/d/Data/4DCT/Popi/Case1
this is case number:  1
total timeframes:  10
processing case:  /host/d/Data/4DCT/Popi/Case2
this is case number:  2
total timeframes:  10
processing case:  /host/d/Data/4DCT/Popi/Case3
this is case number:  3
total timeframes:  10
processing case:  /host/d/Data/4DCT/Popi/Case4
this is case number:  4
total timeframes:  10
processing case:  /host/d/Data/4DCT/Popi/Case5
this is case number:  5
total timeframes:  10
processing case:  /host/d/Data/4DCT/Popi/Case6
this is case number:  6
total timeframes:  10


## Step 2: Process image

### step a: shift the original image into the correct intensity range by subtracting 1024

In [7]:
# Subtract a fixed offset from every original frame whose minimum intensity shows
# that the shift has not been applied yet.
# WARNING: the files in 'original_image' are overwritten in place. The minimum-value
# check below is what keeps the cell from shifting the same file twice on a re-run.
datasets = ['DIR_LAB','Popi']

# A frame whose minimum is at or below this value is treated as already shifted.
already_shifted_max_min = -500
# Offset subtracted from frames that still need the shift.
intensity_offset = 1024

for dataset in datasets:
    data_path = os.path.join('/host/d/Data/4DCT',dataset)

    case_list = ff.sort_timeframe(ff.find_all_target_files(['C*'],data_path),0,'e')
    print('case_list:', case_list, 'how many cases:', len(case_list))

    for case_path in case_list:
        original_image_folder = os.path.join(case_path, 'original_image')

        total_timeframes = len(ff.find_all_target_files(['img*'],original_image_folder))
        print('total timeframes: ', total_timeframes)

        for t in range(total_timeframes):
            # print('processing timeframe: ', t)
            filename = os.path.join(original_image_folder, 'img_'+str(t)+'.nii.gz')
            img_file = nb.load(filename)
            img_data = img_file.get_fdata()

            if np.min(img_data) <= already_shifted_max_min:
                print('image data already in correct range, skip adjustment for file: ', filename)
                continue

            # Reuse the original affine and header so only the voxel values change.
            shifted_img = nb.Nifti1Image(img_data - intensity_offset, img_file.affine, img_file.header)
            nb.save(shifted_img, filename)


case_list: ['/host/d/Data/4DCT/Popi/Case1' '/host/d/Data/4DCT/Popi/Case2'
 '/host/d/Data/4DCT/Popi/Case3' '/host/d/Data/4DCT/Popi/Case4'
 '/host/d/Data/4DCT/Popi/Case5' '/host/d/Data/4DCT/Popi/Case6'] how many cases: 6
total timeframes:  10
after adjustment, data min value: -1024.0  max value: 2975.9999990686774
after adjustment, data min value: -1024.0  max value: 2975.9999990686774
after adjustment, data min value: -1024.0  max value: 2975.9999990686774
after adjustment, data min value: -1024.0  max value: 2975.9999990686774
after adjustment, data min value: -1024.0  max value: 2975.9999990686774
after adjustment, data min value: -1024.0  max value: 2975.9999990686774
after adjustment, data min value: -1024.0  max value: 2975.9999990686774
after adjustment, data min value: -1024.0  max value: 2975.9999990686774
after adjustment, data min value: -1024.0  max value: 2975.9999990686774
after adjustment, data min value: -1024.0  max value: 2975.9999990686774
total timeframes:  10
after a

### step a2: flip the z-axis of some cases so that all data has the same order of slices
###### only needs to be done once

In [None]:
# datasets = ['Popi']

# for dataset in datasets:
#     data_path = os.path.join('/host/d/Data/4DCT',dataset)

#     case_list = ff.sort_timeframe(ff.find_all_target_files(['Case*'],data_path),0,'e')
#     print('case_list:', case_list, 'how many cases:', len(case_list))

#     for case_num in range(len(case_list)):
#         case_path = case_list[case_num]
#         print('processing case: ', case_path)
#         case_id = os.path.basename(case_path)


#         # find how many time frames
#         total_timeframes = len(ff.find_all_target_files(['img_*'],os.path.join(case_path, 'original_image')))
#         print('total timeframes: ', total_timeframes)

#         for t in range(0,total_timeframes):
            
#             image_file = os.path.join(case_path, 'original_image', 'img_'+str(t)+'.nii.gz')
#             print('processing image file: ', image_file)

#             # load original image
#             nii_img = nb.load(image_file)
#             affine = nii_img.affine
#             header = nii_img.header
#             nii_image_data = nii_img.get_fdata()

#             # flip the z axis
#             nii_image_data = nii_image_data[:,:,::-1]
#             # save the flipped image
#             new_nii_img = nb.Nifti1Image(nii_image_data, affine, header)
#             nb.save(new_nii_img, image_file)


### step b. resample to a uniform pixel dimension, default = [1.5, 1.5, 2.5] mm

In [None]:
# Resample every original frame to a common voxel spacing and store the result as
# int16 in a per-case 'resampled_image' folder.
case_shape_list = []

datasets = ['DIR_LAB','Popi']

# Target spacing in mm; the first entry is used as the in-plane resolution and the
# last entry as the slice thickness.
new_dim = [1.5,1.5,2.5]

for dataset in datasets:
    data_path = os.path.join('/host/d/Data/4DCT',dataset)

    case_list = ff.sort_timeframe(ff.find_all_target_files(['C*'],data_path),0,'e')
    print('case_list:', case_list, 'how many cases:', len(case_list))

    for case_path in case_list:
        print('processing case: ', case_path)
        case_id = os.path.basename(case_path)  # only needed by the commented-out shape summary below

        # define save folder
        resampled_folder = os.path.join(case_path, 'resampled_image')
        ff.make_folder([resampled_folder])

        # find how many time frames
        original_image_folder = os.path.join(case_path, 'original_image')
        total_timeframes = len(ff.find_all_target_files(['img_*'],original_image_folder))
        print('total timeframes: ', total_timeframes)

        for t in range(total_timeframes):
            original_image_file = os.path.join(original_image_folder, 'img_'+str(t)+'.nii.gz')

            # load
            nii_img = nb.load(original_image_file)

            # resample to the target spacing; the volume minimum is passed as `cval`
            nii_img_resampled = Data_processing.resample_nifti(nii_img, order=3, mode='nearest', cval=np.min(nii_img.get_fdata()), in_plane_resolution_mm=new_dim[0], slice_thickness_mm=new_dim[-1])

            # turn image from float to int
            data_resampled = np.round(nii_img_resampled.get_fdata()).astype(np.int16)
            # When a header is passed to Nifti1Image, nibabel keeps that header's data
            # dtype for the file on disk. Set it explicitly, otherwise the int16 cast
            # above is not reflected in the saved file.
            int16_header = nii_img_resampled.header.copy()
            int16_header.set_data_dtype(np.int16)
            nii_img_resampled = nb.Nifti1Image(data_resampled, nii_img_resampled.affine, int16_header)

            # record the shape
            # NOTE(review): kept disabled. This writes the same spreadsheet path that
            # step c reads, so re-enabling it would overwrite that file.
            # image_shape = nii_img_resampled.shape
            # if t == 0:
            #     case_shape_list.append([dataset, case_id, image_shape])
            #     save_excel_folder = os.path.join('/host/d/Data/4DCT/Patient_lists')
            #     ff.make_folder([save_excel_folder])
            #     df = pd.DataFrame(case_shape_list, columns=['dataset_ID','case_ID','resampled_image_shape'])
            #     df.to_excel(os.path.join(save_excel_folder, 'resampled_image_shape_summary.xlsx'), index=False)

            # save resampled image
            resampled_image_file = os.path.join(resampled_folder, 'img_'+str(t)+'.nii.gz')
            nb.save(nii_img_resampled, resampled_image_file)
            print('resampled image saved to: ', resampled_image_file)

          
    

### step c. crop or pad to a uniform dimension. This step requires manually defined crop/pad operations (saved in resampled_image_shape_summary.xlsx) to ensure lung coverage

In [None]:
# Crop or pad every resampled frame to `target_size` and save it as int16 in a
# per-case 'cropped_image' folder. The per-case instructions come from the
# spreadsheet columns read below: dataset_ID, case_ID, xy_pad, x_center, y_center,
# z_slice_start.
crop_info = pd.read_excel('/host/d/Data/4DCT/Patient_lists/resampled_image_shape_summary.xlsx')
target_size = [224,224, 96]

datasets = ['DIR_LAB','Popi']

for dataset in datasets:
    data_path = os.path.join('/host/d/Data/4DCT',dataset)

    case_list = ff.sort_timeframe(ff.find_all_target_files(['Case*'],data_path),0,'e')
    print('case_list:', case_list, 'how many cases:', len(case_list))

    for case_path in case_list:
        print('processing case: ', case_path)
        case_id = os.path.basename(case_path)

        # define save folder (create the cropped folder itself, not the resampled one)
        cropped_folder = os.path.join(case_path, 'cropped_image')
        ff.make_folder([cropped_folder])

        # find corresponding crop info
        case_rows = crop_info[(crop_info['dataset_ID']==dataset) & (crop_info['case_ID']==case_id)]
        if case_rows.empty:
            raise ValueError('no crop info found for dataset ' + str(dataset) + ', case ' + str(case_id))
        row = case_rows.iloc[0]

        # find how many time frames
        resampled_image_folder = os.path.join(case_path, 'resampled_image')
        total_timeframes = len(ff.find_all_target_files(['img_*'],resampled_image_folder))
        print('total timeframes: ', total_timeframes)

        for t in range(total_timeframes):
            resampled_image_file = os.path.join(resampled_image_folder, 'img_'+str(t)+'.nii.gz')

            # load resampled image
            nii_img_resampled = nb.load(resampled_image_file)
            # NOTE(review): the affine is reused unchanged for the cropped volume, so
            # it does not account for any crop offset -- confirm whether downstream
            # code relies on world coordinates.
            affine = nii_img_resampled.affine
            header = nii_img_resampled.header
            image_data = nii_img_resampled.get_fdata()
            # The minimum of the full resampled volume is passed to crop_or_pad as the fill value.
            pad_value = np.min(image_data)

            # --- in-plane (x, y) step ---
            if row['xy_pad'] == 'yes':
                nii_image_cropped = Data_processing.crop_or_pad(image_data, [target_size[0], target_size[1], image_data.shape[2]], pad_value)
            elif row['xy_pad'] == 'no':
                # Crop a target-sized window around the manually chosen centre,
                # clamping the start index at 0.
                x_center = row['x_center']
                y_center = row['y_center']
                x_start = max(0, int(x_center - target_size[0]//2))
                y_start = max(0, int(y_center - target_size[1]//2))
                nii_image_cropped = image_data[x_start:x_start+target_size[0], y_start:y_start+target_size[1], :]
            else:
                # Fail loudly instead of reusing a stale or undefined volume.
                raise ValueError("unexpected xy_pad value " + repr(row['xy_pad']) + " for case " + str(case_id) + "; expected 'yes' or 'no'")

            # --- through-plane (z) step ---
            if row['z_slice_start'] == 'pad':
                # Operate on the in-plane result so the manual x/y crop above is kept
                # (starting again from the full volume would discard it).
                nii_image_cropped = Data_processing.crop_or_pad(nii_image_cropped, [target_size[0], target_size[1], target_size[2]], pad_value)
            else:
                z_start = int(row['z_slice_start'])
                nii_image_cropped = nii_image_cropped[:, :, z_start:z_start+target_size[2]]

            nb.save(nb.Nifti1Image(np.round(nii_image_cropped).astype(np.int16), affine, header),
                    os.path.join(cropped_folder, 'img_'+str(t)+'.nii.gz'))
            

       
    