## Data Preparation

You should prepare the following things before running this step. 

1. **NIfTI images** of fixed CT
   - we are going to use it to simulate the noise-free ground truth and noisy thin-slice counterpart.

2. **A patient list** that enumerates these fixed CT scans.  
   - We have 100 fixed CT scans in this study.


---

## Task: Simulate noise-free ground truth and noisy thin-slice counterpart

- In this script, we start with any fixed CT scan. We first resample it to 5mm using averaging, which will be considered as **"noise-free ground truth"**. 

- Then we resample this ground truth to 0.625mm using interpolation, which will be considered as **"noise-free thin slice"**. We keep only the middle 100 slices, because the full volume would be too large. The resulting file is named `img_thinslice_partial.nii.gz`.

- Lastly, we insert noise into the thin slices (using two types of noise), which will be considered as **"noisy thin slice"**.

---

### Docker environment
Please use `docker/docker_tensorflow`; it builds a TensorFlow Docker image.


In [4]:
import sys
sys.path.append('/workspace/Documents')
# imports
import os, sys
import numpy as np 
import pandas as pd
import nibabel as nb
from skimage.measure import block_reduce

import CTDenoising_Diffusion_N2N.functions_collection as ff
import CTDenoising_Diffusion_N2N.Data_processing as Data_processing

# Root folder of the source CT archive; the per-patient fixed-CT NIfTI files are read from here.
data_path = "/mnt/camca_NAS/Portable_CT_data"
# Project root; the patient list is read from here and the simulated data is written under it.
main_path = "/mnt/camca_NAS/denoising/"

### step 1: resample all fixed CT to 5mm using averaging, then interpolate to 0.625mm

In [6]:
# Step 1 driver: for every fixed CT in the patient list
#   1. average neighbouring slices to ~5mm thickness   -> img_5mm.nii.gz
#   2. interpolate that volume back to 0.625mm slices  -> img_thinslice.nii.gz
#   3. keep the middle 100 thin slices                 -> img_thinslice_partial.nii.gz
# All outputs go to <main_path>/Data/fixedCT1/<Patient_ID>/<Patient_subID>/.

TARGET_THICK_MM = 5.0    # slice thickness of the averaged "ground truth"
TARGET_THIN_MM = 0.625   # slice thickness of the interpolated thin-slice volume
N_MIDDLE_SLICES = 100    # number of central thin slices kept in the partial file

# read patient list (IDs are forced to str so leading zeros survive)
patient_sheet = pd.read_excel(
    os.path.join(main_path, 'Patient_lists', 'fixedCT_static.xlsx'),
    dtype={'Patient_ID': str, 'Patient_subID': str},
)
print('patient sheet len: ', len(patient_sheet))

for _, row in patient_sheet.iterrows():
    patient_id = row['Patient_ID']
    patient_subID = row['Patient_subID']
    use = row['use']

    original_file = os.path.join(data_path, 'nii_imgs_202404/static', patient_id, patient_subID, 'fixed', use + '.nii.gz')

    # get the affine and pixel dimension
    img = nb.load(original_file)
    affine = img.affine
    pixdim = img.header.get_zooms()
    img_data = img.get_fdata()

    # define save folder (makedirs creates every missing parent in one call)
    save_folder = os.path.join(main_path, 'Data/fixedCT1', patient_id, patient_subID)
    os.makedirs(save_folder, exist_ok=True)

    ######### first, resample slice thickness to 5mm using averaging
    slice_thickness = float(pixdim[2])
    if slice_thickness <= 0:
        raise ValueError(f'{original_file}: invalid slice thickness {slice_thickness} mm')
    # Floor with a small tolerance: a thickness stored as e.g. 0.6250001 must
    # still give 8, whereas a plain float floor-division would give 7.
    z_scale_factor = int(np.floor(TARGET_THICK_MM / slice_thickness + 1e-3))
    if z_scale_factor < 1:
        raise ValueError(
            f'{original_file}: slice thickness {slice_thickness} mm is larger than '
            f'the {TARGET_THICK_MM} mm target, cannot average slices'
        )
    print('z_scale_factor: ', z_scale_factor)
    # NOTE(review): when the slice count is not a multiple of z_scale_factor,
    # block_reduce pads the trailing block (default cval=0) before averaging,
    # so the last 5mm slice is biased -- confirm that edge slice is never used.
    img_data_xyz5mm = block_reduce(img_data, (1, 1, z_scale_factor), np.mean)

    # change affine and pixel dimension accordingly.
    # The third affine column is the world-space step along the slice axis, so
    # the whole column is scaled; this equals scaling only [2, 2] when the
    # affine is axis-aligned and stays correct when it is oblique.
    # NOTE(review): the translation is left untouched, as before -- confirm the
    # sub-voxel origin shift introduced by averaging is acceptable.
    new_affine_5mm = affine.copy()
    new_affine_5mm[:3, 2] *= z_scale_factor
    new_pixdim_5mm = (pixdim[0], pixdim[1], pixdim[2] * z_scale_factor)
    # save in a copy of the header so the loaded image's header is not mutated
    header_5mm = img.header.copy()
    header_5mm.set_zooms(new_pixdim_5mm)

    # save the image
    save_file = os.path.join(save_folder, 'img_5mm.nii.gz')
    nb.save(nb.Nifti1Image(img_data_xyz5mm, new_affine_5mm, header_5mm), save_file)

    ######### second, resample slice thickness to 0.625mm using interpolation
    new_dim = [pixdim[0], pixdim[1], TARGET_THIN_MM]

    # the 5mm volume is re-read from disk, so the interpolation sees exactly
    # what was stored in the file
    img_5mm = nb.load(save_file)
    hr_resample = Data_processing.resample_nifti(
        img_5mm,
        order=1,
        mode='nearest',
        cval=np.min(img_5mm.get_fdata()),
        in_plane_resolution_mm=new_dim[0],
        slice_thickness_mm=new_dim[-1],
    )
    thinslice_file = os.path.join(save_folder, 'img_thinslice.nii.gz')
    nb.save(hr_resample, thinslice_file)

    ######### select middle 100 slices
    img_thinslice = nb.load(thinslice_file)
    n_slices = img_thinslice.shape[2]
    mid_slice = n_slices // 2
    # Clamp the window: with fewer than 100 slices the unclamped start index is
    # negative and would wrap around, silently selecting the wrong slices.
    start = max(0, mid_slice - N_MIDDLE_SLICES // 2)
    stop = min(n_slices, mid_slice + N_MIDDLE_SLICES // 2)
    if stop - start < N_MIDDLE_SLICES:
        print('warning: only', stop - start, 'thin slices available for', patient_id, patient_subID)
    img_thinslice_data = img_thinslice.get_fdata()[:, :, start:stop]
    # NOTE(review): the un-cropped affine is reused, so the partial volume keeps
    # the world origin of the full volume -- confirm downstream code only
    # relies on voxel arrays.
    nb.save(
        nb.Nifti1Image(img_thinslice_data, img_thinslice.affine, img_thinslice.header),
        os.path.join(save_folder, 'img_thinslice_partial.nii.gz'),
    )


### step 2: insert two types of noise
- type 1: Poisson noise + Hann filter
- type 2: Gaussian noise + soft tissue kernel