## Data Preparation

You should prepare the following things before running this step. 

1. **trained model** from ```step3.ipynb``` 

2. **A patient list** that enumerates the dataset 
   - check ```step2.ipynb```
   - For the example data: as stated in the previous steps, we validate our models on type 2 noise (Gaussian noise), so please use ```example_data/Patient_lists/patient_list_unsupervised_gaussian.xlsx``` for unsupervised learning.

3. bins for **histogram equalization**
    - provided in ```/help_data```

---

## Task: Inference

- First: we run inference multiple times (section "step 5: Prediction" below).
- Second: we average the results (section "step 6: average the results of multiple inferences" below) to obtain the final product.

---

### Docker environment
Please use `docker/docker_pytorch`; it builds a PyTorch Docker environment.


In [1]:
import sys
sys.path.append('/workspace/Documents')
import os
import torch
import numpy as np 
import nibabel as nb
import CTDenoising_Diffusion_N2N.denoising_diffusion_pytorch.denoising_diffusion_pytorch.conditional_diffusion as ddpm
import CTDenoising_Diffusion_N2N.functions_collection as ff
import CTDenoising_Diffusion_N2N.Build_lists.Build_list as Build_list
import CTDenoising_Diffusion_N2N.Generator as Generator

main_path = '/mnt/camca_NAS/denoising/' # replace with your own path

  from .autonotebook import tqdm as notebook_tqdm
  @autocast(enabled = False)


### step 1: define trial name and trained model file

In [2]:
# Training regime of the model to evaluate: 'unsupervised' or 'supervised'.
supervision = 'unsupervised'

# The noise type follows from the supervision mode.
# NOTE(review): the spelling 'possion' is kept exactly as-is because it becomes part of
# trial_name, which is used to build the model folder path.
if supervision == 'supervised':
    noise_type = 'possion'
else:
    noise_type = 'gaussian'

beta = 0  # by default

# The trial name identifies the model folder, e.g. 'model_unsupervised_gaussian_beta0'.
trial_name = f'model_{supervision}_{noise_type}_beta{beta}'
print(trial_name)

model_unsupervised_gaussian_beta0


In [8]:
# Checkpoint to load: <main_path>/models/<trial_name>/models/model-<epoch>.pt
epoch = 61
trained_model_filename = os.path.join(main_path, 'models', trial_name, f'models/model-{epoch}.pt')

# Predictions are written under <main_path>/models/<trial_name>/pred_images
# (created here if it does not exist yet).
save_folder = os.path.join(main_path, 'models', trial_name, 'pred_images')
os.makedirs(save_folder, exist_ok=True)

### step 2: set default parameters
Usually you don't need to change these.

In [9]:
# --- model / data geometry ---
problem_dimension = '2D'

# One condition channel for supervised models or trials with 'mean' in their name,
# two condition channels otherwise.
if supervision == 'supervised' or 'mean' in trial_name:
    condition_channel = 1
else:
    condition_channel = 2

image_size = [512, 512]
# NOTE(review): the two patch settings below are not referenced by the inference cells of
# this notebook; presumably kept for parity with the training configuration - confirm.
num_patches_per_slice = 2
patch_size = [128, 128]

# --- diffusion settings (passed to ddpm.GaussianDiffusion) ---
objective = 'pred_x0'
sampling_timesteps = 100

# --- intensity pre-processing (passed to Generator.Dataset_2D) ---
histogram_equalization = True
background_cutoff = -1000   # presumably Hounsfield units - TODO confirm
maximum_cutoff = 2000       # presumably Hounsfield units - TODO confirm
normalize_factor = 'equation'

# Clipping range handed to the diffusion model (used together with clip_or_not = True).
clip_range = [-1, 1]

### step 3: define patient list
test on type 2 noise  (Gaussian noise)

In [10]:
# Spreadsheet that enumerates the cases to run inference on.
patient_list_file = os.path.join(main_path, 'example_data/Patient_lists', 'patient_list_unsupervised_gaussian.xlsx')
build_sheet = Build_list.Build(patient_list_file)

# batch_list = [0]: for the purpose of the example, we test on the same training case.
built_lists = build_sheet.__build__(batch_list = [0])
_, patient_id_list, patient_subid_list, random_num_list, condition_list, x0_list = built_lists

# Each case has two simulations; the (disabled) line below picks only the first one as an example.
# n = ff.get_X_numbers_in_interval(total_number = patient_id_list.shape[0],start_number = 0,end_number = 1, interval = 2)
print('total number:', patient_id_list.shape[0])

total number: 1


### step 4: define model

In [11]:
# U-Net used by the diffusion model.
# Make sure these settings are the same as in step 3 (training).
model = ddpm.Unet(
    problem_dimension=problem_dimension,
    channels=1,
    out_dim=1,
    init_dim=64,
    conditional_diffusion=True,
    condition_channels=condition_channel,
    downsample_list=(True, True, True, False),
    upsample_list=(True, True, True, False),
    full_attn=(None, None, False, True),
)

# Diffusion wrapper around the U-Net; the sampling settings come from the "step 2" parameters.
diffusion_model = ddpm.GaussianDiffusion(
    model,
    image_size=image_size,
    objective=objective,
    timesteps=1000,                         # number of steps
    sampling_timesteps=sampling_timesteps,
    ddim_sampling_eta=1.0,                  # don't change!!
    force_ddim=False,
    auto_normalize=False,
    clip_or_not=True,
    clip_range=clip_range,
)

is ddim sampling True


### step 5: Prediction (doing inference for multiple times)
To minimize data storage, we only evaluate the middle 50 slices (`slice_range = [30, 80]`, i.e. slice indices 30 to 79).

In [13]:
slice_range = [30,80] # the range of slices to be used
inference_times = 20 # number of independent sampling runs per case

# The histogram-equalization tables are the same for every case and every iteration,
# so they are read from disk once instead of once per sampling run.
bins = np.load('./help_data/histogram_equalization/bins.npy')
bins_mapped = np.load('./help_data/histogram_equalization/bins_mapped.npy')

for i in range(0,patient_id_list.shape[0]):
    patient_id = patient_id_list[i]
    patient_subid = patient_subid_list[i]
    random_num = random_num_list[i]
    x0_file = x0_list[i]
    condition_file = condition_list[i]

    print(i,patient_id, patient_subid, random_num)

    # get the condition image (original noisy image)
    # The file is opened and decoded a single time; shape, affine and the slice subset
    # are all taken from that one load.
    condition_nii = nb.load(condition_file)
    condition_volume = condition_nii.get_fdata()
    print('condition_file:', condition_file, 'shape: ', condition_volume.shape)
    condition_img = condition_volume[:,:,slice_range[0]:slice_range[1]]
    affine = condition_nii.affine
    shape = condition_img.shape

    # get the ground truth image (if no ground truth image (in the real-world cases), just comment out the following lines)
    gt_volume = nb.load(x0_file).get_fdata()
    print('x0_file:', x0_file, 'shape:', gt_volume.shape)
    gt_img = gt_volume[:,:,slice_range[0]:slice_range[1]]

    # make folders (the per-case folder does not depend on the iteration, so build it once)
    case_folder = os.path.join(save_folder, patient_id, patient_subid, 'random_' + str(random_num))
    ff.make_folder([os.path.join(save_folder, patient_id), os.path.join(save_folder, patient_id, patient_subid), case_folder])

    for iteration in range(1,1+inference_times):
        print('iteration:', iteration)

        # one output folder per sampling run: epoch<epoch>_<iteration>
        save_folder_case = os.path.join(case_folder, 'epoch' + str(epoch)+'_'+str(iteration))
        os.makedirs(save_folder_case, exist_ok=True)

        # skip runs whose prediction is already on disk, so the cell can be re-run / resumed
        if os.path.isfile(os.path.join(save_folder_case, 'pred_img.nii.gz')):
            print('already done')
            continue

        # generator
        generator = Generator.Dataset_2D(
            supervision = supervision,

            img_list = np.array([x0_file]),
            condition_list = np.array([condition_file]),
            image_size = image_size,

            num_slices_per_image = slice_range[1] - slice_range[0],
            random_pick_slice = False,
            slice_range = [slice_range[0], slice_range[1]],

            histogram_equalization = histogram_equalization,
            bins = bins,
            bins_mapped = bins_mapped,

            background_cutoff = background_cutoff,
            maximum_cutoff = maximum_cutoff,
            normalize_factor = normalize_factor,)

        # sample:
        sampler = ddpm.Sampler(diffusion_model,generator,batch_size = 1)

        pred_img = sampler.sample_2D(trained_model_filename, condition_img)
        print(pred_img.shape)

        # save
        nb.save(nb.Nifti1Image(pred_img, affine), os.path.join(save_folder_case, 'pred_img.nii.gz'))

        # the condition and ground-truth crops are identical for every run, so store them only once
        if iteration == 1:
            nb.save(nb.Nifti1Image(condition_img, affine), os.path.join(save_folder_case, 'condition_img.nii.gz'))
            nb.save(nb.Nifti1Image(gt_img, affine), os.path.join(save_folder_case, 'gt_img.nii.gz'))
            
       

0 00004038 0000455420 0


condition_file: /mnt/camca_NAS/denoising/example_data/simulation/00004038/0000455420/gaussian_random_0/recon.nii.gz shape:  (512, 512, 100)
x0_file: /mnt/camca_NAS/denoising/example_data/fixedCT/00004038/0000455420/img_thinslice_partial.nii.gz shape: (512, 512, 100)
iteration: 1
histogram equalization:  True
model device:  cuda:0
using DDIM, eta:  1.0


sampling loop time step: 100%|██████████| 100/100 [00:06<00:00, 14.65it/s]


pred_img_slice shape:  (512, 512)
using DDIM, eta:  1.0


sampling loop time step: 100%|██████████| 100/100 [00:06<00:00, 14.71it/s]


pred_img_slice shape:  (512, 512)
final image shape:  (512, 512, 2)
(512, 512, 2)
iteration: 2
histogram equalization:  True
model device:  cuda:0
using DDIM, eta:  1.0


sampling loop time step: 100%|██████████| 100/100 [00:06<00:00, 14.68it/s]


pred_img_slice shape:  (512, 512)
using DDIM, eta:  1.0


sampling loop time step: 100%|██████████| 100/100 [00:06<00:00, 14.72it/s]


pred_img_slice shape:  (512, 512)
final image shape:  (512, 512, 2)
(512, 512, 2)


### step 6: average the results of multiple inferences

In [16]:
slice_range = [30,80] # the range of slices to be used
inference_avg_scans = [10,20] # avg 10 or 20 inference results

for i in range(0,patient_id_list.shape[0]):
    patient_id = patient_id_list[i]
    patient_subid = patient_subid_list[i]
    random_num = random_num_list[i]
    condition_file = condition_list[i]

    print(i,patient_id, patient_subid, random_num)

    # folder holding the per-run predictions (epoch<epoch>_<k>) and the averaged output
    case_folder = os.path.join(save_folder, patient_id, patient_subid, 'random_' + str(random_num))
    save_folder_avg = os.path.join(case_folder, 'epoch' + str(epoch)+'avg')
    os.makedirs(save_folder_avg, exist_ok=True)

    # get the condition image (original noisy image); it is loaded once and provides
    # both the affine and the shape of the cropped volume
    condition_nii = nb.load(condition_file)
    condition_volume = condition_nii.get_fdata()
    print('condition_file:', condition_file, 'shape: ', condition_volume.shape)
    condition_img = condition_volume[:,:,slice_range[0]:slice_range[1]]
    affine = condition_nii.affine
    shape = condition_img.shape

    # collect the folders of all sampling runs made for this epoch
    made_predicts = ff.sort_timeframe(ff.find_all_target_files(['epoch' + str(epoch)+'_*'], case_folder),0,'_','/')
    print(made_predicts)
    total_predicts = len(made_predicts)

    # stack every available prediction along a new last axis
    loaded_data = np.zeros((shape[0], shape[1], shape[2], total_predicts))
    for j in range(total_predicts):
        loaded_data[:,:,:,j] = nb.load(os.path.join(made_predicts[j],'pred_img.nii.gz')).get_fdata()

    for avg_num in inference_avg_scans:
        # Guard: averaging more runs than exist on disk would index past the end of
        # made_predicts / loaded_data, so such requests are skipped with a message.
        if avg_num > total_predicts:
            print('skip avg_num:', avg_num, '- only', total_predicts, 'predictions available')
            continue

        print('avg_num:', avg_num)
        print('predict_num:', avg_num)
        for j in range(avg_num):
            print('file:', made_predicts[j])

        # average the first avg_num predictions across the last axis
        predicts_avg = np.mean(loaded_data[:,:,:,:avg_num], axis = -1)
        nb.save(nb.Nifti1Image(predicts_avg, affine), os.path.join(save_folder_avg, 'pred_img_scans' + str(avg_num) + '.nii.gz'))

0 00004038 0000455420 0


condition_file: /mnt/camca_NAS/denoising/example_data/simulation/00004038/0000455420/gaussian_random_0/recon.nii.gz shape:  (512, 512, 100)
['/mnt/camca_NAS/denoising/models/model_unsupervised_gaussian_beta0/pred_images/00004038/0000455420/random_0/epoch61_1'
 '/mnt/camca_NAS/denoising/models/model_unsupervised_gaussian_beta0/pred_images/00004038/0000455420/random_0/epoch61_2']
avg_num: 2
predict_num: 2
file: /mnt/camca_NAS/denoising/models/model_unsupervised_gaussian_beta0/pred_images/00004038/0000455420/random_0/epoch61_1
file: /mnt/camca_NAS/denoising/models/model_unsupervised_gaussian_beta0/pred_images/00004038/0000455420/random_0/epoch61_2
