Training, Validation and testing dataset were labeled by batch manually

In [2]:
import sys
sys.path.append('/host/d/Github/')
import os
import numpy as np
import pandas as pd
import nibabel as nb
import Diffusion_denoising_thin_slice.functions_collection as ff

In [14]:
patient_sheet = ff.find_all_target_files(['*'],os.path.join('/host/d/Data/NYU_MR/multicoil_train/ref'))
print('total patient num: ', len(patient_sheet))

total patient num:  50


In [8]:
# shuffle these patients
np.random.seed(42)
shuffled_indices = np.random.permutation(len(patient_sheet))
patient_sheet = [patient_sheet[i] for i in shuffled_indices]

In [9]:
# 
results = []
for i in range(0, len(patient_sheet)):
    patient_id = os.path.basename(patient_sheet[i])
    # first 35 are train, next 5 are val, last 10 are test   
    if i < 35:
        batch = 'train'
    elif i<40 and i>=35:
        batch = 'val'
    else:
        batch = 'test'
    results.append([batch, patient_id])
df = pd.DataFrame(results, columns=['batch','Patient_ID'])
ff.make_folder(['/host/d/Data/NYU_MR/Patient_lists'])
save_path = os.path.join('/host/d/Data/NYU_MR/Patient_lists','NYU_MR_batched.xlsx')
df.to_excel(save_path, index=False)

# build list for simulations

In [4]:
patient_sheet = pd.read_excel(os.path.join('/host/d/Data/NYU_MR/Patient_lists/NYU_MR_batched.xlsx'),dtype={'Patient_ID': str})
simulation_num = 1

data_path = '/host/d/Data/NYU_MR/multicoil_train'


results = []
for i in range(0, len(patient_sheet)):
    patient_id = patient_sheet['Patient_ID'][i]
    batch = patient_sheet['batch'][i]
    print(f"Processing patient {patient_id} in batch {batch}")


    ground_truth_file = os.path.join(data_path,'ref', patient_id, 'img.nii.gz')
    img = nb.load(ground_truth_file).get_fdata()
    print('ground truth shape:', img.shape)
    slice_num = img.shape[0]
 
    max_value = np.max(img)
    min_value = np.min(img)
    print('ground truth max and min value:', max_value, min_value)

    x_dim, y_dim = img.shape[1], img.shape[2]


    for n in range(0,simulation_num):
        simulation_file_all =''
        simulation_file_odd = os.path.join(data_path,'undersample_8', patient_id, 'random_' + str(n), 'recon1', 'img.nii.gz')
        simulation_file_even = os.path.join(data_path,'undersample_8', patient_id, 'random_' + str(n), 'recon2', 'img.nii.gz')
            

        results.append([batch,patient_id, n, simulation_file_all, simulation_file_odd, simulation_file_even, ground_truth_file, slice_num, x_dim, y_dim, max_value, min_value])

        df = pd.DataFrame(results, columns=['batch','Patient_ID', 'random_num', 'simulation_file_all','simulation_file_odd', 'simulation_file_even', 'ground_truth_file', 'slice_num', 'x_dim', 'y_dim', 'max_value', 'min_value'])
        df.to_excel(os.path.join('/host/d/Data/NYU_MR/Patient_lists','NYU_MR_simulation.xlsx'), index=False)


Processing patient file1000176 in batch train
ground truth shape: (36, 640, 320)
ground truth max and min value: 0.00024828744763237006 5.321381196150904e-09
Processing patient file1000486 in batch train
ground truth shape: (35, 640, 320)
ground truth max and min value: 0.00015094980406731517 5.34433555772473e-09
Processing patient file1000401 in batch train
ground truth shape: (36, 640, 320)
ground truth max and min value: 0.0003873487792687884 1.3839940417560417e-09
Processing patient file1000540 in batch train
ground truth shape: (33, 640, 320)
ground truth max and min value: 0.0001635280769631598 1.5471714286768164e-09
Processing patient file1000233 in batch train
ground truth shape: (35, 640, 320)
ground truth max and min value: 0.00016510124911750407 7.305896232114525e-10
Processing patient file1000584 in batch train
ground truth shape: (38, 640, 320)
ground truth max and min value: 0.0011870991823825108 2.154442452000412e-09
Processing patient file1000363 in batch train
ground t

### build list for distillation

In [11]:
patient_sheet = pd.read_excel(os.path.join('/host/d/Data/low_dose_CT/Patient_lists/mayo_low_dose_CT_batched.xlsx'),dtype={'Patient_ID': str})
noise_types = ['gaussian']
simulation_num = 1

data_path = '/host/d/Data/low_dose_CT/'

for noise_type in noise_types:
    results = []
    for i in range(0, len(patient_sheet)):
        patient_id = patient_sheet['Patient_ID'][i]
        batch = patient_sheet['batch'][i]
        print(f"Processing patient {patient_id} in batch {batch} with noise type {noise_type}")


        ground_truth_file = os.path.join(data_path,'nii_imgs', patient_id, 'img_sliced.nii.gz')
        # img = nb.load(ground_truth_file).get_fdata()
        print('ground truth max and min value:', max_value, min_value)


        for n in range(0,simulation_num):
            n_type = 'gaussian' if noise_type == 'gaussian' else 'poisson'
            simulation_file_all = os.path.join(data_path,'simulation_v2', patient_id, n_type + '_random_' + str(n), 'recon_all_sliced.nii.gz')
            simulation_file_odd = os.path.join(data_path,'simulation_v2', patient_id, n_type + '_random_' + str(n), 'recon_odd_sliced.nii.gz')
            simulation_file_even = os.path.join(data_path,'simulation_v2', patient_id, n_type + '_random_' + str(n), 'recon_even_sliced.nii.gz')

            generated_20_file = os.path.join('/host/d/projects/denoising/models/unsupervised_gaussian_2/pred_images_input_both', patient_id, 'random_' + str(n), 'epoch190avg/pred_img_scans20.nii.gz')
            generated_10_file = os.path.join('/host/d/projects/denoising/models/unsupervised_gaussian_2/pred_images_input_both', patient_id, 'random_' + str(n), 'epoch190avg/pred_img_scans10.nii.gz')
            

            results.append([batch,patient_id, n, simulation_file_all, simulation_file_odd, simulation_file_even, ground_truth_file, 100, generated_20_file, generated_10_file])

        df = pd.DataFrame(results, columns=['batch','Patient_ID', 'random_num', 'simulation_file_all','simulation_file_odd', 'simulation_file_even', 'ground_truth_file', 'slice_num', 'generated_20_file', 'generated_10_file'])
        df.to_excel(os.path.join('/host/d/Data/low_dose_CT/Patient_lists', 'mayo_low_dose_CT_distill_v2.xlsx'), index=False)



Processing patient L333 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L096 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L286 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L067 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L310 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L109 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L506 in batch val with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L192 in batch test with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L143 in batch test with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Proce