Training, Validation and testing dataset were labeled by batch manually

In [2]:
import sys
sys.path.append('/host/d/Github/')
import os
import numpy as np
import pandas as pd
import nibabel as nb
import Diffusion_denoising_thin_slice.functions_collection as ff

In [3]:
patient_sheet = ff.find_all_target_files(['*'],os.path.join('/host/d/Data/NYU_MR/multicoil_train/ref'))
print('total patient num: ', len(patient_sheet))

total patient num:  63


### set the batch

In [20]:
results = []
for i in range(0, len(patient_sheet)):
    patient_id = os.path.basename(patient_sheet[i])
    data_file = os.path.join('/host/d/Data/NYU_MR/multicoil_train/ref', patient_id, 'img.nii.gz')
    img_nii = nb.load(data_file)
    img_data = img_nii.get_fdata()
    slice_num, x_dim, y_dim = img_data.shape
    max_value = np.max(img_data)
    min_value = np.min(img_data)
    results.append([patient_id, slice_num, x_dim, y_dim, max_value, min_value])


In [21]:
df = pd.DataFrame(results, columns=['Patient_ID', 'slice_num', 'x_dim', 'y_dim', 'max_value', 'min_value'])
# important: only leave the rows that has max_val <=0.0005
new_df = df[df['max_value'] <= 0.0005]
print('filtered patient num: ', new_df.shape[0])

filtered patient num:  50


In [22]:
# add one column called batch, 0-35 train, 35-40 val, >=40 test
batch_list = []
for i in range(0, new_df.shape[0]):
    if i < 35:
        batch_list.append('train')
    elif i < 40:
        batch_list.append('val')
    else:
        batch_list.append('test')
new_df['batch'] = batch_list
new_df.to_excel(os.path.join('/host/d/Data/NYU_MR/Patient_lists/', 'NYU_MR_batched.xlsx'), index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['batch'] = batch_list


# build list for simulations

In [None]:
patient_sheet = pd.read_excel(os.path.join('/host/d/Data/NYU_MR/Patient_lists/NYU_MR_batched.xlsx'),dtype={'Patient_ID': str})
simulation_num = 1

data_path = '/host/d/Data/NYU_MR/multicoil_train'


results = []
for i in range(0, len(patient_sheet)):
    patient_id = patient_sheet['Patient_ID'][i]
    batch = patient_sheet['batch'][i]
    print(f"Processing patient {patient_id} in batch {batch}")


    ground_truth_file = os.path.join(data_path,'ref', patient_id, 'img.nii.gz')
    row = patient_sheet[patient_sheet['Patient_ID'] == patient_id]
    slice_num = int(row['slice_num'])
    x_dim = int(row['x_dim'])
    y_dim = int(row['y_dim'])
    max_value = float(row['max_value'])
    min_value = float(row['min_value'])


    for n in range(0,simulation_num):
        simulation_file_all =''
        simulation_file_odd = os.path.join(data_path,'undersample_4_equispaced', patient_id, 'random_' + str(n), 'recon', 'img.nii.gz')
        simulation_file_even =simulation_file_odd
            

        results.append([batch,patient_id, n, simulation_file_all, simulation_file_odd, simulation_file_even, ground_truth_file, slice_num, x_dim, y_dim, max_value, min_value])

        df = pd.DataFrame(results, columns=['batch','Patient_ID', 'random_num', 'simulation_file_all','simulation_file_odd', 'simulation_file_even', 'ground_truth_file', 'slice_num', 'x_dim', 'y_dim', 'max_value', 'min_value'])
        df.to_excel(os.path.join('/host/d/Data/NYU_MR/Patient_lists','NYU_MR_simulation_undersample4_equispaced.xlsx'), index=True)


Processing patient file1000015 in batch train
Processing patient file1000057 in batch train
Processing patient file1000059 in batch train
Processing patient file1000060 in batch train
Processing patient file1000094 in batch train
Processing patient file1000097 in batch train
Processing patient file1000109 in batch train
Processing patient file1000117 in batch train
Processing patient file1000154 in batch train
Processing patient file1000176 in batch train
Processing patient file1000181 in batch train
Processing patient file1000204 in batch train
Processing patient file1000210 in batch train
Processing patient file1000233 in batch train
Processing patient file1000250 in batch train
Processing patient file1000275 in batch train
Processing patient file1000300 in batch train
Processing patient file1000311 in batch train
Processing patient file1000340 in batch train
Processing patient file1000351 in batch train
Processing patient file1000363 in batch train
Processing patient file1000378 in 

### build list for distillation

In [11]:
patient_sheet = pd.read_excel(os.path.join('/host/d/Data/low_dose_CT/Patient_lists/mayo_low_dose_CT_batched.xlsx'),dtype={'Patient_ID': str})
noise_types = ['gaussian']
simulation_num = 1

data_path = '/host/d/Data/low_dose_CT/'

for noise_type in noise_types:
    results = []
    for i in range(0, len(patient_sheet)):
        patient_id = patient_sheet['Patient_ID'][i]
        batch = patient_sheet['batch'][i]
        print(f"Processing patient {patient_id} in batch {batch} with noise type {noise_type}")


        ground_truth_file = os.path.join(data_path,'nii_imgs', patient_id, 'img_sliced.nii.gz')
        # img = nb.load(ground_truth_file).get_fdata()
        print('ground truth max and min value:', max_value, min_value)


        for n in range(0,simulation_num):
            n_type = 'gaussian' if noise_type == 'gaussian' else 'poisson'
            simulation_file_all = os.path.join(data_path,'simulation_v2', patient_id, n_type + '_random_' + str(n), 'recon_all_sliced.nii.gz')
            simulation_file_odd = os.path.join(data_path,'simulation_v2', patient_id, n_type + '_random_' + str(n), 'recon_odd_sliced.nii.gz')
            simulation_file_even = os.path.join(data_path,'simulation_v2', patient_id, n_type + '_random_' + str(n), 'recon_even_sliced.nii.gz')

            generated_20_file = os.path.join('/host/d/projects/denoising/models/unsupervised_gaussian_2/pred_images_input_both', patient_id, 'random_' + str(n), 'epoch190avg/pred_img_scans20.nii.gz')
            generated_10_file = os.path.join('/host/d/projects/denoising/models/unsupervised_gaussian_2/pred_images_input_both', patient_id, 'random_' + str(n), 'epoch190avg/pred_img_scans10.nii.gz')
            

            results.append([batch,patient_id, n, simulation_file_all, simulation_file_odd, simulation_file_even, ground_truth_file, 100, generated_20_file, generated_10_file])

        df = pd.DataFrame(results, columns=['batch','Patient_ID', 'random_num', 'simulation_file_all','simulation_file_odd', 'simulation_file_even', 'ground_truth_file', 'slice_num', 'generated_20_file', 'generated_10_file'])
        df.to_excel(os.path.join('/host/d/Data/low_dose_CT/Patient_lists', 'mayo_low_dose_CT_distill_v2.xlsx'), index=False)



Processing patient L333 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L096 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L286 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L067 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L310 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L109 in batch train with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L506 in batch val with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L192 in batch test with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Processing patient L143 in batch test with noise type gaussian
ground truth max and min value: 3071.0 -1024.0
Proce