Training, validation, and testing datasets were labeled by batch manually.

In [1]:
import sys
sys.path.append("../")
import os
import numpy as np
import pandas as pd
import DeepStrain.functions_collection as ff

In [2]:
# Root folder holding the checked cases.
data_path_checked = '/mnt/mount_zc_NAS/HFpEF/data/HFpEF_data'
# The unchecked cases live in the 'unchecked' sub-folder of that same root.
data_path_unchecked = data_path_checked + '/unchecked'

In [3]:
# Collect every 'ID*' entry found under each cohort's 'nii_manual_seg' folder.
checked_cases, unchecked_cases = (
    ff.find_all_target_files(['ID*'], os.path.join(cohort_root, 'nii_manual_seg'))
    for cohort_root in (data_path_checked, data_path_unchecked)
)

# Report how many cases each cohort contains.
print('checked_cases: ', len(checked_cases))
print('unchecked_cases: ', len(unchecked_cases))

checked_cases:  50
unchecked_cases:  53


In [4]:
def split_batch(total_samples, num_batches):
    """Assign consecutive samples to batches and return one batch index per sample.

    Every batch receives ``total_samples // num_batches`` samples; whatever is
    left over after that even split is appended to the last batch
    (index ``num_batches - 1``).

    Args:
        total_samples: number of samples to label.
        num_batches: number of batches to spread the samples over.

    Returns:
        A list of ``total_samples`` batch indices in non-decreasing order.
    """
    even_share, leftover = divmod(total_samples, num_batches)

    # Even share first: batch 0 repeated, then batch 1, and so on.
    labels = [batch for batch in range(num_batches) for _ in range(even_share)]

    # The remainder of the division all lands in the final batch.
    labels += [num_batches - 1] * leftover
    return labels

# Checked labels are generated for twice the number of checked cases
# (the list is later indexed with i * 2); unchecked labels are one per case.
checked_batch = split_batch(total_samples=2 * len(checked_cases), num_batches=10)
unchecked_batch = split_batch(total_samples=len(unchecked_cases), num_batches=10)

print(len(checked_batch))


100


In [6]:
# Both spreadsheets are read from the 'Patient_list' folder under the checked data root.
patient_list_checked, patient_list_unchecked = [
    pd.read_excel(os.path.join(data_path_checked, 'Patient_list', workbook))
    for workbook in (
        'Important_HFpEF_Patient_list_unique_patient_w_notes.xlsx',
        'full_list.xlsx',
    )
]

In [7]:
# Folder holding the predicted segmentations (shared by both cohorts).
pred_seg_root = '/mnt/mount_zc_NAS//DeepStrain/results/trained/seg'
# NOTE(review): nrrd files are looked up under data_path_checked for BOTH cohorts,
# exactly as the original cell did -- confirm the unchecked nrrd files really live there.
nrrd_root = os.path.join(data_path_checked, 'nrrd')


def _lookup_frame(patient_table, patient_id_num, column):
    """Return the integer stored in `column` for the first row whose 'OurID' equals `patient_id_num`.

    Raises:
        AssertionError: if no row of `patient_table` matches `patient_id_num`.
    """
    matches = patient_table.loc[patient_table['OurID'] == patient_id_num, column]
    assert len(matches) > 0, f"OurID {patient_id_num} has no '{column}' entry in the patient list"
    # .iloc[0] replaces int(Series), which pandas deprecates for single-element Series.
    return int(matches.iloc[0])


def _require_file(path, description):
    """Return `path` unchanged; fail with a message naming the path if it is not an existing file."""
    assert os.path.isfile(path), f"{description} not found: {path}"
    return path


def _collect_rows(cases, batches, patient_table, data_root, status, phases):
    """Build one spreadsheet row per (case, phase) pair.

    Args:
        cases: case folder paths; the basename of each is used as the patient ID.
        batches: batch label for each entry of `cases` (same length, same order).
        patient_table: DataFrame with 'OurID', 'ED' and 'ES' columns.
        data_root: cohort root containing the 'nii_img' and 'nii_manual_seg' folders.
        status: value written to the 'checked' column ('checked' or 'unchecked').
        phases: phases to export for every case, e.g. ['ED', 'ES'].

    Returns:
        A list of rows laid out in the order of `column_list`.

    Raises:
        AssertionError: if `cases` and `batches` differ in length, a patient is
            missing from `patient_table`, or any expected file does not exist.
    """
    assert len(cases) == len(batches), \
        f"{len(cases)} cases but {len(batches)} batch labels -- re-run the batch cell"

    rows = []
    for case_dir, batch in zip(cases, batches):
        patient_id = os.path.basename(case_dir)
        patient_id_num = ff.ID_00XX_to_XX(patient_id)
        print(patient_id, patient_id_num, batch)

        # Both frames are looked up even when only one phase is exported,
        # so an incomplete patient-list entry is caught here.
        frames = {phase: _lookup_frame(patient_table, patient_id_num, phase) for phase in ('ED', 'ES')}

        for phase in phases:
            tf = frames[phase]
            frame_tag = str(tf)

            # img:
            img_file = _require_file(
                os.path.join(data_root, 'nii_img', patient_id, 'Org3D_frame' + frame_tag + '.nii.gz'),
                'image')
            # manual seg:
            seg_file = _require_file(
                os.path.join(data_root, 'nii_manual_seg', patient_id, 'SAX_' + phase + '_seg.nii.gz'),
                'manual segmentation')
            # pred_seg
            pred_seg_file = _require_file(
                os.path.join(pred_seg_root, patient_id, 'pred_seg_frame' + frame_tag + '.nii.gz'),
                'predicted segmentation')
            # nrrd
            # NOTE(review): the 'need_' folder prefix is kept as written -- confirm it is not a typo.
            nrrd_file = _require_file(
                os.path.join(nrrd_root, 'need_' + patient_id, 'Org3D_frame' + frame_tag + '.nrrd'),
                'nrrd')

            rows.append([patient_id, patient_id_num, batch, status, phase, tf,
                         img_file, seg_file, pred_seg_file, nrrd_file])
    return rows


# One shared schema for both cohorts, so pd.concat lines the columns up
# instead of creating separate, half-empty 'checked' and 'unchecked' columns.
column_list = ['Patient_ID', 'OurID', 'batch', 'checked', 'ED_ES', 'tf', 'img_file', 'seg_file', 'pred_seg_file', 'nrrd_file']

# checked_batch holds two labels per checked case; every second entry
# (index i * 2) is the label of case i.
df_checked = pd.DataFrame(
    _collect_rows(checked_cases, checked_batch[::2], patient_list_checked,
                  data_path_checked, 'checked', ['ED', 'ES']),
    columns=column_list)
# df_checked.to_excel(os.path.join('/mnt/mount_zc_NAS//DeepStrain/data', 'Patient_list', 'Patient_list_version1.xlsx'), index=False)

# Only the ED frame is exported for the unchecked cases.
df_unchecked = pd.DataFrame(
    _collect_rows(unchecked_cases, unchecked_batch, patient_list_unchecked,
                  data_path_unchecked, 'unchecked', ['ED']),
    columns=column_list)

stacked_df = pd.concat([df_checked, df_unchecked], ignore_index=True)
sorted_df = stacked_df.sort_values(by=['batch', 'OurID'], ascending=True)

sorted_df.to_excel(os.path.join('/mnt/mount_zc_NAS//DeepStrain/data', 'Patient_list', 'Patient_list_for_seg.xlsx'), index=False)



ID_0015 15 0


AssertionError: 