The training, validation, and testing datasets were labeled manually, batch by batch.

In [9]:
import sys
sys.path.append("../")
import os
import numpy as np
import pandas as pd
import nibabel as nb
import DeepStrain.functions_collection as ff

In [3]:
# Root folder of the HFpEF data; the cells below use it as the base folder of the "checked" cases.
data_path_checked = '/mnt/mount_zc_NAS/HFpEF/data/HFpEF_data'
# The "unchecked" cases are stored in the 'unchecked' sub-folder of that same root.
data_path_unchecked = '/mnt/mount_zc_NAS/HFpEF/data/HFpEF_data/unchecked'

In [5]:
# Every entry matching 'ID*' inside a 'nii_manual_seg' folder counts as one case.
seg_dir_checked = os.path.join(data_path_checked, 'nii_manual_seg')
seg_dir_unchecked = os.path.join(data_path_unchecked, 'nii_manual_seg')

checked_cases = ff.find_all_target_files(['ID*'], seg_dir_checked)
unchecked_cases = ff.find_all_target_files(['ID*'], seg_dir_unchecked)

# Report how many cases were found in each group.
for label, cases in (('checked_cases: ', checked_cases), ('unchecked_cases: ', unchecked_cases)):
    print(label, len(cases))

checked_cases:  50
unchecked_cases:  53


In [6]:
def split_batch(total_samples, num_batches):
    """Label every sample with the index of the batch it belongs to.

    Batches are filled in order: each of the ``num_batches`` batches gets
    ``total_samples // num_batches`` consecutive samples, and the samples
    left over after that even split are all appended to the last batch
    (index ``num_batches - 1``).

    Args:
        total_samples: number of samples to label.
        num_batches: number of batches to distribute the samples over.

    Returns:
        A flat list of batch indices, one entry per sample.
    """
    base_size, leftover = divmod(total_samples, num_batches)

    # Even part: batch 0 repeated base_size times, then batch 1, and so on.
    labels = [batch for batch in range(num_batches) for _ in range(base_size)]

    # Uneven part: the remainder all goes to the final batch.
    labels += [num_batches - 1] * leftover
    return labels

# Checked cases get twice as many labels as cases: the segmentation list below
# reads entry 2*i for case i and emits one ED and one ES row per case.
n_checked_labels = len(checked_cases) * 2
checked_batch = split_batch(n_checked_labels, 10)

# Unchecked cases get one label per case.
unchecked_batch = split_batch(len(unchecked_cases), 10)

print(len(checked_batch))


100


In [7]:
# Both spreadsheets are read from the 'Patient_list' folder under data_path_checked.
patient_list_dir = os.path.join(data_path_checked, 'Patient_list')

patient_list_checked = pd.read_excel(
    os.path.join(patient_list_dir, 'Important_HFpEF_Patient_list_unique_patient_w_notes.xlsx')
)
patient_list_unchecked = pd.read_excel(
    os.path.join(patient_list_dir, 'full_list.xlsx')
)

# Build list for segmentation

In [7]:
# Checked cases: one row per case and cardiac phase (ED and ES).
Results = []
for i, case_path in enumerate(checked_cases):
    patient_id = os.path.basename(case_path)
    patient_id_num = ff.ID_00XX_to_XX(patient_id)

    # checked_batch carries two labels per case, so case i reads entry 2*i
    batch = checked_batch[2 * i]

    print(patient_id, patient_id_num, batch)

    # look the patient up once and read both time frames from that selection
    patient_rows = patient_list_checked[patient_list_checked['OurID'] == patient_id_num]
    ED = patient_rows['ED'].values[0].astype(int)
    ES = patient_rows['ES'].values[0].astype(int)

    for e, tf in (('ED', ED), ('ES', ES)):
        frame = str(tf)

        # image volume of this time frame
        img_file = os.path.join(data_path_checked, 'nii_img', patient_id, 'Org3D_frame' + frame + '.nii.gz')
        # manual segmentation of this phase
        seg_file = os.path.join(data_path_checked, 'nii_manual_seg', patient_id, 'SAX_' + e + '_seg.nii.gz')
        # predicted segmentation of this time frame
        pred_seg_file = os.path.join('/mnt/mount_zc_NAS//DeepStrain/results/trained/seg', patient_id, 'pred_seg_frame' + frame + '.nii.gz')
        # nrrd version of the image volume
        nrrd_file = os.path.join(data_path_checked, 'nrrd', 'need_' + patient_id, 'Org3D_frame' + frame + '.nrrd')

        # stop at the first referenced file that does not exist
        for required_file in (img_file, seg_file, pred_seg_file, nrrd_file):
            assert os.path.isfile(required_file) == 1

        Results.append([patient_id, patient_id_num, batch, 'checked', e, tf, img_file, seg_file, pred_seg_file, nrrd_file])


column_list = ['Patient_ID', 'OurID', 'batch', 'checked', 'ED_ES', 'tf', 'img_file', 'seg_file', 'pred_seg_file', 'nrrd_file']
df_checked = pd.DataFrame(Results, columns=column_list)
# Stand-alone export of the checked list (currently disabled):
# df_checked.to_excel(os.path.join('/mnt/mount_zc_NAS//DeepStrain/data', 'Patient_list', 'Patient_list_version1.xlsx'), index=False)

# Unchecked cases: one row per case; only the ED phase is listed for this group.
Results = []
for i, case_path in enumerate(unchecked_cases):
    patient_id = os.path.basename(case_path)
    patient_id_num = ff.ID_00XX_to_XX(patient_id)

    batch = unchecked_batch[i]
    print(patient_id, patient_id_num, batch)

    # Select the patient's row once. Calling int() directly on a one-element
    # Series is deprecated in pandas (>= 2.1), so the scalar is extracted with
    # .iloc[0]; the explicit length check keeps a missing or duplicated OurID
    # an error instead of silently using the first match.
    patient_row = patient_list_unchecked.loc[patient_list_unchecked['OurID'] == patient_id_num]
    assert len(patient_row) == 1, 'expected exactly one row for OurID ' + str(patient_id_num) + ', found ' + str(len(patient_row))
    ED = int(patient_row['ED'].iloc[0])
    ES = int(patient_row['ES'].iloc[0])

    print(ED, ES)

    # time frame of each phase; add 'ES' to the list below to also export ES rows
    time_frames = {'ED': ED, 'ES': ES}
    for e in ['ED']:
        tf = time_frames[e]

        # img:
        img_file = os.path.join(data_path_unchecked, 'nii_img', patient_id, 'Org3D_frame' + str(tf) + '.nii.gz')
        print(img_file)
        assert os.path.isfile(img_file), img_file
        # manual seg:
        seg_file = os.path.join(data_path_unchecked, 'nii_manual_seg', patient_id, 'SAX_' + e + '_seg.nii.gz')
        assert os.path.isfile(seg_file), seg_file

        # pred_seg
        pred_seg_file = os.path.join('/mnt/mount_zc_NAS//DeepStrain/results/trained/seg', patient_id, 'pred_seg_frame' + str(tf) + '.nii.gz')
        assert os.path.isfile(pred_seg_file), pred_seg_file

        # nrrd
        # NOTE(review): this path is rooted at data_path_checked although the image and
        # manual segmentation of this loop come from data_path_unchecked -- confirm
        # that the nrrd files of unchecked cases really live under the checked root.
        nrrd_file = os.path.join(data_path_checked, 'nrrd', 'need_' + patient_id, 'Org3D_frame' + str(tf) + '.nrrd')
        assert os.path.isfile(nrrd_file), nrrd_file

        Results.append([patient_id, patient_id_num, batch, 'unchecked', e, tf, img_file, seg_file, pred_seg_file, nrrd_file])


column_list = ['Patient_ID', 'OurID', 'batch', 'checked', 'ED_ES', 'tf', 'img_file', 'seg_file', 'pred_seg_file', 'nrrd_file']
df_unchecked = pd.DataFrame(Results, columns=column_list)

# Stack the checked and unchecked rows, order them by batch and then by patient
# number, and write the combined list to disk.
stacked_df = pd.concat((df_checked, df_unchecked), ignore_index=True)
sorted_df = stacked_df.sort_values(['batch', 'OurID'])

seg_list_path = os.path.join('/mnt/mount_zc_NAS//DeepStrain/data', 'Patient_list', 'Patient_list_for_seg.xlsx')
sorted_df.to_excel(seg_list_path, index=False)



ID_0015 15 0
ID_0016 16 0
ID_0078 78 0
ID_0080 80 0
ID_0085 85 0
ID_0280 280 1
ID_0284 284 1
ID_0287 287 1
ID_0290 290 1
ID_0291 291 1
ID_0468 468 2
ID_0483 483 2
ID_0662 662 2
ID_0663 663 2
ID_0671 671 2
ID_0672 672 3
ID_0678 678 3
ID_0682 682 3
ID_0685 685 3
ID_0692 692 3
ID_0811 811 4
ID_0813 813 4
ID_0815 815 4
ID_0824 824 4
ID_0940 940 4
ID_0949 949 5
ID_0951 951 5
ID_0953 953 5
ID_0954 954 5
ID_1057 1057 5
ID_1124 1124 6
ID_1126 1126 6
ID_1130 1130 6
ID_1132 1132 6
ID_1141 1141 6
ID_1151 1151 7
ID_1163 1163 7
ID_1172 1172 7
ID_1175 1175 7
ID_1177 1177 7
ID_1180 1180 8
ID_1181 1181 8
ID_1183 1183 8
ID_1207 1207 8
ID_1208 1208 8
ID_1352 1352 9
ID_1353 1353 9
ID_1354 1354 9
ID_1361 1361 9
ID_1405 1405 9
ID_0011 11 0
1 14
/mnt/mount_zc_NAS/HFpEF/data/HFpEF_data/unchecked/nii_img/ID_0011/Org3D_frame1.nii.gz
ID_0109 109 0
1 9
/mnt/mount_zc_NAS/HFpEF/data/HFpEF_data/unchecked/nii_img/ID_0109/Org3D_frame1.nii.gz
ID_0128 128 0
1 10
/mnt/mount_zc_NAS/HFpEF/data/HFpEF_data/unchecked/nii_img

# Build list for per-case fine-tuning of motion estimation

In [10]:
# Checked cases: one row per case with its ED/ES time frames, its data folders
# and the slice range covered by the ES manual segmentation.
Results = []
for i, case_path in enumerate(checked_cases):
    patient_id = os.path.basename(case_path)
    patient_id_num = ff.ID_00XX_to_XX(patient_id)

    print(patient_id, patient_id_num)

    # look the patient up once and read both time frames from that selection
    patient_rows = patient_list_checked[patient_list_checked['OurID'] == patient_id_num]
    ED = patient_rows['ED'].values[0].astype(int)
    ES = patient_rows['ES'].values[0].astype(int)

    # per-patient folders
    image_folder = os.path.join(data_path_checked, 'nii_img', patient_id)
    seg_folder = os.path.join(data_path_checked, 'nii_manual_seg', patient_id)
    pred_seg_folder = os.path.join('/mnt/mount_zc_NAS/DeepStrain/results/trained/seg', patient_id)
    nrrd_folder = os.path.join(data_path_checked, 'nrrd', 'need_' + patient_id)

    # slices (third axis) of the ES manual segmentation whose voxel sum is non-zero;
    # the first and last of them delimit the heart
    es_seg = nb.load(os.path.join(seg_folder, 'SAX_ES_seg.nii.gz')).get_fdata()
    heart_slices = [z for z in range(es_seg.shape[2]) if np.sum(es_seg[:, :, z]) != 0]
    start_slice, end_slice = heart_slices[0], heart_slices[-1]

    Results.append([patient_id, patient_id_num, 'checked', ED, ES, image_folder, seg_folder, pred_seg_folder, nrrd_folder, start_slice, end_slice])


column_list = ['Patient_ID', 'OurID', 'checked', 'ED', 'ES', 'image_folder', 'seg_folder', 'pred_seg_folder', 'nrrd_folder', 'start_slice', 'end_slice']
df_checked = pd.DataFrame(Results, columns=column_list)

motion_list_path = os.path.join('/mnt/mount_zc_NAS//DeepStrain/data', 'Patient_list', 'Patient_list_for_motion_checked.xlsx')
df_checked.to_excel(motion_list_path, index=False)


ID_0015 15
ID_0016 16
ID_0078 78
ID_0080 80
ID_0085 85


ID_0280 280
ID_0284 284
ID_0287 287
ID_0290 290
ID_0291 291
ID_0468 468
ID_0483 483
ID_0662 662
ID_0663 663
ID_0671 671
ID_0672 672
ID_0678 678
ID_0682 682
ID_0685 685
ID_0692 692
ID_0811 811
ID_0813 813
ID_0815 815
ID_0824 824
ID_0940 940
ID_0949 949
ID_0951 951
ID_0953 953
ID_0954 954
ID_1057 1057
ID_1124 1124
ID_1126 1126
ID_1130 1130
ID_1132 1132
ID_1141 1141
ID_1151 1151
ID_1163 1163
ID_1172 1172
ID_1175 1175
ID_1177 1177
ID_1180 1180
ID_1181 1181
ID_1183 1183
ID_1207 1207
ID_1208 1208
ID_1352 1352
ID_1353 1353
ID_1354 1354
ID_1361 1361
ID_1405 1405
