In [1]:
import sys
sys.path.append('/workspace/Documents')
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import nibabel as nb

import PSF_generation.python.functions as ff

In [24]:
# func: convert case filename:
def convert_case_filename(case_filename, from_num_to_name = True, ground_truth_label = None):
    if from_num_to_name == True:
        c = case_filename.split('.')[0]
        if c[-2:] == '00':
            position = 'lcc'
        elif c[-2:] == '01':
            position = 'lmlo'
        elif c[-2:] == '10':
            position = 'rcc'
        elif c[-2:] == '11':
            position = 'rmlo'

        ground_truth_label = [1 if c[-3] == '1' else 0][0]

        case_original_name = '2d_proc.' + c[:-3] + '.' + position + '.dcm'
        return case_original_name, ground_truth_label
    
    if from_num_to_name == False:
        assert ground_truth_label != None, 'ground_truth_label must be provided'
        c = case_filename.split('.')[1]
        if ground_truth_label == 1:
            c = c+'1'
        else:
            c = c+'0'

        position = case_filename.split('.')[2]
        if position == 'lcc':
            c = c+'00'
        elif position == 'lmlo':
            c = c+'01'
        elif position == 'rcc':
            c = c+'10'
        elif position == 'rmlo':
            c = c+'11'
        return c, ground_truth_label

In [30]:
main_path = '/mnt/BPM_NAS/cchen/'
ground_truth_path = os.path.join(main_path,'Dataset999_blur/labelsTs')
predict_path = os.path.join(main_path, 'project_results/nnUNet_results/Dataset999_blur/nnUNetTrainer__nnUNetPlans__2d/fold_0/inference_Dataset999_imagesTs')

In [31]:
case_list = pd.read_csv('/mnt/BPM_NAS/BPM/alldata/phase3_ge_origin/darwin/cleaned_labels/20240422_blur_roll_-1/all/Blur.csv')
case_list = case_list[(case_list['Dataset'] == 'test') & (case_list['Exclude'] == 0)]
print(case_list.shape)

non_blur_case_list = case_list[(case_list['Blur'] == 0) | pd.isnull(case_list['Blur'])]
real_blur_case_list = case_list[case_list['Blur'] == 1]
print('non_blur_case_list:', non_blur_case_list.shape)
print('real_blur_case_list:', real_blur_case_list.shape)

(269, 7)
non_blur_case_list: (263, 7)
real_blur_case_list: (6, 7)


# quantitative 

In [32]:
# real blur + real non-blur
result = []
for i in range(0,case_list.shape[0]):
    row = case_list.iloc[i]
    case_ori_filename = row['Image']
    print('case_ori_filename:', case_ori_filename)
    
    if row['Blur'] == 0 or pd.isnull(row['Blur']):
        ground_truth_label = 0
    elif row['Blur'] == 1:
        ground_truth_label = 1

    case_filename, ground_truth_label = convert_case_filename(case_ori_filename,  from_num_to_name = False, ground_truth_label=ground_truth_label)
    print('case name in testing: ', case_filename)

    gt_case = os.path.join(ground_truth_path, 'blur_' + case_filename + '.nii.gz')
    pred_case = os.path.join(predict_path, 'blur_' + case_filename + '.nii.gz')

    gt = nb.load(gt_case).get_fdata()
    pred = nb.load(pred_case).get_fdata()

    dice = ff.np_categorical_dice(pred, gt,1)
    print('case name: ', case_filename, ' label: ', ground_truth_label, ' dice: ', dice)

    # also count the pixel number
    pixel_num_gt = np.sum(gt)
    pixel_num_pred = np.sum(pred)

    result.append([case_filename, case_ori_filename, ground_truth_label, dice, pixel_num_gt, pixel_num_pred])

result = pd.DataFrame(result, columns = ['case_new_name', 'case_original_name', 'ground_truth_label', 'dice', 'pixel_num_gt', 'pixel_num_pred'])
result.to_excel(os.path.join(main_path, 'project_results/nnUNet_results/Dataset999_blur/nnUNetTrainer__nnUNetPlans__2d/fold_0/quantitative_real_blur+real_nonblur.xlsx'), index = False)

    

case_ori_filename: 2d_proc.0000452357.lcc.dcm
case name in testing:  0000452357100
case name:  0000452357100  label:  1  dice:  0.4576200984552769
case_ori_filename: 2d_proc.0000452357.lmlo.dcm
case name in testing:  0000452357001
case name:  0000452357001  label:  0  dice:  1.0
case_ori_filename: 2d_proc.0000452357.rcc.dcm
case name in testing:  0000452357010
case name:  0000452357010  label:  0  dice:  1.0
case_ori_filename: 2d_proc.0000452357.rmlo.dcm
case name in testing:  0000452357011
case name:  0000452357011  label:  0  dice:  1.097694840833043e-12
case_ori_filename: 2d_proc.0000452328.lcc.dcm
case name in testing:  0000452328000
case name:  0000452328000  label:  0  dice:  8.752735229751638e-13
case_ori_filename: 2d_proc.0000452328.lmlo.dcm
case name in testing:  0000452328001
case name:  0000452328001  label:  0  dice:  5.111429155589291e-13
case_ori_filename: 2d_proc.0000452328.rcc.dcm
case name in testing:  0000452328110
case name:  0000452328110  label:  1  dice:  0.276115

In [33]:
# simulated blur (from real non-blur) + real non-blur
case_list = non_blur_case_list
result = []
for i in range(0,case_list.shape[0]):
    row = case_list.iloc[i]
    case_ori_filename = row['Image']

    for k in ['simulated', 'original']:
        if k == 'original':
            ground_truth_label = 0
        else:
            ground_truth_label = 1
        case_filename, ground_truth_label = convert_case_filename(case_ori_filename,  from_num_to_name = False, ground_truth_label=ground_truth_label)

        gt_case = os.path.join(ground_truth_path, 'blur_' + case_filename + '.nii.gz')
        pred_case = os.path.join(predict_path, 'blur_' + case_filename + '.nii.gz')

        gt = nb.load(gt_case).get_fdata(); gt = np.round(gt)
        pred = nb.load(pred_case).get_fdata()

        dice = ff.np_categorical_dice(pred, gt,1)

        # also count the pixel number
        pixel_num_gt = np.sum(gt)
        pixel_num_pred = np.sum(pred)

        result.append([case_filename, case_ori_filename, ground_truth_label, dice, pixel_num_gt, pixel_num_pred])

result = pd.DataFrame(result, columns = ['case_new_name', 'case_original_name', 'ground_truth_label', 'dice', 'pixel_num_gt', 'pixel_num_pred'])
result.to_excel(os.path.join(main_path, 'project_results/nnUNet_results/Dataset999_blur/nnUNetTrainer__nnUNetPlans__2d/fold_0/quantitative_simulated_blur+real_nonblur.xlsx'), index = False)

# apply the hologic model on GE data

In [10]:
main_path = '/mnt/BPM_NAS/cchen/'
ground_truth_path = os.path.join(main_path,'Dataset992_blur/labelsTs')
predict_path = os.path.join(main_path, 'project_results/nnUNet_results/Dataset996_blur/nnUNetTrainer__nnUNetPlans__2d/fold_0/inference_Dataset992_imagesTs')

In [20]:
case_list = pd.read_csv('/mnt/BPM_NAS/BPM/alldata/phase3_ge_origin/darwin/cleaned_labels/20240406_blur/all/Blur.csv')
case_list = case_list[(case_list['Dataset'] == 'test') & (case_list['Exclude'] == 0)]
print(case_list.shape)

non_blur_case_list = case_list[(case_list['Blur'] == 0) | pd.isnull(case_list['Blur'])]
real_blur_case_list = case_list[case_list['Blur'] == 1]
print('non_blur_case_list:', non_blur_case_list.shape)
print('real_blur_case_list:', real_blur_case_list.shape)

(181, 10)
non_blur_case_list: (176, 10)
real_blur_case_list: (5, 10)


In [22]:
# real blur + real non-blur
result = []
for i in range(0,case_list.shape[0]):
    row = case_list.iloc[i]
    case_ori_filename = row['Image']
    # print('case_ori_filename:', case_ori_filename)
    
    if row['Blur'] == 0 or pd.isnull(row['Blur']):
        ground_truth_label = 0
    elif row['Blur'] == 1:
        ground_truth_label = 1

    case_filename, ground_truth_label = convert_case_filename(case_ori_filename,  from_num_to_name = False, ground_truth_label=ground_truth_label)
    # print('case name in testing: ', case_filename)

    if ground_truth_label == 0:
        gt_case = os.path.join(ground_truth_path, 'blur_' + case_filename + '.nii.gz')
    else: # not saved in the same folder
        gt_cases = os.path.join('/mnt/BPM_NAS/BPM/alldata/phase3_ge_origin/darwin/real_blurs', case_ori_filename, 'original_img', 'seg.nii.gz')
        
    pred_case = os.path.join(predict_path, 'blur_' + case_filename + '.nii.gz')

    gt = nb.load(gt_case).get_fdata(); gt = np.round(gt)
    pred = nb.load(pred_case).get_fdata()

    dice = ff.np_categorical_dice(pred, gt,1)
    # print('case name: ', case_filename, ' label: ', ground_truth_label, ' dice: ', dice)

    # also count the pixel number
    pixel_num_gt = np.sum(gt)
    pixel_num_pred = np.sum(pred)

    result.append([case_filename, case_ori_filename, ground_truth_label, dice, pixel_num_gt, pixel_num_pred])

result = pd.DataFrame(result, columns = ['case_new_name', 'case_original_name', 'ground_truth_label', 'dice', 'pixel_num_gt', 'pixel_num_pred'])
result.to_excel(os.path.join(main_path, 'project_results/nnUNet_results/Dataset996_blur/nnUNetTrainer__nnUNetPlans__2d/fold_0/quantitative_real_blur+real_nonblur-GEdata.xlsx'), index = False)

    

case_ori_filename: 2d_proc_sd.01009.lcc.dcm
case name in testing:  01009000
case name:  01009000  label:  0  dice:  1.0627150669616651e-14
case_ori_filename: 2d_proc_sd.01009.lmlo.dcm
case name in testing:  01009001
case name:  01009001  label:  0  dice:  1.0
case_ori_filename: 2d_proc_sd.01009.rcc.dcm
case name in testing:  01009010
case name:  01009010  label:  0  dice:  1.8713555350953665e-14
case_ori_filename: 2d_proc_sd.01009.rmlo.dcm
case name in testing:  01009011
case name:  01009011  label:  0  dice:  1.0
case_ori_filename: 2d_proc_sd.01021.lcc.dcm
case name in testing:  01021000
case name:  01021000  label:  0  dice:  1.0
case_ori_filename: 2d_proc_sd.01021.lmlo.dcm
case name in testing:  01021001
case name:  01021001  label:  0  dice:  1.0
case_ori_filename: 2d_proc_sd.01021.rcc.dcm
case name in testing:  01021010
case name:  01021010  label:  0  dice:  1.0
case_ori_filename: 2d_proc_sd.01021.rmlo.dcm
case name in testing:  01021011
case name:  01021011  label:  0  dice:  1.

In [23]:
# simulated blur (from real non-blur) + real non-blur
case_list = non_blur_case_list
result = []
for i in range(0,case_list.shape[0]):
    row = case_list.iloc[i]
    case_ori_filename = row['Image']

    for k in ['simulated', 'original']:
        if k == 'original':
            ground_truth_label = 0
        else:
            ground_truth_label = 1
        case_filename, ground_truth_label = convert_case_filename(case_ori_filename,  from_num_to_name = False, ground_truth_label=ground_truth_label)

        gt_case = os.path.join(ground_truth_path, 'blur_' + case_filename + '.nii.gz')
        pred_case = os.path.join(predict_path, 'blur_' + case_filename + '.nii.gz')

        gt = nb.load(gt_case).get_fdata(); gt = np.round(gt)
        pred = nb.load(pred_case).get_fdata()

        dice = ff.np_categorical_dice(pred, gt,1)

        # also count the pixel number
        pixel_num_gt = np.sum(gt)
        pixel_num_pred = np.sum(pred)

        result.append([case_filename, case_ori_filename, ground_truth_label, dice, pixel_num_gt, pixel_num_pred])

result = pd.DataFrame(result, columns = ['case_new_name', 'case_original_name', 'ground_truth_label', 'dice', 'pixel_num_gt', 'pixel_num_pred'])
result.to_excel(os.path.join(main_path, 'project_results/nnUNet_results/Dataset996_blur/nnUNetTrainer__nnUNetPlans__2d/fold_0/quantitative_simulated_blur+real_nonblur-GEdata.xlsx'), index = False)

In [12]:
case_in_ground_truth = ff.find_all_target_files(['*.nii.gz'], ground_truth_path)
case_in_predict = ff.find_all_target_files(['*.nii.gz'], predict_path)
print(case_in_ground_truth.shape, case_in_predict.shape)

# 5 more in case_in_predict are real-blur, the others are real non-blur

(352,) (357,)


In [19]:
for case1 in case_in_predict:
    case_name = os.path.basename(case1)

    potential_case_path_in_ground_truth = os.path.join(ground_truth_path, case_name)
    if not os.path.exists(potential_case_path_in_ground_truth):
        print(case_name)
        real_blur = True
    else:
        real_blur = False
    
    case_name_without_blur = case_name.split('blur_')[1]
    case_name_ori,ground_truth_label = convert_case_filename(case_name_without_blur, from_num_to_name = True, ground_truth_label = ground_truth_label)

    if real_blur == True:
        assert ground_truth_label == 1
        gt_path = os.path.join('/mnt/BPM_NAS/BPM/alldata/phase3_ge_origin/darwin/real_blurs', case_name_ori, 'original_img', 'seg.nii.gz')
    else:
        assert ground_truth_label == 0
        gt_path = os.path.join(ground_truth_path, case_name)

    gt = nb.load(gt_path).get_fdata(); gt = np.round(gt)
    pred = nb.load(case1).get_fdata()

    dice = ff.np_categorical_dice(pred, gt,1)

    # also count the pixel number
    pixel_num_gt = np.sum(gt)
    pixel_num_pred = np.sum(pred)

    result.append([case_name_without_blur, case_name_ori, ground_truth_label, dice, pixel_num_gt, pixel_num_pred])

result = pd.DataFrame(result, columns = ['case_new_name', 'case_original_name', 'ground_truth_label', 'dice', 'pixel_num_gt', 'pixel_num_pred'])
result.to_excel(os.path.join(main_path, 'project_results/nnUNet_results/Dataset996_blur/nnUNetTrainer__nnUNetPlans__2d/fold_0/quantitative_real_blur+real_nonblur-GEdata.xlsx'), index = False)



AssertionError: 