In [3]:
from pathlib import Path

import numpy as np
import pandas as pd
import SimpleITK as sitk

from src.evaluation.scores import dice, hausdorff_distance

In [4]:
# Input locations, given relative to the notebook's working directory.
# Folder that is searched recursively for the predicted '*.nii.gz' files.
prediction_folder = "../data/segmentation_output_renamed"
# Folder that is searched recursively for the '*gtvt.nii.gz' ground truths.
groundtruth_folder = "../data/hecktor_nii/"
# CSV file with the bounding boxes, later indexed by its 'PatientID' column.
bb_filepath = "../data/bbox.csv"

In [5]:
# Gather every predicted segmentation found (recursively) under the
# prediction folder, ordered by the part of the file name that precedes the
# first underscore.
prediction_files = sorted(
    Path(prediction_folder).rglob('*.nii.gz'),
    key=lambda path: path.name.split('_')[0],
)

# Patient identifiers of the predictions: the first 7 characters of the file
# name, taken from the text before the first dot.
patient_name_predictions = [
    path.name.split('.')[0][:7] for path in prediction_files
]


In [6]:
# Ground-truth masks ('*gtvt.nii.gz', searched recursively) restricted to the
# patients that have a prediction: a file is kept when the part of its name
# before the first underscore is one of the predicted patient identifiers.
groundtruth_files = list(
    filter(
        lambda gt_path: gt_path.name.split('_')[0] in patient_name_predictions,
        Path(groundtruth_folder).rglob('*gtvt.nii.gz'),
    )
)

In [7]:
# Bounding boxes read from the CSV file and keyed by the 'PatientID' column;
# they delimit the region within which the scores are meant to be computed.
bb_df = (
    pd.read_csv(bb_filepath)
    .set_index(keys='PatientID')
)

In [8]:
# Evaluate every prediction against its ground truth inside the patient's
# bounding box. One row of scores is collected per patient and the results
# DataFrame is built once at the end (DataFrame.append was removed in
# pandas 2.0 and growing a frame row by row is quadratic).
bb_columns = ['x1', 'y1', 'z1', 'x2', 'y2', 'z2']
score_rows = []

resampler = sitk.ResampleImageFilter()
# Nearest neighbour copies existing voxel values, so no intermediate values
# are interpolated into the resampled segmentations.
resampler.SetInterpolator(sitk.sitkNearestNeighbor)

for f in prediction_files:
    patient_name = f.name.split('.')[0][:7]

    # Ground truth whose file name starts with the same 7-character id.
    gt_file = next(
        (k for k in groundtruth_files if k.name[:7] == patient_name), None)
    if gt_file is None:
        raise FileNotFoundError(
            'No ground truth file found for patient {}'.format(patient_name))

    print('Evaluating patient {}'.format(patient_name))

    sitk_pred = sitk.ReadImage(str(f.resolve()))
    sitk_gt = sitk.ReadImage(str(gt_file.resolve()))
    resampling_spacing = np.array(sitk_gt.GetSpacing())

    # Bounding box as [x1, y1, z1, x2, y2, z2].
    bb = bb_df.loc[patient_name, bb_columns].to_numpy(dtype=float)

    # Number of voxels along each axis of the box at the ground-truth spacing.
    image_size = np.round((bb[3:] - bb[:3]) / resampling_spacing).astype(int)

    # SetReferenceImage copies size, origin, spacing and direction from the
    # ground truth, so it has to be called first; the bounding-box origin and
    # size are applied afterwards, otherwise they would be overwritten and the
    # scores would be computed on the whole ground-truth grid.
    resampler.SetReferenceImage(sitk_gt)
    resampler.SetOutputOrigin(bb[:3].tolist())
    resampler.SetSize([int(k) for k in image_size])

    # Both volumes are resampled onto the same bounding-box grid.
    sitk_gt = resampler.Execute(sitk_gt)
    sitk_pred = resampler.Execute(sitk_pred)

    # Store the results
    np_gt = sitk.GetArrayFromImage(sitk_gt)
    np_pred = sitk.GetArrayFromImage(sitk_pred)
    score_rows.append({
        'PatientID': patient_name,
        'Dice Score': dice(np_gt, np_pred),
        'Hausdorff Distance': hausdorff_distance(np_gt, np_pred),
    })

# DataFrame holding one row of scores per evaluated patient.
results_df = pd.DataFrame(
    score_rows, columns=['PatientID', 'Dice Score', 'Hausdorff Distance'])


Evaluating patient CHGJ007
Evaluating patient CHGJ008
Evaluating patient CHGJ010
Evaluating patient CHGJ013
Evaluating patient CHGJ015
Evaluating patient CHGJ016
Evaluating patient CHGJ017
Evaluating patient CHGJ018
Evaluating patient CHGJ025
Evaluating patient CHGJ026


In [11]:
# Average of every score over all patients. The string 'PatientID' column is
# moved to the index and the scores are cast to float before averaging:
# pandas >= 2.0 raises a TypeError instead of silently dropping non-numeric
# columns in DataFrame.mean().
results_df.set_index('PatientID').astype(float).mean()

Dice Score            1.0
Hausdorff Distance    0.0
dtype: float64