In [24]:
# Builtins
import matplotlib.pyplot as plt
from pathlib import *
# Installed
import nibabel as nib
import numpy as np
import pandas as pd
from ipywidgets import interact
from parse import parse
from scipy.ndimage import gaussian_filter

## Understand the Application's Results

In [25]:
# Define a filename for each type of result
filenames = {
    "tumor": 'brainTumorMask_SRI.nii.gz',
    "brain": 'T1_to_SRI_brain.nii.gz'
}
# Get the result directory
res_dir = Path("data/vip-team")
# Get 1 tumor file: taking the smallest path makes the choice reproducible,
# since the traversal order of rglob should not be relied upon.
tumor_file = min(res_dir.rglob(filenames["tumor"]), default=None)
if tumor_file is None:
    raise FileNotFoundError(f"No '{filenames['tumor']}' found under {res_dir}")
# Take the brain scan stored next to the tumor mask, so that both files
# always come from the same version, execution and subject.
brain_file = tumor_file.with_name(filenames["brain"])
if not brain_file.is_file():
    raise FileNotFoundError(f"Missing brain scan: {brain_file}")
# Display
print(tumor_file, brain_file, sep="\n")

data/vip-team/v181/exec_1/19/UPENN-GBM-00019/brainTumorMask_SRI.nii.gz
data/vip-team/v181/exec_1/19/UPENN-GBM-00019/T1_to_SRI_brain.nii.gz


In [26]:
# Load both volumes as floating-point arrays
brain = nib.load(brain_file).get_fdata()
tumor = nib.load(tumor_file).get_fdata()
# Background voxels (label 0) become NaN, so only labelled voxels carry a value
tumor = np.where(tumor == 0, np.nan, tumor)

def show_tumor_2D(tumor: np.ndarray, brain: np.ndarray, ax: plt.Axes):
    """Draw a 2D brain slice on `ax` with the tumor mask overlaid.

    Args:
        tumor: 2D array of tumor labels; entries equal to 0 are treated as
            background and are not drawn.
        brain: 2D brain slice, drawn underneath with the 'bone' colormap.
        ax: Axes to draw on.

    The caller's `tumor` array is never modified: the background is masked on
    a copy. This also lets integer-typed masks through, which cannot hold NaN.
    """
    ax.imshow(brain, cmap='bone', origin="lower")
    labels = np.asarray(tumor)
    # np.where builds a new float array, leaving `tumor` untouched
    overlay = np.where(labels == 0, np.nan, labels)
    ax.imshow(overlay, origin="lower")
    ax.axis('off')

@interact
def show_slices(z=(0,150)) -> None:
    """Show axial slice `z` of the brain scan, alone and with the tumor overlay."""
    brain_slice = brain[:, :, z]
    _, (ax_plain, ax_overlay) = plt.subplots(1, 2, figsize=(10,5))

    # Both panels share the same title/background logic
    panels = ((ax_plain, "Brain Scan"), (ax_overlay, "With Tumor Detection"))
    for panel, title in panels:
        panel.set_title(title)
        panel.imshow(brain_slice, cmap='bone', origin="lower")

    # Only the right-hand panel gets the tumor mask on top of the scan
    ax_overlay.imshow(tumor[:, :, z], origin="lower")

    for panel, _title in panels:
        panel.axis('off')

    plt.show()

interactive(children=(IntSlider(value=75, description='z', max=150), Output()), _dom_classes=('widget-interact…

## Compare Execution Results

In [27]:
# Collect every result file: all tumor masks first, then all brain scans
all_files = [
    str(path)
    for name in (filenames["tumor"], filenames["brain"])
    for path in res_dir.rglob(name)
]
all_files[0]

'data/vip-team/v181/exec_1/19/UPENN-GBM-00019/brainTumorMask_SRI.nii.gz'

In [39]:
# Result paths encode their metadata, one field per path component
metadata_format = "{Version}/{Execution}/{_}/{Subject}/{Filename}"
path_format = str(res_dir.joinpath(metadata_format))
# Names of the metadata fields, without the "_" placeholder
metadata_keys = [
    field.strip("{}") for field in metadata_format.split("/") if field != "{_}"
]

# Function to get the metadata from 1 path as a dictionary
def get_metadata_from_path(path: str) -> dict:
    """Extract the metadata encoded in `path`.

    Args:
        path: Path of a result file, matched against `path_format`.

    Returns:
        The named fields parsed from `path` plus a "Path" entry holding `path`
        itself, or an empty dict when `path` does not match `path_format`.
    """
    parsed = parse(path_format, path)
    if parsed is not None:
        return {**parsed.named, "Path": path}
    return {}

# Get all metadata as a dataframe.
# Paths that do not match `path_format` yield an empty dict; they are skipped here,
# otherwise each would become an all-NaN row with no usable "Path".
data = pd.DataFrame(
    [metadata for metadata in map(get_metadata_from_path, all_files) if metadata]
)
data.head()

Unnamed: 0,Version,Execution,Subject,Filename,Path
0,v181,exec_1,UPENN-GBM-00019,brainTumorMask_SRI.nii.gz,data/vip-team/v181/exec_1/19/UPENN-GBM-00019/b...
1,v181,exec_1,UPENN-GBM-00239,brainTumorMask_SRI.nii.gz,data/vip-team/v181/exec_1/239/UPENN-GBM-00239/...
2,v181,exec_2,UPENN-GBM-00019,brainTumorMask_SRI.nii.gz,data/vip-team/v181/exec_2/19/UPENN-GBM-00019/b...
3,v181,exec_2,UPENN-GBM-00239,brainTumorMask_SRI.nii.gz,data/vip-team/v181/exec_2/239/UPENN-GBM-00239/...
4,v190,exec_1,UPENN-GBM-00019,brainTumorMask_SRI.nii.gz,data/vip-team/v190/exec_1/19/UPENN-GBM-00019/b...


### Checksums

In [40]:
from hashlib import md5
def md5sum(file: str, chunk_size: int = 1 << 20) -> str:
    """Computes the md5sum of `file`.

    The file is hashed chunk by chunk, so memory use stays bounded by
    `chunk_size` whatever the size of the file.

    Args:
        file: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (1 MiB by default).

    Returns:
        The hexadecimal MD5 digest of the file content.

    Raises:
        ValueError: If `chunk_size` is not a positive number of bytes.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive number of bytes")
    digest = md5()
    with open(file, "rb") as fid:
        # read() returns b"" at end of file, which stops the iteration
        for chunk in iter(lambda: fid.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hash every result file; once hashed, the path itself is no longer needed
checksums = data.assign(md5sum=data["Path"].apply(md5sum)).drop(columns="Path")

# Compare executions and versions
checksums.set_index(metadata_keys).unstack("Execution")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,md5sum,md5sum
Unnamed: 0_level_1,Unnamed: 1_level_1,Execution,exec_1,exec_2
Version,Subject,Filename,Unnamed: 3_level_2,Unnamed: 4_level_2
v181,UPENN-GBM-00019,T1_to_SRI_brain.nii.gz,f9148b0776a5747bc11422f890871f48,f9148b0776a5747bc11422f890871f48
v181,UPENN-GBM-00019,brainTumorMask_SRI.nii.gz,ffa4be1d93358af9fd6525de09c4fcd5,ffa4be1d93358af9fd6525de09c4fcd5
v181,UPENN-GBM-00239,T1_to_SRI_brain.nii.gz,2a83c47465694fdd5249da3139252acf,2a83c47465694fdd5249da3139252acf
v181,UPENN-GBM-00239,brainTumorMask_SRI.nii.gz,836c9afb5bec1230a4bfa916359fa79c,836c9afb5bec1230a4bfa916359fa79c
v190,UPENN-GBM-00019,T1_to_SRI_brain.nii.gz,92213e8515122b7217c10b18e1a14f4b,92213e8515122b7217c10b18e1a14f4b
v190,UPENN-GBM-00019,brainTumorMask_SRI.nii.gz,6a614f4238051b776619cc3ebdaccf72,6a614f4238051b776619cc3ebdaccf72
v190,UPENN-GBM-00239,T1_to_SRI_brain.nii.gz,9cf1a2187c677412dfd1af10514e63e4,9cf1a2187c677412dfd1af10514e63e4
v190,UPENN-GBM-00239,brainTumorMask_SRI.nii.gz,fb948eb372ecab8fa6600eed411ecb59,fb948eb372ecab8fa6600eed411ecb59


### Differences

In [43]:
# Work on an explicit copy: `pd.DataFrame(data)` is not guaranteed to copy,
# so adding or dropping columns could otherwise leak into `data`.
images = data.copy()

# Pre-load all files from subject and execution
def load(file: str):
    """Loads the image stored in `file`.

    Args:
        file: Path of the image file, passed to `nib.load` as is.

    Returns:
        The image object returned by `nib.load` (a `Nifti1Image` for the
        `.nii.gz` results listed in `data`).
    """
    # nib.load takes the path itself, so no file handle has to be opened here
    return nib.load(file)

images["Img"] = images["Path"].apply(load)

# Rebind instead of dropping in place, so re-running the cell stays predictable
images = images.drop(columns="Path")
images.head()

Unnamed: 0,Version,Execution,Subject,Filename,Img
0,v181,exec_1,UPENN-GBM-00019,brainTumorMask_SRI.nii.gz,<class 'nibabel.nifti1.Nifti1Image'>\ndata sha...
1,v181,exec_1,UPENN-GBM-00239,brainTumorMask_SRI.nii.gz,<class 'nibabel.nifti1.Nifti1Image'>\ndata sha...
2,v181,exec_2,UPENN-GBM-00019,brainTumorMask_SRI.nii.gz,<class 'nibabel.nifti1.Nifti1Image'>\ndata sha...
3,v181,exec_2,UPENN-GBM-00239,brainTumorMask_SRI.nii.gz,<class 'nibabel.nifti1.Nifti1Image'>\ndata sha...
4,v190,exec_1,UPENN-GBM-00019,brainTumorMask_SRI.nii.gz,<class 'nibabel.nifti1.Nifti1Image'>\ndata sha...


In [45]:
# Keep a single subject and a single execution (the second execution is discarded)
execution = "exec_1"
subject = "UPENN-GBM-00019"

# To make the sample type more understandable, we map each filename to its type
filetypes = {
    'brainTumorMask_SRI.nii.gz': "tumor",
    'T1_to_SRI_brain.nii.gz': "brain",
}

# Select the rows, relabel the filenames, then index by result type and version
sample = (
    images
    .query("Subject==@subject & Execution==@execution")
    .reset_index(drop=True)
    .assign(Result=lambda frame: frame["Filename"].map(filetypes))
    .drop(columns=["Filename", "Subject", "Execution"])
    .set_index(["Result", "Version"])
)
sample

Unnamed: 0_level_0,Unnamed: 1_level_0,Img
Result,Version,Unnamed: 2_level_1
tumor,v181,<class 'nibabel.nifti1.Nifti1Image'>\ndata sha...
tumor,v190,<class 'nibabel.nifti1.Nifti1Image'>\ndata sha...
brain,v181,<class 'nibabel.nifti1.Nifti1Image'>\ndata sha...
brain,v190,<class 'nibabel.nifti1.Nifti1Image'>\ndata sha...


In [46]:
# Brain scan of version v181, used as the background of every panel
background = sample["Img"].loc[("brain", "v181")]

def show_tumor(tumor: np.ndarray, brain: np.ndarray, ax: plt.Axes=plt):
    """Draw a tumor mask on `ax`, optionally on top of a brain slice.

    Args:
        tumor: 2D array of tumor labels; entries equal to 0 are treated as
            background and are not drawn.
        brain: 2D brain slice drawn underneath with the 'bone' colormap, or
            None to draw the tumor alone.
        ax: Object providing `imshow` and `axis`; defaults to the pyplot module.

    The caller's `tumor` array is never modified. Masking in place would write
    NaN through slice views into the volume they were taken from. It would
    also fail on integer-typed masks, which cannot hold NaN.
    """
    if brain is not None:
        ax.imshow(brain, cmap='bone', origin="lower")
    labels = np.asarray(tumor)
    # np.where builds a new float array, leaving `tumor` untouched
    overlay = np.where(labels == 0, np.nan, labels)
    ax.imshow(overlay, origin="lower")
    ax.axis('off')

def make_diff(tumor_a, tumor_b):
    """Build a label map of the voxels where two tumor masks disagree.

    Args:
        tumor_a: Array of tumor labels.
        tumor_b: Array of tumor labels, compared element-wise with `tumor_a`.

    Returns:
        A float array with the shape of `tumor_a`. A voxel is 0 where both
        masks hold the same label. Elsewhere it holds the larger of the two
        differing labels, because labels are visited in ascending order and a
        later one overwrites an earlier one. NaN entries never compare equal
        to a label, so they behave like background.
    """
    tumor_a = np.asarray(tumor_a)
    tumor_b = np.asarray(tumor_b)
    diff = np.zeros(np.shape(tumor_a))
    # Labels must come from BOTH masks: a label that only exists in `tumor_b`
    # would otherwise never be compared and its difference would be lost.
    for val in np.union1d(tumor_a, tumor_b):
        diff[(tumor_a == val) ^ (tumor_b == val)] = val
    return diff

@interact
def show_diff(z=(0,150), sigma=(0, 1, 0.1)):
    """Show the tumor masks of both versions for slice `z`, and their difference.

    Args:
        z: Index of the axial slice to display.
        sigma: Standard deviation of the Gaussian filter applied to the
            difference map before it is drawn (0 leaves it unfiltered).
    """
    brain = background.get_fdata()[:,:,z]

    # get_fdata() hands back nibabel's cached array. Slices are copied so that
    # NaN-masking done while drawing cannot write into that cache.
    tumor_181 = sample["Img"]["tumor", "v181"].get_fdata()[:,:,z].copy()
    tumor_190 = sample["Img"]["tumor", "v190"].get_fdata()[:,:,z].copy()

    # Compute the difference first, from labels no drawing helper has touched yet
    tumor_diff = make_diff(tumor_181, tumor_190)
    tumor_diff = gaussian_filter(tumor_diff, sigma=sigma)

    _, (ax_181, ax_diff, ax_190) = plt.subplots(1, 3, figsize=(15,5))

    show_tumor(tumor_181, brain, ax_181)
    ax_181.set_title("Version 1.8.1")

    show_tumor(tumor_190, brain, ax_190)
    ax_190.set_title("Version 1.9.0")

    show_tumor(tumor_diff, brain, ax_diff)
    ax_diff.set_title("Difference")

    plt.show()

interactive(children=(IntSlider(value=75, description='z', max=150), FloatSlider(value=0.0, description='sigma…