In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import *
# import imageio as iio
# from skimage import io
import nibabel as nib
# from nilearn import plotting
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from ipywidgets import interact
from scipy.ndimage import gaussian_filter

from src.VipSession import VipSession

In [17]:
VipSession.init();


----------------------------------
| You are communicating with VIP |
----------------------------------



In [4]:
VipSession.show_pipeline("brats")


Available pipelines
-------------------
BraTSPipeline/1.8.1
BraTSPipeline/1.8.1_cvmfs
BraTSPipeline/1.9.0_captk
BraTSPipeline/1.9.0_cvmfs
BraTSPipelineWithFuzzy/1.8.1
BraTSPipelineWithFuzzy/1.9.0
-------------------


In [5]:
pipelines = {
    "v181": "BraTSPipeline/1.8.1_cvmfs", 
    #"v190": "BraTSPipeline/1.9.0_cvmfs"
}

In [6]:
VipSession.show_pipeline("bratspipeline/1.8.1_cvmfs")

------------------------------------------
name: BraTSPipeline | version: 1.8.1_cvmfs
------------------------------------------
pipeline_id: BraTSPipeline/1.8.1_cvmfs
------------------------------------------
input_settings: {
  "brainTumor": "[String] Flag whether to segment brain tumors or not. Defaults to 1. This uses DeepMedic: https://cbica.github.io/CaPTk/seg_DL.html",
  "t1ceImage": "[File] Input structural T1-weighted post-contrast image",
  "t2Image": "[File] Input structural T2-weighted contrast image",
  "skullStrip": "[String] Flag whether to skull strip or not. Defaults to 1. This uses DeepMedic: https://cbica.github.io/CaPTk/seg_DL.html",
  "appliOutputDir": "[String] Application output directory for final output",
  "flImage": "[File] Input structural FLAIR contrast image",
  "patientID": "[Optional][String] Patient ID to pre-pend to final output file names. If empty, final output is of the form ${modality}_to_SRI.nii.gz",
  "t1Image": "[File] Input structural T1-weigh

In [7]:
input_dir = Path("examples/Repro-Brats/data/sample_of_sample")

input_settings = {
    "t1Image": [(subject / "T1.nii.gz") for subject in input_dir.iterdir()],
    "t1ceImage": [(subject / "T1GD.nii.gz") for subject in input_dir.iterdir()],
    "t2Image": [(subject / "T2.nii.gz") for subject in input_dir.iterdir()],
    "flImage": [(subject / "T2-FLAIR.nii.gz") for subject in input_dir.iterdir()],
    "appliOutputDir": [str(subject.name) for subject in input_dir.iterdir()],
    # "brainTumor": 1,
    # "skullStrip": 1
    "patientID": 'No_value_provided'
}
input_settings

{'t1Image': [PosixPath('examples/Repro-Brats/data/sample_of_sample/UPENN-GBM-00002/T1.nii.gz')],
 't1ceImage': [PosixPath('examples/Repro-Brats/data/sample_of_sample/UPENN-GBM-00002/T1GD.nii.gz')],
 't2Image': [PosixPath('examples/Repro-Brats/data/sample_of_sample/UPENN-GBM-00002/T2.nii.gz')],
 'flImage': [PosixPath('examples/Repro-Brats/data/sample_of_sample/UPENN-GBM-00002/T2-FLAIR.nii.gz')],
 'appliOutputDir': ['UPENN-GBM-00002'],
 'patientID': 'No_value_provided'}

In [8]:
# Upload the data
upload = VipSession(
        session_name="brats-upload",
        pipeline_id=list(pipelines.values())[0],
        input_dir=input_dir,
        input_settings=input_settings
).upload_inputs()


<<< SESSION 'brats-upload' >>>

Output directory: vip_outputs/brats-upload
<< Session restored from its output directory

Pipeline ID: 'BraTSPipeline/1.8.1_cvmfs' --> checked
Input Settings --> parsed
Input Directory: 'examples/Repro-Brats/data/sample_of_sample' --> checked


<<< UPLOAD INPUTS >>>

Checking references to the dataset within Input Settings ... OK.

Uploading the dataset on VIP
-----------------------------
Cloning: examples/Repro-Brats/data/sample_of_sample ... Already on VIP.
Cloning: examples/Repro-Brats/data/sample_of_sample/UPENN-GBM-00002 ... Already on VIP.
-----------------------------
Everything is on VIP.

>> Session saved



In [9]:
session = {}
for version in pipelines:
    # Get the inputs
    session[version] = VipSession(
        session_name="repro-BraTS_%s" % version,
    ).get_inputs(upload, get_settings=True)


<<< SESSION 'repro-BraTS_v181' >>>

Output directory: vip_outputs/repro-BraTS_v181
<< Session restored from its output directory

Sessions 'repro-BraTS_v181' and 'brats-upload' share the same inputs on VIP.



In [16]:
VipSession.init()
# Launch pipelines in parallel
for version in pipelines:
    # Get the inputs
    session[version].launch_pipeline(pipelines[version])


<<< LAUNCH PIPELINE >>>

Parameter checks
----------------
Pipeline identifier: Pipeline ID: 'BraTSPipeline/1.8.1_cvmfs' --> unchecked


KeyboardInterrupt: 

In [14]:
VipSession.show_pipeline()


Available pipelines
-------------------
BraTSPipeline/1.8.1
BraTSPipeline/1.8.1_cvmfs
BraTSPipeline/1.9.0_captk
BraTSPipeline/1.9.0_cvmfs
BraTSPipelineWithFuzzy/1.8.1
BraTSPipelineWithFuzzy/1.9.0
CQUEST/0.1.1
CQUEST_fuzzy/0.1
ct-tiqua/1.4
ct-tiqua/2.2
Freesurfer (recon-all)/0.3.7
Freesurfer (recon-all)/0.3.8
FSL - FLIRT/0.3
FSL-FIRST/0.1
fsl_bet-basic/6
fsl_bet-full/6
fsl_bet-full_CVMFS_beta/6.0.5.1
fsl_bet-full_CVMFS_container/6.0.5.1
fsl_bet-full_CVMFS_guix/6.0.5.1
fsl_bet-test/6.0.1
fsl_bet_CVMFS_client_guix/6.0.5.1
GateLab/0.7.1
GateLab/0.7.4
Grep-GD/1.0
GrepTest/2.0
GrepTest/2.1
GrepTest/2.2
GrepTest/2.3
GrepTest/2.4
LCModel/0.1
Nextflow_simple/0.0
NISOx_preprocessing/5.0.11
NISOx_preprocessing/fsl_v.6.0.5.1_in_data_R2.0.4
Preprocess/0.1
SimuBloch/0.4
SimuBloch/0.5
STA_Simulator/0.1
-------------------


In [12]:
# Monitor
for version in pipelines:
    session[version].monitor_workflows()


<<< MONITOR WORKFLOW >>>

Updating worflow inventory ... Done.
1 execution(s) ended with success:
	 workflow-qffCBQ , started on: 2023/06/08 12:32:50
1 execution(s) is/are currently running on VIP:
	 workflow-08f4n4 , started on: 2023/06/08 13:51:33

-------------------------------------------------------------
The current proccess will wait until all executions are over.
Their progress can be monitored on VIP portal:
	https://vip.creatis.insa-lyon.fr/
-------------------------------------------------------------
All executions are over.
All executions (2) ended with success.

>> Session saved


<<< MONITOR WORKFLOW >>>

Updating worflow inventory ... Done.
All executions (2) ended with success.

>> Session saved



In [13]:
# Download
for version in pipelines:
    session[version].download_outputs()


<<< DOWNLOAD OUTPUTS >>>

Updating workflow status ... Done.

Downloading pipeline outputs to: vip_outputs/repro-BraTS_v181
--------------------------------
[1/2] Outputs from: workflow-qffCBQ | Started on: 2023/06/08 12:32:50 | Status: Finished
	Already in: vip_outputs/repro-BraTS_v181/08-06-2023_12:32:51
[2/2] Outputs from: workflow-08f4n4 | Started on: 2023/06/08 13:51:33 | Status: Finished
	New directory: vip_outputs/repro-BraTS_v181/08-06-2023_13:51:34
	[1/2] Downloading file (205.1MB): UPENN-GBM-00002.tar.gz ... Done.
		Extracting archive content ... Done.
	[2/2] Downloading file (222.3MB): UPENN-GBM-00005.tar.gz ... Done.
		Extracting archive content ... Done.
	Done for all files.
--------------------------------
Done for all executions.


<<< DOWNLOAD OUTPUTS >>>

Updating workflow status ... Done.

Downloading pipeline outputs to: vip_outputs/repro-BraTS_v190
--------------------------------
[1/2] Outputs from: workflow-fuxeBk | Started on: 2023/06/08 12:33:14 | Status: Finis

## Understanding the results

In [2]:
# Builtins
import matplotlib.pyplot as plt
from pathlib import *
# Installed
import nibabel as nib
import numpy as np
import pandas as pd
from ipywidgets import interact
from parse import parse
from scipy.ndimage import gaussian_filter

In [4]:
# Define a filename for each type of result
filenames = {
    "tumor": 'brainTumorMask_SRI.nii.gz',
    "brain": 'T1_to_SRI_brain.nii.gz'
}
# Get the result directory
res_dir = Path("data/results")
# Get 1 tumor file and 1 brain scan
tumor_file = next(res_dir.rglob('brainTumorMask_SRI.nii.gz'))
brain_file = next(res_dir.rglob('T1_to_SRI_brain.nii.gz'))
# Display
print("\n".join([str(tumor_file), str(brain_file)]))

data/results/v181/exec_1/UPENN-GBM-00005/brainTumorMask_SRI.nii.gz
data/results/v181/exec_1/UPENN-GBM-00005/T1_to_SRI_brain.nii.gz


In [5]:
brain = nib.load(brain_file).get_fdata()
tumor = nib.load(tumor_file).get_fdata()
tumor[tumor==0] = np.nan

def show_tumor_2D(tumor: np.ndarray, brain: np.ndarray, ax: plt.Axes):
    ax.imshow(brain, cmap='bone', origin="lower")
    tumor[tumor==0] = np.nan
    ax.imshow(tumor, origin="lower")
    ax.axis('off')

@interact
def show_slices(z=(0,150)) -> None:
    _, (ax_brain, ax_tumor) = plt.subplots(1, 2, figsize=(10,5))

    ax_brain.set_title("Brain Scan")
    ax_brain.imshow(brain[:,:,z], cmap='bone', origin="lower")
    ax_brain.axis('off')

    ax_tumor.set_title("With Tumor Detection")
    ax_tumor.imshow(brain[:,:,z], cmap='bone', origin="lower")
    ax_tumor.imshow(tumor[:,:,z], origin="lower")
    ax_tumor.axis('off')

    plt.show()

interactive(children=(IntSlider(value=75, description='z', max=150), Output()), _dom_classes=('widget-interact…

## Compare Execution Results

In [38]:
# Get all result files
all_files = [str(path) for path in res_dir.rglob(filenames["tumor"])] \
          + [str(path) for path in res_dir.rglob(filenames["brain"])]
all_files[0]

'data/results/v181/exec_1/UPENN-GBM-00005/brainTumorMask_SRI.nii.gz'

In [39]:
# Results are sorted by metadata
metadata_format = "{Version}/{Execution}/{Subject}/{Filename}"
path_format = str(res_dir / metadata_format)
metadata_keys = metadata_format.replace("{","").replace("}","").split("/")

# Function to get the metadata from 1 path
def get_metadata_from_path(path: str) -> dict:
    metadata = parse(path_format, path).named
    metadata["Path"] = path
    return metadata

# Get all metadata as a dataframe
data = pd.DataFrame([get_metadata_from_path(file) for file in all_files])
data.head()

Unnamed: 0,Version,Execution,Subject,Filename,Path
0,v181,exec_1,UPENN-GBM-00005,brainTumorMask_SRI.nii.gz,data/results/v181/exec_1/UPENN-GBM-00005/brain...
1,v181,exec_1,UPENN-GBM-00002,brainTumorMask_SRI.nii.gz,data/results/v181/exec_1/UPENN-GBM-00002/brain...
2,v181,exec_2,UPENN-GBM-00005,brainTumorMask_SRI.nii.gz,data/results/v181/exec_2/UPENN-GBM-00005/brain...
3,v181,exec_2,UPENN-GBM-00002,brainTumorMask_SRI.nii.gz,data/results/v181/exec_2/UPENN-GBM-00002/brain...
4,v190,exec_1,UPENN-GBM-00005,brainTumorMask_SRI.nii.gz,data/results/v190/exec_1/UPENN-GBM-00005/brain...


# Checksums

In [32]:
from hashlib import md5
def md5sum(result: pd.Series) -> str:
    """Computes the md5sum of `file`"""
    file = get_path_from_metadata(result.to_dict())
    with open(file, "rb") as fid:
        return md5(fid.read()).hexdigest()

checksums=pd.DataFrame(data)
checksums["md5sum"] = checksums.apply(md5sum, axis=1)

# Compare executions and versions
checksums.set_index(meta).unstack("Execution")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,md5sum,md5sum
Unnamed: 0_level_1,Unnamed: 1_level_1,Execution,exec_1,exec_2
Version,Subject,Filename,Unnamed: 3_level_2,Unnamed: 4_level_2
v181,UPENN-GBM-00002,T1_to_SRI_brain.nii.gz,6308882654173696f2455ba836a8c9b7,6308882654173696f2455ba836a8c9b7
v181,UPENN-GBM-00002,brainTumorMask_SRI.nii.gz,580db3e342349e5186238fc607505a6f,580db3e342349e5186238fc607505a6f
v181,UPENN-GBM-00005,T1_to_SRI_brain.nii.gz,194b769561ab8a403e920d0403949ddb,194b769561ab8a403e920d0403949ddb
v181,UPENN-GBM-00005,brainTumorMask_SRI.nii.gz,8af1eaa18b243a45ccbe4a3e5fed90e6,8af1eaa18b243a45ccbe4a3e5fed90e6
v190,UPENN-GBM-00002,T1_to_SRI_brain.nii.gz,2428e1305d34ab92e707c327442eb214,2428e1305d34ab92e707c327442eb214
v190,UPENN-GBM-00002,brainTumorMask_SRI.nii.gz,530edce583d634c5fe39c546bb379d42,530edce583d634c5fe39c546bb379d42
v190,UPENN-GBM-00005,T1_to_SRI_brain.nii.gz,62f83621bd71322e273cade77f772149,62f83621bd71322e273cade77f772149
v190,UPENN-GBM-00005,brainTumorMask_SRI.nii.gz,b226c549ceb4a7ebe021b266f4409b10,b226c549ceb4a7ebe021b266f4409b10


# Differences

In [36]:
subject = "UPENN-GBM-00005"
execution ="exec_1"

# Pre-load all files from subject and execution
images = metatada.query("Subject==@subject & Execution==@execution")
def load(result: pd.DataFrame) -> str:
    """Computes the md5sum of `file`"""
    file = get_path_from_metadata(result)
    with open(file, "rb") as fid:
        return nib.load(file)
images["Img"] = images.apply(load, axis=1)

# To make the result type more understandable, we need to map each filename to its type
filetypes = {
    'brainTumorMask_SRI.nii.gz': "tumor",
    'T1_to_SRI_brain.nii.gz': "brain",
}
images["Result"] = images.pop("Filename").map(filetypes)
images.set_index().unstack("Result")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  images["Img"] = images.apply(load, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  images["Result"] = images.pop("Filename").map(filetypes)


AttributeError: 'NoneType' object has no attribute 'append'

In [None]:

background = data["Img"][subject, "brain", pipelines[0], execution]

def show_tumor(tumor: np.ndarray, brain: np.ndarray, ax: plt.Axes=plt):
    if brain is not None:
        ax.imshow(brain, cmap='bone', origin="lower")
    tumor[tumor==0] = np.nan
    ax.imshow(tumor, origin="lower")
    ax.axis('off')

def make_diff(tumor_a, tumor_b):
    values = np.unique(tumor_a)
    diff = np.zeros(np.shape(tumor_a))
    for val in values:
        diff[(tumor_a == val) ^ (tumor_b == val)] = val
    return diff

@interact
def show_diff(z=(0,150), sigma=(0, 1, 0.1)):

    brain = background.get_fdata()[:,:,z]
    fig, (ax_181, ax_diff, ax_190) = plt.subplots(1, 3, figsize=(15,5))

    tumor_181 = data["Img"][subject, "tumor", "v181", execution].get_fdata()[:,:,z]
    show_tumor(tumor_181, brain, ax_181)
    
    tumor_190 = data["Img"][subject, "tumor", "v190", execution].get_fdata()[:,:,z]
    show_tumor(tumor_190, brain, ax_190)

    tumor_diff = make_diff(tumor_181, tumor_190)
    tumor_diff = gaussian_filter(tumor_diff, sigma=sigma)
    show_tumor(tumor_diff, brain, ax_diff)

    plt.show()

- Commencer par les exécutions ?
- Faire exemples sur un patient enregistré (données téléchargées)
    - Faire comparer 2 exécutions / versions sur même machine
    - Faire comparer les versions
- Télécharger les résultats sur la grille de VIP
    - Faire comparer 2 exécutions obtenues sur la grille (malgré Docker !)
    - Comparer avec variabilité inter-versions ?