<div class="alert alert-block alert-info"> <b>NOTE</b> Please select the kernel <code>Python [conda env: deepcell]</code> for this notebook. </div>

# 1. Data Preparation
Resave data as a set of tiff files in order to match conventions expected by TracX.

In [3]:
import os
import sys

import numpy as np
import tensorflow as tf
from tifffile import imwrite

from deepcell.applications import NuclearSegmentation
from deepcell_tracking.trk_io import load_trks

sys.path.append('..')
import utils

In [2]:
# Root folder for everything this notebook writes.
data_dir = 'ctc-data'
# CTC-formatted ground truth / results, one tree per segmentation source.
ctc_dc_dir = os.path.join(data_dir, 'CTC', 'seg-dc')
ctc_gt_dir = os.path.join(data_dir, 'CTC', 'seg-gt')
# Per-frame tiffs: raw images, ground-truth masks and DeepCell-predicted masks.
raw_dir = os.path.join(data_dir, 'raw')
seg_gt_dir = os.path.join(data_dir, 'seg-gt')
seg_dc_dir = os.path.join(data_dir, 'seg-dc')

# exist_ok=True keeps this cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
for d in [raw_dir, seg_gt_dir, seg_dc_dir, ctc_dc_dir, ctc_gt_dir]:
    os.makedirs(d, exist_ok=True)

Load the test split of the tracking data

In [4]:
# Tracking files that make up the test split.
files = [
    '../../data/Fluo-N2DL-Hela-test/Fluo-N2DL-HeLa-test-01.trk',
    '../../data/Fluo-N2DL-Hela-test/Fluo-N2DL-HeLa-test-02.trk',
]

# Load every file once, then collect the raw images, the label masks and the
# first lineage of each file.
# NOTE(review): presumably each .trk holds a single movie, which is why only
# lineages[0] is kept - confirm against the data.
trks = [load_trks(trk_path) for trk_path in files]
X = [trk['X'] for trk in trks]
y = [trk['y'] for trk in trks]
lineages = [trk['lineages'][0] for trk in trks]

# Stack along a new leading axis so that index k selects the k-th file.
data = {
    'X': np.stack(X, axis=0),
    'y': np.stack(y, axis=0),
    'lineages': lineages
}

Load the DeepCell nuclear segmentation model to test the algorithm on predicted instead of ground truth segmentations

In [5]:
# Instantiate the DeepCell nuclear segmentation application, pinned to model
# version '1.1'. It is used further down to predict masks for every movie.
# NOTE(review): per the log output below, this resolves to a cached
# NuclearSegmentation-8.tar.gz archive - re-check if the version pin changes.
app = NuclearSegmentation.from_version(version='1.1')

INFO:root:Checking for cached data
INFO:root:Checking NuclearSegmentation-8.tar.gz against provided file_hash...
INFO:root:NuclearSegmentation-8.tar.gz with hash 507be21f0e34e59adae689f58cc03ccb already available.
INFO:root:Extracting /Users/morganschwartz/.deepcell/models/NuclearSegmentation-8.tar.gz
INFO:root:Successfully extracted /Users/morganschwartz/.deepcell/models/NuclearSegmentation-8.tar.gz into /Users/morganschwartz/.deepcell/models
2024-07-08 15:21:22.509823: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-07-08 15:21:22.509953: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1 Max

systemMemory: 64.00 GB
maxCacheSize: 24.00 GB





In [6]:
# File naming pattern: ChannelName_positionXXYYZZZ_timeTTTTT.tif
# Format fields, in order: channel name, XX and YY position (each zero-padded
# to 2 digits) and the frame number zero-padded to 5 digits; ZZZ is fixed to 000.
# NOTE(review): presumably the convention TracX expects - confirm the required
# number of time digits against the TracX documentation.
name_template = '{}_position{:02}{:02}000_time{:05}.tif'

In [7]:
# For every movie: clean up the ground truth, predict DeepCell masks, save the
# ground truth in CTC format and write each frame as an individual tiff.
for batch_no in range(len(data['lineages'])):
    # Output folders and position indices are numbered starting at 1
    movie_id = batch_no + 1

    # Build subdirectories for data
    raw_subdir = os.path.join(raw_dir, '{:03}'.format(movie_id))
    seg_gt_subdir = os.path.join(seg_gt_dir, '{:03}'.format(movie_id))
    seg_dc_subdir = os.path.join(seg_dc_dir, '{:03}'.format(movie_id))

    # Create directories if needed (exist_ok keeps the cell re-runnable)
    for d in (raw_subdir, seg_gt_subdir, seg_dc_subdir):
        os.makedirs(d, exist_ok=True)

    # Pull out relevant data for this batch
    X = data['X'][batch_no]
    y = data['y'][batch_no]
    lineage = data['lineages'][batch_no]

    # Correct discontiguous tracks, which are not allowed by CTC
    y, lineage = utils.convert_to_contiguous(y, lineage)

    # Determine position of zero padding for removal
    slc = utils.find_zero_padding(X)
    X = X[slc]
    y = y[slc]

    # Drop the trailing frames that are zero padding. The cut is placed right
    # after the last labelled frame rather than at len(good_frames): both are
    # equal when the labelled frames are contiguous from frame 0, but the count
    # would silently drop real frames (and make y[good_frames] raise an
    # IndexError) as soon as one blank frame sits in the middle of the movie.
    frames = np.sum(y, axis=(1, 2))  # Nonzero if the frame contains any label
    good_frames = np.where(frames)[0]
    n_frames = int(good_frames[-1]) + 1 if good_frames.size else 0
    X = X[:n_frames]
    y = y[:n_frames]

    # Generate deepcell predictions
    y_pred = app.predict(X)

    # Save GT in CTC format. It is written into both CTC trees because the
    # evaluation step looks for the ground truth inside each results folder.
    for d in [ctc_dc_dir, ctc_gt_dir]:
        utils.save_ctc_gt(d, movie_id, y, lineage)

    # Position info for naming convention
    x_pos = movie_id
    y_pos = 1

    # Save each frame of the movie as an individual tif
    channel = 0 # These images should only have one channel
    for i in range(X.shape[0]):
        frame_no = i + 1  # File names count frames from 1
        name_raw = os.path.join(raw_subdir, name_template.format('nuclear', x_pos, y_pos, frame_no))
        name_gt_mask = os.path.join(seg_gt_subdir, name_template.format('mask-gt', x_pos, y_pos, frame_no))
        name_dc_mask = os.path.join(seg_dc_subdir, name_template.format('mask-dc', x_pos, y_pos, frame_no))

        imwrite(name_raw, X[i, ..., channel])
        imwrite(name_gt_mask, y[i, ..., channel])
        imwrite(name_dc_mask, y_pred[i, ..., channel])

INFO:root:Converting image dtype to float
2024-07-08 15:21:46.128105: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2024-07-08 15:21:47.227646: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
INFO:root:Converting image dtype to float


# 2. Tracking
1. Open MATLAB (a release newer than 2020) and open the TracX folder.
2. From the MATLAB command window, run the following:
`addpath(genpath('tracx'));`
3. Open the `run_tracx.m` script in MATLAB and press Run.

# 3. Evaluate

In [10]:
import glob
import os
import subprocess

import numpy as np
import pandas as pd
from tifffile import imread, imwrite

from deepcell_tracking.isbi_utils import load_tiffs
from deepcell_tracking.metrics import TrackingMetrics

In [11]:
# Folder layout (must match the layout used when the data was prepared).
data_dir = 'ctc-data'
ctc_dir = os.path.join(data_dir, 'CTC')
ctc_dc_dir = os.path.join(ctc_dir, 'seg-dc')
ctc_gt_dir = os.path.join(ctc_dir, 'seg-gt')
seg_gt_dir = os.path.join(data_dir, 'seg-gt')
seg_dc_dir = os.path.join(data_dir, 'seg-dc')

# Movie ids are the numbered sub-folders of seg_gt_dir. Keep directories only
# (stray files such as .DS_Store would break the per-movie processing below)
# and sort them, because os.listdir() returns entries in arbitrary order.
ids = sorted(
    name for name in os.listdir(seg_gt_dir)
    if os.path.isdir(os.path.join(seg_gt_dir, name))
)

# Threshold passed to TrackingMetrics when matching nodes
node_match_threshold = 0.6

# Location of the CTC evaluation executables: <ctc_software>/<operating_system>/
ctc_software = '../CTC_Evaluation_Software'
operating_system = 'Mac' # or 'Linux' or 'Win'
# Passed to the CTC executables as a string; matches the 3-digit movie folders
num_digits = '3'

In [12]:
def create_new_lineage(y):
    """Build a lineage dict that only records in which frames each label occurs.

    Labels are assumed to be linked across frames already, so every entry is
    created without daughters, parent or division frame.

    Args:
        y: (np.array) label image stack, iterated frame by frame along its
            first axis. The value 0 is treated as background.

    Returns:
        dict: maps each label (as int) to a dict with the keys ``label``,
            ``frames``, ``daughters``, ``capped``, ``frame_div`` and ``parent``.
    """
    lineage = {}
    for frame_idx, frame in enumerate(y):
        # Sorted labels present in this frame, background excluded
        labels = np.unique(frame)
        labels = labels[labels != 0]

        for label in labels.tolist():
            label = int(label)
            track = lineage.get(label)

            # First appearance of this label: start a new track
            if track is None:
                lineage[label] = {
                    "label": label,
                    "frames": [frame_idx],
                    "daughters": [],
                    "capped": False,
                    "frame_div": None,
                    "parent": None,
                }
                continue

            # Known label: just record the additional frame
            track["frames"].append(frame_idx)

    return lineage

Load each movie, relabel the mask to match the new tracking ids and create the lineage information

In [13]:
# Convert the TracX output of every movie, for both segmentation sources
# (ground-truth masks and DeepCell masks), into CTC-format results.
# NOTE: this cell uses `utils`, which is imported in the Data Preparation
# section and not in the Evaluate imports - run that import first.
for res_dir in [seg_gt_dir, seg_dc_dir]:
    for i in ids:
        print(res_dir, i)
        # Find results path (the first match is used; an IndexError here means
        # no matching TracX result file was found for this movie)
        cc_path = glob.glob(os.path.join(res_dir, i, '*CellCycleResults*'))[0]
        t_path = glob.glob(os.path.join(res_dir, i, '*TrackingResults*'))[0]

        # Load TracX results (tab-separated tables)
        cc_res = pd.read_csv(cc_path, sep='\t')
        t_res = pd.read_csv(t_path, sep='\t')

        # Load the mask stack of this movie from the same folder
        y_old = load_tiffs(os.path.join(res_dir, i))

        # Create a new y array with updated cell ids: every pixel carrying the
        # per-frame id (cell_index) is relabeled with its track id (track_index).
        # Pixels whose id is not listed in the tracking table stay 0.
        y_new = np.zeros_like(y_old)
        for _, r in t_res.iterrows():
            id_new = int(r['track_index'])
            id_old = int(r['cell_index'])
            t = int(r['cell_frame']) - 1 # Time indexed starting at 1 in TracX
            y_new[t][y_old[t] == id_old] = id_new

        # Create lineage (frames only; parents and daughters are filled in below)
        lineage = create_new_lineage(y_new)

        # Assign parents and daughters
        for _, r in cc_res.iterrows():
            # NOTE(review): tid is used as read from the table, without an int
            # cast; a track id missing from the relabeled masks raises KeyError.
            tid = r['track']
            # Check if a parent should be added (0 is treated as "no parent")
            if r['parent'] != 0:
                lineage[tid]['parent'] = int(r['parent'])

            # Check if a daughter should be added (NaN is treated as "no daughter")
            if not np.isnan(r['daughter']):
                lineage[tid]['daughters'].append(int(r['daughter']))
                # The division frame is set to the last frame of the track
                lineage[tid]['frame_div'] = max(lineage[tid]['frames'])

        # Correct discontiguous tracks, which are not allowed by CTC
        y_new, lineage = utils.convert_to_contiguous(y_new, lineage)

        # Save results in CTC format, under <ctc_dir>/<seg-gt|seg-dc>
        utils.save_ctc_res(os.path.join(ctc_dir, os.path.basename(res_dir)), int(i), y_new, lineage)

ctc-data/seg-gt 001
ctc-data/seg-gt 002
ctc-data/seg-dc 001
ctc-data/seg-dc 002


In [14]:
# Collect one row of metrics per (segmentation source, movie) pair.
benchmarks = []

for results_dir, s in zip([ctc_gt_dir, ctc_dc_dir], ['GT', 'Deepcell']):
    for data_id in ids:
        print(data_id)
        results = {
            'model': f'TracX - {s}',
            'data_id': data_id
        }
        # Ground truth and results live side by side inside results_dir
        gt_dir = os.path.join(results_dir, f'{data_id}_GT', 'TRA')
        res_dir = os.path.join(results_dir, f'{data_id}_RES')

        # Deepcell benchmarking
        m = TrackingMetrics.from_isbi_dirs(gt_dir, res_dir, threshold=node_match_threshold)
        results.update(m.stats)

        # CTC metrics: each executable is called with the results folder, the
        # movie id and the number of digits, and its stdout is captured.
        for metric, path in [('DET', 'DETMeasure'), ('SEG', 'SEGMeasure'), ('TRA', 'TRAMeasure')]:
            p = subprocess.run([os.path.join(ctc_software, operating_system, path), results_dir, data_id, num_digits],
                               stdout=subprocess.PIPE)
            # Decode once; errors='replace' keeps the failure report below
            # printable even if the tool emits bytes that are not valid UTF-8.
            outstring = p.stdout.decode('utf-8', errors='replace')

            # The score is the last whitespace-separated token of the output.
            # Only the two expected parsing failures are handled: empty output
            # (IndexError) and a non-numeric last token (ValueError). Anything
            # else, including KeyboardInterrupt, is no longer swallowed.
            try:
                val = float(outstring.split()[-1])
                results[metric] = val
            except (IndexError, ValueError):
                print('Benchmarking failure', path, results_dir, data_id)
                print(outstring)

        benchmarks.append(results)

df = pd.DataFrame(benchmarks)
df.to_csv('ctc-benchmarks.csv')

001
missed node 1_23 division completely
2_20 out degree = 1, daughters mismatch.
missed node 3_29 division completely
4_20 out degree = 1, daughters mismatch.
missed node 5_1 division completely
missed node 6_7 division completely
7_5 out degree = 1, daughters mismatch.
missed node 8_13 division completely
missed node 9_3 division completely
missed node 10_5 division completely
missed node 11_43 division completely
missed node 12_25 division completely
missed node 13_3 division completely
missed node 14_41 division completely
missed node 15_45 division completely
missed node 16_36 division completely
missed node 17_58 division completely
missed node 18_27 division completely
missed node 19_10 division completely
missed node 20_30 division completely
missed node 21_25 division completely
missed node 22_33 division completely
23_25 out degree = 1, daughters mismatch.
missed node 24_51 division completely
missed node 25_50 division completely
missed node 26_22 division completely
missed 