# Cell Tracking and Lineage Construction in Live-Cell Imaging Data
---


### Global Imports

In [20]:
import os
import datetime
import errno
import argparse

import numpy as np

import deepcell

### Load the Training Data

In [21]:
# Fetch the tracked dataset; the download is cached in ~/.keras/datasets
filename = '3T3_NIH.trks'
(X_train, y_train), (X_test, y_test) = (
    deepcell.datasets.tracked.nih_3t3.load_tracked_data(filename))

# Report the shapes of the training images and their label masks
print('X.shape: {}\ny.shape: {}'.format(X_train.shape, y_train.shape))

X.shape: (188, 30, 154, 182, 1)
y.shape: (188, 30, 154, 182, 1)


### Set up filepath constants

In [22]:
# The `train_model_()` functions currently need the on-disk path of the data file.

# `deepcell.datasets` stores its downloads here; edit DATA_DIR when using other data
DATA_DIR = os.path.join(os.path.expanduser('~'), '.keras', 'datasets')

# DATA_FILE should be a trks file (contains 2 np arrays and a lineage dictionary)
DATA_FILE = os.path.join(DATA_DIR, filename)

# Stop right away if the data file is missing
assert os.path.isfile(DATA_FILE)

In [23]:
# Set up other required filepaths

# If the data file is in a subdirectory, mirror it in MODEL_DIR and LOG_DIR
PREFIX = os.path.relpath(os.path.dirname(DATA_FILE), DATA_DIR)

ROOT_DIR = '/data'  # TODO: Change this! Usually a mounted volume
MODEL_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'models', PREFIX))
LOG_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'logs', PREFIX))

# Create the output directories if they do not exist.
# `exist_ok=True` keeps the cell safe to re-run; unlike swallowing EEXIST by
# hand it still raises when the path exists but is not a directory.
for d in (MODEL_DIR, LOG_DIR):
    os.makedirs(d, exist_ok=True)

### Set up training parameters

In [24]:
from tensorflow.keras.optimizers import SGD
from deepcell.utils.train_utils import rate_scheduler

# Name passed to the training function as `model_name`
tracking_model_name = 'tracking_model'

# Training schedule
n_epoch = 5      # Number of training epochs
test_size = .10  # % of data saved as test

# Learning-rate schedule and optimizer (both start from lr=0.01)
lr_sched = rate_scheduler(lr=0.01, decay=0.99)
optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

# Tracking training settings
crop_dim = 32
in_shape = (crop_dim, crop_dim, 1)
features = {'appearance', 'distance', 'neighborhood', 'regionprop'}
neighborhood_scale_size = 30
min_track_length = 5
batch_size = 128

seed = None  # Only needed for accuracy verification

### Training the Model

#### Instantiate the tracking model

In [25]:
from deepcell import model_zoo

# Build the siamese tracking model from the settings chosen above
tracking_model = model_zoo.siamese_model(
    features=features,
    input_shape=in_shape,
    neighborhood_scale_size=neighborhood_scale_size,
)

#### Option 1: Train a new tracking model

In [26]:
from deepcell.training import train_model_siamese_daughter

tracking_model = train_model_siamese_daughter(
    # what to train, and on which data
    model=tracking_model,
    model_name=tracking_model_name,
    model_dir=MODEL_DIR,
    dataset=DATA_FILE,  # full path to trks file
    # optimisation settings
    optimizer=optimizer,
    lr_sched=lr_sched,
    n_epoch=n_epoch,
    batch_size=batch_size,
    class_weight=None,
    seed=seed,
    # tracking-specific settings
    features=features,
    crop_dim=crop_dim,
    min_track_length=min_track_length,
    neighborhood_scale_size=neighborhood_scale_size,
    # data augmentation
    rotation_range=180,
    flip=True,
    shear=False,
)

training on dataset: /root/.keras/datasets/3T3_NIH.trks
saving model at: /data/models/tracking_model.h5
saving loss at: /data/models/tracking_model.npz
X_train shape: (212, 30, 154, 182, 1)
y_train shape: (212, 30, 154, 182, 1)
X_test shape: (24, 30, 154, 182, 1)
y_test shape: (24, 30, 154, 182, 1)
Output Shape: (None, 3)
Training on 1 GPUs
Using real-time data augmentation.


See http://scikit-image.org/docs/0.14.x/release_notes_and_installation.html#deprecations for details on how to avoid this message.
  warn(XY_TO_RC_DEPRECATION_MESSAGE)
See http://scikit-image.org/docs/0.14.x/release_notes_and_installation.html#deprecations for details on how to avoid this message.
  warn(XY_TO_RC_DEPRECATION_MESSAGE)
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


total_train_pairs: 1441320.0
total_test_pairs: 187560.0
batch size: 128
validation_steps:  1465.0
Epoch 1/5


StopIteration: Parent cell should not be in last frame of movie

#### Option 2: Load an existing tracking model

In [27]:
# Load previously saved weights into the tracking model instantiated above
siamese_weights_file = 'tracking_model_raise.h5'
# MODEL_DIR already ends with PREFIX, so PREFIX must not be joined a second time
# (doing so points at a non-existent folder whenever PREFIX is a real subdirectory)
siamese_weights_file = os.path.join(MODEL_DIR, siamese_weights_file)

tracking_model.load_weights(siamese_weights_file)

#### (Optional) Investigate Model Performance with a Confusion Matrix - Requires a Seed Value

In [28]:
# Using DATA_FILE from above to extract Test Data 
# Change if you are not using `deepcell.datasets`

In [35]:
import deepcell.image_generators as generators
from deepcell.utils.data_utils import get_data

# Split the trks file into train/test dictionaries, using the same `seed` as training
train_dict, test_dict = get_data(DATA_FILE, mode='siamese_daughters', seed=seed)

# Every augmentation option is set to 0 for the test generator
datagen_test = generators.SiameseDataGenerator(
    rotation_range=0,
    shear_range=0,
    horizontal_flip=0,
    vertical_flip=0)

# Iterator over the test data, configured with the tracking settings from above
test_iterator = generators.SiameseIterator(
    test_dict,
    datagen_test,
    features=features,
    min_track_length=min_track_length,
    neighborhood_scale_size=neighborhood_scale_size)

See http://scikit-image.org/docs/0.14.x/release_notes_and_installation.html#deprecations for details on how to avoid this message.
  warn(XY_TO_RC_DEPRECATION_MESSAGE)
See http://scikit-image.org/docs/0.14.x/release_notes_and_installation.html#deprecations for details on how to avoid this message.
  warn(XY_TO_RC_DEPRECATION_MESSAGE)
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


In [36]:
from sklearn.metrics import confusion_matrix

# Collect true and predicted class indices over 1000 batches drawn from the test iterator
Y, Y_pred = [], []
for i in range(1, 1001):
    if not i % 100:
        print(".", end="")  # progress marker: one dot per 100 batches
    lst, y_true = next(test_iterator)
    true_classes = [np.argmax(row) for row in y_true]
    pred_classes = [np.argmax(row) for row in tracking_model.predict(lst)]
    Y += true_classes
    Y_pred += pred_classes

print("")
cm = confusion_matrix(Y, Y_pred)
print(cm)

..........
[[9818  307   14]
 [5043 5221  103]
 [4738  775 4427]]


In [37]:
# Share of evaluated samples whose predicted class equals the true class
matches = np.array(Y) == np.array(Y_pred)
test_acc = sum(matches) / len(Y)
print('Accuracy across all three classes: ', test_acc)

# Divide every row of the confusion matrix by its row total ...
row_totals = cm.sum(axis=1, keepdims=True)
cm = cm.astype('float') / row_totals
# ... so that the diagonal holds the accuracy of each class
print('Accuracy for each individual class [Different, Same, Daughter]: ', cm.diagonal())

Accuracy across all three classes:  0.6393614924784865
Accuracy for each individual class [Different, Same, Daughter]:  [0.96834007 0.50361725 0.44537223]


### Track Multiple Movies and Generate Track Files

#### Run the model

In [None]:
# Using DATA_FILE from above for example Test Data 
# Change if you are not using `deepcell.datasets`

In [33]:
# Normalize raw images if needed
def image_norm(original_image):
    """Return a copy of the image shifted to zero mean and scaled to unit variance.

    NNs prefer input data that is 0 mean and unit variance.

    Args:
        original_image: numeric array holding the image.

    Returns:
        Array of the same shape as ``original_image``. A constant image (zero
        standard deviation) is returned as all zeros instead of NaN/inf.
    """
    centered = original_image - np.mean(original_image)
    spread = np.std(original_image)
    if spread == 0:
        # A flat image has no variance to rescale; dividing would produce NaNs
        return centered
    return centered / spread

# Normalize each frame of each movie on its own and write the result back in place
for batch, frame in np.ndindex(*test_dict['X'].shape[:2]):
    test_dict['X'][batch, frame, :, :, 0] = image_norm(test_dict['X'][batch, frame, :, :, 0])

In [34]:
# The tracking model is used in concert with other processes to track cells
# Import the necessary tracking functionality
import deepcell.tracking

# Define where cell tracks will be saved
TRACK_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'track_data', PREFIX))
TRACK_FILE_NAME = 'batch_'

# Create the output directory if it does not exist (safe to re-run)
os.makedirs(TRACK_DIR, exist_ok=True)

# Depending on the number of batches you may not want to track everything at once.
# To track every movie use `num_batches = test_dict['X'].shape[0]`.
# The `min` keeps the loop from indexing past the available movies.
num_batches = min(2, test_dict['X'].shape[0])

# Generate a cell track for each batch and save it as its own .trk file
for batch in range(num_batches):
    trial = deepcell.tracking.cell_tracker(
        test_dict['X'][batch], test_dict['y'][batch],
        tracking_model,
        max_distance=50,
        track_length=5, division=0.5, birth=0.9, death=0.9,
        # reuse the value the model was built with instead of repeating the literal
        neighborhood_scale_size=neighborhood_scale_size,
        features=features)
    trial._track_cells()
    file_name = TRACK_FILE_NAME + str(batch).zfill(2) + '.trk'
    file_path = os.path.join(TRACK_DIR, file_name)
    trial.dump(file_path)

See http://scikit-image.org/docs/0.14.x/release_notes_and_installation.html#deprecations for details on how to avoid this message.
  warn(XY_TO_RC_DEPRECATION_MESSAGE)
See http://scikit-image.org/docs/0.14.x/release_notes_and_installation.html#deprecations for details on how to avoid this message.
  warn(XY_TO_RC_DEPRECATION_MESSAGE)
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


Tracking frame 1
Tracking frame 2
Tracking frame 3
New track
Tracking frame 4
New track
Tracking frame 5
Tracking frame 6
Tracking frame 7
Tracking frame 8
Tracking frame 9
Tracking frame 10
Tracking frame 11
Tracking frame 12
Tracking frame 13
Tracking frame 14
Tracking frame 15
Tracking frame 16
Tracking frame 17
Tracking frame 18
Tracking frame 19
Tracking frame 20
Tracking frame 21
Tracking frame 22
Tracking frame 23
Tracking frame 24
Tracking frame 25
New track
0.9999989
Division detected
New track
0.99999917
Division detected
Tracking frame 26
Tracking frame 27
Tracking frame 28
Tracking frame 29
Tracking frame 1
Tracking frame 2
Tracking frame 3
Tracking frame 4
Tracking frame 5
Tracking frame 6
Tracking frame 7
Tracking frame 8
Tracking frame 9
Tracking frame 10
Tracking frame 11
Tracking frame 12
Tracking frame 13
Tracking frame 14
Tracking frame 15
Tracking frame 16
Tracking frame 17
Tracking frame 18
Tracking frame 19
Tracking frame 20
Tracking frame 21
Tracking frame 22
Tra

#### Bundle individual track files (each batch) into one .trks file for review

In [38]:
from deepcell.utils.tracking_utils import trk_folder_to_trks
from deepcell.utils.tracking_utils import load_trks

# File name of the bundled trks file
SET_NAME = 'all_batches.trks'

# Combine the individual trk files found in TRACK_DIR into that single trks file
trk_folder_to_trks(TRACK_DIR, SET_NAME)

### Review the results

In [39]:
# Open the bundled trks file created above for review
FILE_PATH = os.path.join(os.path.dirname(TRACK_DIR), SET_NAME)
trks = load_trks(FILE_PATH)

# Unpack the lineage information, the raw movies and the tracked label movies
lineages = trks["lineages"]
raw = trks["X"]
tracked = trks["y"]

#### Raw and Tracked Movies

In [40]:
# View tracked results of each batch as a video
# NB: This does not render well on GitHub
from IPython.display import HTML
from deepcell.utils.plot_utils import get_js_video

# Index of the batch to display; change it to look at other batches of data
batch = 0

# Raw
HTML(get_js_video(raw, cmap='gray', batch=batch))

In [41]:
# Tracked

# The largest cell label of this batch sets the upper end of the color scale
vmax = max(lineages[batch])

HTML(get_js_video(tracked, batch=batch, cmap='cubehelix', vmin=0, vmax=vmax))

#### Save the Raw and Tracked Output as Images

In [42]:
import matplotlib.pyplot as plt

# Define where images (movies) will be saved
MOVIE_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'tracked_movies', PREFIX))

# Create the output directory if it does not exist (safe to re-run)
os.makedirs(MOVIE_DIR, exist_ok=True)

# `batch` is the movie index chosen in the video cell above.
# Scale the colors to match the max cell label
vmax = max(lineages[batch].keys())

channel = 0  # These images should only have one channel
for i in range(raw.shape[1]):
    # One raw and one tracked PNG per frame
    name_raw = os.path.join(MOVIE_DIR, 'raw_frame_{:02}_.png'.format(i))
    name_tracked = os.path.join(MOVIE_DIR, 'tracked_frame_{:02}_.png'.format(i))
    plt.imsave(name_raw, raw[batch, i, :, :, channel], cmap='gray')
    plt.imsave(name_tracked, tracked[batch, i, :, :, channel], cmap='cubehelix', vmin=0, vmax=vmax)

### Benchmarking

#### Save Cell Lineages in an ISBI-Formatted Output txt

The ISBI Cell Tracking Challenge requires a text file (man_track.txt) that represents a batch's cell lineage as an acyclic graph. The format of this file is as follows: Every line corresponds to a single track that is encoded by four numbers separated by a space -  
L B E P  
where L is a unique label of the track (label of markers, 16-bit positive value),  
B is a zero-based index of the frame in which the track begins,  
E is a zero-based index of the frame in which the track ends,  
P is the label of the parent track (0 is used when no parent is defined)

N.B. DeepCell's unique approach allows a cell to be tracked even if it momentarily leaves the frame. This is not possible in conventional tracking algorithms, so ISBI considers a cell's track to have ended once it leaves the frame. We adjust the output here to comply with ISBI's formatting (i.e. each track contains only contiguous frames).

In [43]:
# Re-open the bundled trks file that will be translated into ISBI format
FILE_PATH = os.path.join(os.path.dirname(TRACK_DIR), SET_NAME)
trks = load_trks(FILE_PATH)

# Lineage information, raw movies and tracked label movies
lineages = trks["lineages"]
raw = trks["X"]
tracked = trks["y"]

In [44]:
# Define where benchmark data will be saved
BENCHMARK_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'tracking_benchmarks', PREFIX))

# Create the directory if it does not exist; `exist_ok=True` makes the cell safe to re-run
os.makedirs(BENCHMARK_DIR, exist_ok=True)

In [45]:
## DELETE THIS CELL (TROUBLESHOOTING ONLY):
# NOTE(review): leftover debugging cell. When executed it replaces the
# `trks`, `lineages`, `raw` and `tracked` loaded in the cell above with the
# contents of a hard-coded, machine-specific file, so every later cell then
# works on that file instead of the bundled trks file.
from deepcell.utils.tracking_utils import load_trks

# Re-import the top-level deepcell module (picks up edits made while debugging)
import importlib
importlib.reload(deepcell)

#FILE_PATH = '/data/track_data/cells/HEK293/all_batches.trks'
# NOTE(review): absolute path that presumably exists only on the author's machine
FILE_PATH = '/data/data/ISBI_Tracking_Challenge/HeLa/nuc/HeLa_Training/01_GT/HEK_model_track/HeLa_01_GT_Batch00.trk'

trks = load_trks(FILE_PATH)

# Overwrites the variables of the same names defined in the previous cell
lineages, raw, tracked = trks["lineages"], trks["X"], trks["y"]
####### END DELETE

In [46]:
def create_new_ISBI_track(batch_tracked, batch_info, old_label, frames, daughters, frame_div):
    """Add a new track to the lineage and swap the labels accordingly in the images.

    Args:
        batch_tracked: label images indexed as ``batch_tracked[frame]``; modified in place.
        batch_info: lineage dictionary keyed by track label; modified in place.
        old_label: label the cell currently carries in ``frames``.
        frames: frame indices that are moved to the new track.
        daughters: daughter labels handed over to the new track.
        frame_div: division frame handed over to the new track.

    Returns:
        The same ``(batch_info, batch_tracked)`` objects after modification.
    """
    # Take the label just above the current maximum. Counting the entries
    # (len + 1) would overwrite an existing track when labels are not contiguous.
    new_label = max(batch_info, default=0) + 1

    batch_info[new_label] = {
        'old_label': old_label,
        'label': new_label,
        'frames': frames,
        'daughters': daughters,
        'frame_div': frame_div,
        'parent': None,
    }

    # The daughters now descend from the new track, so point them at it
    for daughter in daughters or ():
        if daughter in batch_info:
            batch_info[daughter]['parent'] = new_label

    # Relabel the cell in every frame that moved to the new track
    for frame in frames:
        batch_tracked[frame][batch_tracked[frame] == old_label] = new_label

    return batch_info, batch_tracked

In [47]:
def contig_tracks(label, batch_info, batch_tracked):
    """Make sure a track consists of consecutive frames, splitting it if necessary.

    The frames of ``label`` are scanned for the first gap. Everything after
    that gap is moved, together with the track's daughters and division
    frame, into a new track made by ``create_new_ISBI_track``; the original
    track keeps the frames before the gap and loses its daughters and
    division frame. Only the first gap is handled per call, because the new
    track is expected to be checked again by the caller.

    Returns:
        ``(batch_info, batch_tracked)``, unchanged when no gap was found.
    """
    track = batch_info[label]
    frame_list = track['frames']

    # Compare each frame with its successor; the last frame has none to compare with
    for position in range(len(frame_list) - 1):
        if frame_list[position + 1] == frame_list[position] + 1:
            continue  # still contiguous, keep scanning

        # Gap found: the remaining frames become a track of their own
        tail_frames = frame_list[position + 1:]
        batch_info, batch_tracked = create_new_ISBI_track(
            batch_tracked, batch_info, label,
            tail_frames, track['daughters'], track['frame_div'])

        # The original track ends at the gap and hands over its division info
        batch_info[label]['frames'] = frame_list[0:position + 1]
        batch_info[label]['daughters'] = []
        batch_info[label]['frame_div'] = None

        # Later gaps belong to the new track and are handled when it is checked
        break

    return batch_info, batch_tracked


In [48]:
# Translate track data to ISBI format and provide outputs for benchmarking.
# For every batch: split tracks so each one covers only contiguous frames,
# then record the resulting lineage in res_track.txt.

for batch, batch_info in enumerate(lineages):
    print('batch number: ', batch)

    # Build the subdirectory that holds this batch's benchmark info
    B_SUB_DIR = os.path.join(BENCHMARK_DIR, '{:02}_RES'.format(batch+1))
    os.makedirs(B_SUB_DIR, exist_ok=True)

    batch_tracked = tracked[batch]

    # Phase 1: split non-contiguous tracks. `labels` grows while it is being
    # iterated so that every newly created track is checked as well. New
    # tracks are found by comparing the key sets, so this does not rely on the
    # new label being the largest one.
    labels = list(batch_info.keys())
    for label in labels:
        known_labels = set(batch_info)
        batch_info, batch_tracked = contig_tracks(label, batch_info, batch_tracked)
        labels.extend(sorted(set(batch_info) - known_labels))

    # Phase 2: write one "L B E P" row per track once all splitting is done.
    # The `with` block closes the file even if a row fails to be written.
    with open(os.path.join(B_SUB_DIR, "res_track.txt"), "w") as text_file:
        for label in labels:
            first_frame = np.amin(batch_info[label]['frames'])
            last_frame = np.amax(batch_info[label]['frames'])
            # 0 stands for "no parent defined"
            parent = batch_info[label]['parent'] or 0

            print(label, first_frame, last_frame, parent)
            text_file.write('{} {} {} {}\n'.format(label, first_frame, last_frame, parent))

    lineages[batch] = batch_info
    tracked[batch] = batch_tracked
        

batch number:  0
1 0 17 0
2 0 37 0
3 0 1 0
4 0 16 0
5 0 22 0
6 0 3 0
7 0 31 0
8 0 0 0
9 0 33 0
10 0 30 0
11 0 11 0
12 0 30 0
13 0 13 0
14 0 7 0
15 0 4 0
16 0 91 0
17 0 21 0
18 0 7 0
19 0 13 0
20 0 1 0
21 0 10 0
22 0 9 0
23 0 33 0
24 0 13 0
25 0 10 0
26 0 35 0
27 0 14 0
28 0 13 0
29 0 8 0
30 0 68 0
31 0 2 0
32 0 25 0
33 0 1 0
34 0 12 0
35 0 13 0
36 0 4 0
37 0 19 0
38 0 20 0
39 0 3 0
40 0 36 0
41 0 91 0
42 0 6 0
43 1 44 8
44 1 3 0
45 2 2 0
46 2 35 3
47 2 91 0
48 2 2 33
49 3 13 45
50 3 38 0
51 3 12 48
52 3 37 48
53 3 3 48
54 4 47 6
55 4 48 6
56 4 41 39
57 4 44 39
58 5 51 15
59 5 54 15
60 5 40 36
61 6 41 0
62 7 15 42
63 7 36 42
64 8 63 14
65 8 86 14
66 8 17 0
67 8 39 0
68 9 48 29
69 9 43 29
70 10 10 0
71 10 11 0
72 11 48 21
73 11 42 21
74 11 41 70
75 11 39 25
76 11 39 25
77 12 12 0
78 13 50 0
79 13 18 44
80 13 13 44
81 13 37 51
82 13 89 0
83 13 84 0
84 13 14 51
85 14 91 11
86 14 51 0
87 14 43 24
88 14 47 24
89 14 14 0
90 14 23 35
91 14 49 35
92 15 49 0
93 15 84 13
94 15 69 27
95 15 91 0
96

#### Generate new images to match the new ISBI-formatted lineage data

In [1]:
from skimage.external.tifffile import imsave

# Write every tracked frame as a uint16 image named maskNNN.tif
channel = 0  # These images should only have one channel

for batch, batch_info in enumerate(lineages):
    print('batch number: ', batch)

    # Benchmark subdirectory of this batch
    B_SUB_DIR = os.path.join(BENCHMARK_DIR, '{:02}_RES'.format(batch + 1))

    for i in range(raw.shape[1]):
        name_tracked = os.path.join(B_SUB_DIR, 'mask{:03}.tif'.format(i))
        frame_labels = tracked[batch, i, :, :, channel]
        imsave(name_tracked, frame_labels.astype('uint16'))

NameError: name 'lineages' is not defined