# Cell Tracking and Lineage Construction in Live-Cell Imaging Data
---


### Global Imports

In [1]:
import os
import datetime
import errno
import argparse

import numpy as np

import deepcell

Using TensorFlow backend.


### Load the Training Data

In [2]:
# The path to the data file is currently required for `train_model_()` functions

# Change DATA_DIR if you are not using `deepcell.datasets`
DATA_DIR = "/data/npz_data/cells/HEK293/generic/movie/"                # USE LOCAL DATA INSTEAD

# DATA_FILE should be a trks file (contains 2 np arrays and a lineage dictionary)
DATA_FILE = os.path.join(DATA_DIR, '3T3_HeLa_HEK_corrected.trks')      # USE LOCAL DATA INSTEAD

# confirm the data file is available
assert os.path.isfile(DATA_FILE)

In [3]:
# Set up other required filepaths

# If the data file is in a subdirectory, mirror it in MODEL_DIR and LOG_DIR
PREFIX = os.path.relpath(os.path.dirname(DATA_FILE), DATA_DIR)

ROOT_DIR = '/data'  # TODO: Change this! Usually a mounted volume
MODEL_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'models', PREFIX))
LOG_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'logs', PREFIX))

# Create the model and log directories if they do not already exist.
# `exist_ok=True` makes this cell safe to re-run, while still raising
# if the path exists but is not a directory.
for d in (MODEL_DIR, LOG_DIR):
    os.makedirs(d, exist_ok=True)

In [4]:
# Review the Data 

# Load the trks file
from deepcell.utils.data_utils import load_trks

training_data = load_trks(DATA_FILE)
raw, tracked = training_data["X"], training_data["y"]

# One {cell_id: daughters} mapping per batch
lineages = [
    {cell_id: info["daughters"] for cell_id, info in batch_tracks.items()}
    for batch_tracks in training_data["lineages"]
]

print("Image data shape: ", raw.shape)
print("Number of lineages (should equal batch size): ", len(training_data["lineages"]))


Image data shape:  (617, 40, 216, 256, 1)
Number of lineages (should equal batch size):  617


In [5]:
# View tracked results of each batch as a video
# NB: This does not render well on GitHub
from IPython.display import HTML
from deepcell.utils.plot_utils import get_js_video

# Change this value to look at other batches of data
batch = 3

# Raw
HTML(get_js_video(raw, batch=batch, cmap='gray'))

In [6]:
# Tracked
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML

# Scale the colors to match the max cell label
vmax = max(lineages[batch].keys())

def get_js_video(images, batch=0, channel=0):
    """Render one channel of one batch as an interactive JavaScript animation.

    NOTE: this definition shadows the `get_js_video` imported from
    `deepcell.utils.plot_utils` in an earlier cell, and it reads the
    notebook-level `vmax` for the upper color limit.

    Args:
        images: array indexed as `images[batch, frame, :, :, channel]`.
        batch: index of the batch (movie) to display.
        channel: index of the channel to display.

    Returns:
        An `IPython.display.HTML` object wrapping the animation's JS/HTML.
    """
    fig = plt.figure()

    # One single-artist list per frame, as expected by ArtistAnimation
    ims = [
        [plt.imshow(images[batch, i, :, :, channel], animated=True,
                    cmap='cubehelix', vmin=0, vmax=vmax)]
        for i in range(images.shape[1])
    ]

    # Build the animation once, after all frames have been collected
    # (previously it was re-created on every loop iteration).
    ani = animation.ArtistAnimation(fig, ims, interval=75, repeat_delay=1000)

    # Close the figure so the static plot is not also displayed by the notebook
    plt.close(fig)
    return HTML(ani.to_jshtml())

get_js_video(tracked, batch=batch)

In [7]:
# Additional Movie Stats
for frame_idx, frame in enumerate(tracked[batch]):
    print('frame ', frame_idx)
    print('labels present in frame: ', np.unique(frame))

frame  0
labels present in frame:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12.]
frame  1
labels present in frame:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12.]
frame  2
labels present in frame:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8. 10. 11. 12. 13. 14.]
frame  3
labels present in frame:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8. 10. 11. 12. 13. 14.]
frame  4
labels present in frame:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8. 10. 11. 12. 13. 14. 15.]
frame  5
labels present in frame:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8. 10. 12. 13. 14. 15. 16. 17.]
frame  6
labels present in frame:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8. 10. 12. 13. 14. 15. 16. 17.]
frame  7
labels present in frame:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8. 10. 12. 13. 14. 17.]
frame  8
labels present in frame:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8. 10. 12. 13. 14. 17.]
frame  9
labels present in frame:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8. 10. 12. 13. 14. 17.]
frame  10
labels present in frame:  [ 0.  1.  2.  3.  4.  

### Set up training parameters

In [8]:
from tensorflow.keras.optimizers import SGD
from deepcell.utils.train_utils import rate_scheduler

tracking_model_name = 'tracking_model'

n_epoch = 5  # Number of training epochs
test_size = .10  # % of data saved as test

optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
lr_sched = rate_scheduler(lr=0.01, decay=0.99)

# Tracking training settings
features = {'appearance', 'distance', 'neighborhood', 'regionprop'}
min_track_length = 5
neighborhood_scale_size=30
batch_size = 128  

in_shape = (32, 32, 1) # Should this be calculated or hardcoded?
seed = 100 # To be removed

### Training the Model

#### Instantiate the tracking model

In [9]:
from deepcell import model_zoo

tracking_model = model_zoo.siamese_model(
    input_shape=in_shape,
    neighborhood_scale_size=neighborhood_scale_size,
    features=features)

#### Option 1: Train a new tracking model

In [None]:
from deepcell.training import train_model_siamese_daughter

tracking_model = train_model_siamese_daughter(
    model=tracking_model,
    dataset=DATA_FILE,  # full path to trks file
    model_name=tracking_model_name,
    optimizer=optimizer,
    batch_size=batch_size,
    min_track_length=min_track_length,
    features=features,
    neighborhood_scale_size=neighborhood_scale_size,
    n_epoch=n_epoch,
    model_dir=MODEL_DIR,
    lr_sched=lr_sched,
    rotation_range=180,
    flip=True,
    shear=False,
    class_weight=None,
    seed = seed)

#### Option 2: Load an existing tracking model

In [10]:
# Import the tracking model
#MODEL_DIR = '/data/models/'
#PREFIX = 'cells/HEK293/generic/'
# NOTE: these two assignments overwrite the MODEL_DIR and PREFIX values set in
# the filepath-setup cell; any cell run after this one sees these values.
MODEL_DIR = '/data/models/'
PREFIX = ''

# Re-instantiate the model and load weights
# `siamese_weights_file` first holds the bare file name, then the joined path.
siamese_weights_file = 'tracking_model_raise.h5'
siamese_weights_file = os.path.join(MODEL_DIR, PREFIX, siamese_weights_file)

# Loads the weights into the existing `tracking_model` object; no new model is
# constructed in this cell.
tracking_model.load_weights(siamese_weights_file)

#### Verify Model Accuracy with Confusion Matrix

In [11]:
# Using DATA_FILE from above to extract Test Data 
# Change if you are not using `deepcell.datasets`

In [12]:
import deepcell.image_generators as generators
from deepcell.utils.data_utils import get_data

train_dict, test_dict = get_data(DATA_FILE, mode='siamese_daughters', seed=seed)

datagen_test = generators.SiameseDataGenerator(
        rotation_range=0,  # randomly rotate images by 0 to rotation_range degrees
        shear_range=0,     # randomly shear images in the range (radians , -shear_range to shear_range)
        horizontal_flip=0, # randomly flip images
        vertical_flip=0)   # randomly flip images

test_iterator = generators.SiameseIterator(test_dict,
                                           datagen_test,
                                           neighborhood_scale_size=neighborhood_scale_size,
                                           min_track_length=min_track_length,
                                           features=features)

See http://scikit-image.org/docs/0.14.x/release_notes_and_installation.html#deprecations for details on how to avoid this message.
  warn(XY_TO_RC_DEPRECATION_MESSAGE)
See http://scikit-image.org/docs/0.14.x/release_notes_and_installation.html#deprecations for details on how to avoid this message.
  warn(XY_TO_RC_DEPRECATION_MESSAGE)
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


In [13]:
from sklearn.metrics import confusion_matrix

# Accumulate true and predicted class indices over 1000 draws from the test iterator
Y, Y_pred = [], []
for i in range(1, 1001):
    # Progress marker every 100 draws
    if not i % 100:
        print(".", end="")
    lst, y_true = next(test_iterator)
    # Reduce each row to the index of its largest entry
    y_true = [np.argmax(row) for row in y_true]
    y_pred = [np.argmax(row) for row in tracking_model.predict(lst)]
    Y += y_true
    Y_pred += y_pred

cm = confusion_matrix(Y, Y_pred)
print(cm)

..........[[10525     4     5]
 [   19 10209     9]
 [  174   655  9623]]


In [14]:
# Fraction of samples whose predicted class equals the true class
test_acc = sum(np.equal(Y, Y_pred)) / len(Y)
print('Accuracy across all three classes: ', test_acc)

# Divide each row of the confusion matrix by its row total, so the
# diagonal holds the per-class accuracy
cm = cm.astype('float') / cm.sum(axis=1, keepdims=True)
print('Accuracy for each individual class [Different, Same, Daughter]: ', cm.diagonal())

Accuracy across all three classes:  0.9722640361272139
Accuracy for each individual class [Different, Same, Daughter]:  [0.99914562 0.99726482 0.92068504]


### Track Multiple Movies and Generate Track Files

#### Run the model

In [None]:
# Using DATA_FILE from above for example Test Data 
# Change if you are not using `deepcell.datasets`

In [None]:
# Normalize raw images if needed
def image_norm(original_image):
    """Standardize an image to zero mean and unit variance.

    Args:
        original_image: numeric numpy array.

    Returns:
        `(original_image - mean) / std`, computed over the whole array.
        A constant image (std == 0) is returned as all zeros instead of
        the NaN values that dividing by zero would produce.
    """
    # NNs prefer input data that is 0 mean and unit variance
    centered = original_image - np.mean(original_image)
    std = np.std(original_image)
    if std == 0:
        # Constant image: `centered` is already all zeros, so divide by 1
        # to keep the usual float output without producing NaN.
        std = 1
    return centered / std

# Normalize each frame of each test movie independently with `image_norm`,
# writing the result back into test_dict['X'] (channel 0 only).
# NOTE: this overwrites the data in place and rebinds the notebook-level
# `batch` variable used by other cells.
for batch in range(test_dict['X'].shape[0]):
    for frame in range(test_dict['X'].shape[1]):
        test_dict['X'][batch, frame, :, :, 0] = image_norm(test_dict['X'][batch, frame, :, :, 0]) 

In [119]:
# The tracking model is used in concert with other processes to track cells
# Import the necessary tracking functionality
# `importlib` is imported here because `importlib.reload` is used in this cell
# before any other cell imports it (a fresh kernel would raise NameError).
import importlib

import deepcell.tracking
importlib.reload(deepcell.tracking)

# Define where cell tracks will be saved
PREFIX = 'cells/HEK293/generic'
TRACK_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'track_data', PREFIX))
TRACK_FILE_NAME = 'batch_'

# Create the output directory if needed (safe to re-run; still raises if the
# path exists but is not a directory)
os.makedirs(TRACK_DIR, exist_ok=True)

# Depending on the number of batches you may not want to track everything at once
#num_batches = test_dict['X'].shape[0]
num_batches = 10

# Generate a cell track for each batch
#for batch in range(test_dict['X'].shape[0]):
#for batch in range(num_batches):
for batch in range(7,8):
    print(batch)
    # Use the same `neighborhood_scale_size` the model was built with, rather
    # than a second hard-coded copy of the value.
    trial = deepcell.tracking.cell_tracker(test_dict['X'][batch], test_dict['y'][batch],
                         tracking_model,
                         max_distance=200,
                         track_length=5, division=0.7, birth=0.9, death=0.9,
                         neighborhood_scale_size=neighborhood_scale_size,
                         features=features)
    trial._track_cells()
    # One .trk file per batch, e.g. batch_07.trk
    file_name = TRACK_FILE_NAME + str(batch).zfill(2) + '.trk'
    file_path = os.path.join(TRACK_DIR, file_name)
    trial.dump(file_path)

7


See http://scikit-image.org/docs/0.14.x/release_notes_and_installation.html#deprecations for details on how to avoid this message.
  warn(XY_TO_RC_DEPRECATION_MESSAGE)
See http://scikit-image.org/docs/0.14.x/release_notes_and_installation.html#deprecations for details on how to avoid this message.
  warn(XY_TO_RC_DEPRECATION_MESSAGE)
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


Tracking frame 1
Tracking frame 2
Tracking frame 3
Tracking frame 4
Tracking frame 5
Tracking frame 6
Tracking frame 7
Tracking frame 8
Tracking frame 9
New track
0.99999607
Division detected
Tracking frame 10
New track
0.9999031
Division detected
New track
0.99950874
Division detected
Tracking frame 11
Tracking frame 12
Tracking frame 13
Tracking frame 14
Tracking frame 15
Tracking frame 16
Tracking frame 17
Tracking frame 18
Tracking frame 19
Tracking frame 20
Tracking frame 21
Tracking frame 22
Tracking frame 23
Tracking frame 24
Tracking frame 25
Tracking frame 26
Tracking frame 27
Tracking frame 28
Tracking frame 29
Tracking frame 30
Tracking frame 31
Tracking frame 32
Tracking frame 33
Tracking frame 34
Tracking frame 35
Tracking frame 36
Tracking frame 37
Tracking frame 38
Tracking frame 39


#### Bundle individual track files (each batch) into one .trks file for review

In [123]:
import importlib

# Reload the module BEFORE binding names with `from ... import ...`:
# reloading does not update names that were already imported from the module,
# so the previous order left the old function objects in use.
import deepcell.utils.tracking_utils
importlib.reload(deepcell.utils.tracking_utils)

from deepcell.utils.tracking_utils import trk_folder_to_trks
from deepcell.utils.tracking_utils import load_trks

# Define a name for the trks file
SET_NAME = 'all_batches.trks'

# Compile trk files into one trks file
trk_folder_to_trks(TRACK_DIR, SET_NAME)

### Review the results

In [124]:
# Load the file we created above to review
FILE_PATH = os.path.join(os.path.dirname(TRACK_DIR), SET_NAME)
trks = load_trks(FILE_PATH)

lineages, raw, tracked = trks["lineages"], trks["X"], trks["y"]

#### Raw and Tracked Movies

In [125]:
# View tracked results of each batch as a video
# NB: This does not render well on GitHub
import importlib
from IPython.display import HTML

# Reload the module first, then import the name from it: a reload does not
# update names that were imported earlier with `from ... import ...`.
# This import also rebinds `get_js_video` to the library version, replacing
# the notebook-local function of the same name defined in an earlier cell.
import deepcell.utils.plot_utils
importlib.reload(deepcell.utils.plot_utils)
from deepcell.utils.plot_utils import get_js_video

# Change this value to look at other batches of data
#batch = 7
batch = 0

# Raw
HTML(get_js_video(raw, batch=batch, cmap='gray'))

In [126]:
# Tracked

# Scale the colors to match the max cell label
vmax = max(lineages[batch].keys())

HTML(get_js_video(tracked, batch=batch, cmap='cubehelix', vmin=0, vmax=vmax))

In [127]:
# Investigate lineage for this movie

for label, track in lineages[batch].items():
    print('label: ', label)
    print('in frames: ', track['frames'])
    print('daughters: ', track['daughters'])
    print('')

    

label:  1
in frames:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
daughters:  []

label:  2
in frames:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
daughters:  []

label:  3
in frames:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
daughters:  []

label:  4
in frames:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
daughters:  []

label:  5
in frames:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
daughters:  []

label:  6
in frames:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
daughters:  []

label:  7
in frames:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 2

#### Save the Raw and Tracked Output as Images

In [None]:
import matplotlib.pyplot as plt

# Define where images (movies) will be saved
MOVIE_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'tracked_movies', PREFIX))

# Create the output directory if needed (safe to re-run; still raises if the
# path exists but is not a directory)
os.makedirs(MOVIE_DIR, exist_ok=True)

channel = 0 # These images should only have one channel

# Scale the colors to the largest cell label of this batch instead of a fixed
# value of 30, so labels above 30 are not clipped to the same color and the
# saved frames match the tracked movie rendered in the notebook.
vmax = max(lineages[batch].keys())

# Write one raw and one tracked PNG per frame of the selected batch
for i in range(raw.shape[1]):
    name_raw = os.path.join(MOVIE_DIR,'raw_frame_{:02}_.png'.format(i))
    name_tracked = os.path.join(MOVIE_DIR,'tracked_frame_{:02}_.png'.format(i))
    plt.imsave(name_raw, raw[batch, i, :, :, channel], cmap='gray')
    plt.imsave(name_tracked, tracked[batch, i, :, :, channel], cmap='cubehelix', vmin=0, vmax=vmax)

### Benchmarking

#### Save Cell Lineages in an ISBI-Formatted Output txt

The ISBI Cell Tracking Challenge requires a text file (man_track.txt) that represents a batch's cell lineage as an acyclic graph. The format of this file is as follows: Every line corresponds to a single track that is encoded by four numbers separated by a space -  
L B E P  
where L is a unique label of the track (label of markers, 16-bit positive value),  
B is a zero-based index of the frame in which the track begins,  
E is a zero-based index of the frame in which the track ends,  
P is the label of the parent track (0 is used when no parent is defined)

N.B. DeepCell's unique approach allows a cell to be tracked even if it momentarily leaves the frame. This is not possible in conventional tracking algorithms, so ISBI considers a cell's track to have ended once it leaves the frame. We adjust the output here to conform to ISBI's formatting (i.e., each track contains only contiguous frames).

In [None]:
## TO DELETE:
from deepcell.utils.tracking_utils import load_trks
FILE_PATH = '/data/track_data/cells/HEK293/all_batches.trks'

trks = load_trks(FILE_PATH)

lineages, raw, tracked = trks["lineages"], trks["X"], trks["y"]

In [None]:
# Print one "label first_frame last_frame parent" line per track, per batch.
# Tracks are printed as stored; non-contiguous tracks are not split here.
for batch, batch_info in enumerate(lineages):
    print('batch number: ', batch)
    for label in batch_info:
        first_frame = np.min(batch_info[label]['frames'])
        last_frame = np.max(batch_info[label]['frames'])
        # A falsy parent (e.g. None) is reported as 0, meaning "no parent"
        parent = batch_info[label]['parent'] or 0
        print(label, first_frame, last_frame, parent)

In [None]:
# Split every track into contiguous segments and print one
# "label first_frame last_frame parent" line per segment.
# NOTE: `contig_tracks` is defined in a later cell of this notebook, so that
# cell must be run before this one. The lineage dicts are modified in place.
for batch, batch_info in enumerate(lineages):
    print('batch number: ', batch)

    # Work queue of labels still to check. `contig_tracks` splits a track at
    # its first gap only and stores the remainder under a new label, so newly
    # created labels are queued as well; iterating a fixed snapshot of the
    # labels would never print (or further split) those new tracks.
    pending = list(batch_info)
    queued = set(pending)
    while pending:
        label = pending.pop(0)
        batch_info = contig_tracks(label, batch_info)

        # Queue any track that the split just added
        for new_label in batch_info:
            if new_label not in queued:
                queued.add(new_label)
                pending.append(new_label)

        first_frame = np.amin(batch_info[label]['frames'])
        last_frame = np.amax(batch_info[label]['frames'])
        # A falsy parent (e.g. None) is reported as 0, meaning "no parent"
        if batch_info[label]['parent']:
            parent = batch_info[label]['parent']
        else:
            parent = 0

        print(label, first_frame, last_frame, parent)
        

In [None]:
def create_new_ISBI_track(batch_info, old_label, frames, daughters, frame_div):
    """Add a new track entry to `batch_info` under a fresh label.

    Args:
        batch_info: dict mapping track label -> track dict; modified in place.
        old_label: label of the track this new track was split from.
        frames: frames assigned to the new track.
        daughters: daughter labels assigned to the new track.
        frame_div: division frame assigned to the new track.

    Returns:
        The same `batch_info` object, now containing the new track. The new
        track's 'parent' is set to None.
    """
    # Use max(label) + 1 rather than len(batch_info) + 1: labels are not
    # guaranteed to be contiguous, so a count-based label can collide with
    # (and silently overwrite) an existing track. An empty dict starts at 1.
    new_label = max(batch_info, default=0) + 1

    batch_info[new_label] = {
        'old_label': old_label,
        'label': new_label,
        'frames': frames,
        'daughters': daughters,
        'frame_div': frame_div,
        'parent': None,
    }

    # Only the lineage dict is updated here; no label image is relabeled.
    return batch_info

In [None]:
def contig_tracks(label, batch_info):
    """Split a track at its first gap so that it holds only contiguous frames.

    If the frames of `batch_info[label]` contain a gap (a frame whose successor
    in the list is not `frame + 1`), everything after the first gap is moved to
    a new track created with `create_new_ISBI_track`, together with the
    original track's daughters and division frame. The original track keeps
    the frames up to and including the last contiguous one and loses its
    daughters and division frame.

    Only the FIRST gap is handled per call; the new track may itself contain
    gaps and must be passed through this function again by the caller.

    Args:
        label: label of the track to check.
        batch_info: dict mapping track label -> track dict; modified in place.

    Returns:
        The same `batch_info` object (unchanged if the track has no gap).
    """
    track = batch_info[label]
    frames = track['frames']

    # Compare each frame with its successor; the last frame has none.
    for idx in range(len(frames) - 1):
        if frames[idx] + 1 == frames[idx + 1]:
            continue

        # Move everything after the gap into a new track. `frame_div` is
        # read with .get() so a track without that key is treated as having
        # no division frame instead of raising KeyError.
        batch_info = create_new_ISBI_track(
            batch_info, label, frames[idx + 1:],
            track['daughters'], track.get('frame_div'))

        # Keep frames up to AND including index `idx`, the last contiguous
        # frame (slicing to `idx` alone would drop that frame).
        batch_info[label]['frames'] = frames[:idx + 1]
        batch_info[label]['daughters'] = []
        batch_info[label]['frame_div'] = None

        # Later gaps now belong to the new track, so stop here.
        break

    return batch_info
        
        
        

In [None]:
x = [5, 6, 7, 8]
x[3:]

In [None]:
print(max(batch_info.keys()))

In [None]:
labels=[1,2]
for label in labels:
    if label == 2:
        labels.append(3)
        labels.append(4)
    print(label)

In [102]:
for x in range(7,8):
    print(x)

7


## Image Generator Tests

In [117]:
# Import image generator
from deepcell.image_generators import SiameseDataGenerator

# NOTE(review): this reloads plot_utils, which nothing in this cell uses;
# presumably left over from development. Confirm before removing.
importlib.reload(deepcell.utils.plot_utils)

# All augmentation arguments are 0 in this cell.
image_data_generator = SiameseDataGenerator(
        rotation_range=0,  # randomly rotate images by 0 to rotation_range degrees
        shear_range=0, # randomly shear images in the range (radians , -shear_range to shear_range)
        horizontal_flip=0,  # randomly flip images
        vertical_flip=0) 

# NOTE(review): the recorded output of this cell is
# "NameError: name 'SiameseIterator' is not defined", so this call did not
# succeed when last run. The generator is also passed to its own `flow` as the
# second positional argument; the `flow` signature is not visible here.
# TODO confirm the expected arguments. An earlier cell builds the test
# iterator directly with `generators.SiameseIterator(...)`.
test_iterator = image_data_generator.flow(test_dict, image_data_generator, features)

NameError: name 'SiameseIterator' is not defined

In [None]:
(lst, y) = test_iterator.next()
print(lst[0].shape)