In [1]:
# Import necessary libraries
import os
import re
import tifffile
import numpy as np
import pandas as pd

from pathlib import Path
from skimage.measure import label

In [2]:
# Concatenate labels and generate lookup table
def concat_labels_generate_lookup(iRFP_path, PI_path):
    '''
    Merge the iRFP670 and PI label stacks into one stack with globally unique IDs.

    Every non-zero label of every frame gets a fresh ID from a single counter that
    starts at 1 (0 stays background) and keeps increasing across frames. Within a
    frame the iRFP670 labels are numbered first, then the PI labels; where both
    masks cover the same pixel, the PI label (written last) wins.

    Parameters: iRFP670 labeled image path, PI labeled image path
        (both are read with tifffile.imread and must be (T, H, W) stacks of identical shape)
    Returns: concatenated labeled image (uint16, with each cell relabeled), lookup table with
        the source (Live or Dead) for each label, as a list of dicts with keys
        "FRAME", "ID" and "SOURCE"
    Raises:
        ValueError: if the iRFP670 stack is not 3-D or the two stacks differ in shape
        OverflowError: if the total number of labels does not fit into uint16
    '''
    # Load the images
    iRFP = tifffile.imread(iRFP_path)
    PI = tifffile.imread(PI_path)

    if iRFP.ndim != 3:
        raise ValueError(
            f"Expected a (T, H, W) label stack for iRFP670, got shape {iRFP.shape}"
        )
    if PI.shape != iRFP.shape:
        raise ValueError(
            f"iRFP670 and PI label stacks differ in shape: {iRFP.shape} vs {PI.shape}"
        )

    T, H, W = iRFP.shape
    combined_stack = np.zeros((T, H, W), dtype=np.uint16)

    # Collect the non-background labels of every frame first, so that an ID
    # overflow is detected before any relabeling work is done.
    labels_per_frame = []
    for t in range(T):
        live_labels = np.unique(iRFP[t])
        dead_labels = np.unique(PI[t])
        labels_per_frame.append((live_labels[live_labels > 0], dead_labels[dead_labels > 0]))

    total_labels = sum(len(live) + len(dead) for live, dead in labels_per_frame)
    max_id = int(np.iinfo(combined_stack.dtype).max)
    if total_labels > max_id:
        # Writing a larger ID into a uint16 array would wrap around (e.g. 65536 -> 0,
        # i.e. background) and silently corrupt the label image.
        raise OverflowError(
            f"{total_labels} labels need to be assigned but the uint16 label image "
            f"can hold at most {max_id} distinct IDs"
        )

    lookup_entries = []
    label_id_counter = 1  # start labeling from 1 (0 is background)

    for t, (live_labels, dead_labels) in enumerate(labels_per_frame):
        # iRFP670 ("Live") first, PI ("Dead") second: PI overwrites on overlap
        for source, frame, labels in (("Live", iRFP[t], live_labels),
                                      ("Dead", PI[t], dead_labels)):
            for lbl in labels:
                combined_stack[t][frame == lbl] = label_id_counter
                lookup_entries.append({
                    "FRAME": t,
                    "ID": label_id_counter,
                    "SOURCE": source
                })
                label_id_counter += 1

    return combined_stack, lookup_entries

In [3]:
# Re-maps IDs in TrackMate output based on matching xy coordinates of labels in TrackMate output and original labeled image
# Maps state of cell (Live or Dead) based on lookup table
def spots_relabel(trackmate_path, label_image_path, lookup_path, pixel_size = 1):
    '''
    Re-map the IDs of a TrackMate spots table to the labels of the labeled image.

    For every spot the label found at its (FRAME, Y, X) pixel in the labeled image
    replaces the 'ID' column, and the matching SOURCE from the lookup table is added.

    Parameters: TrackMate spots file path, labeled image path, lookup table path, (optional): micron-pixel ratio
        (spot positions are divided by pixel_size and rounded to get pixel indices;
        the labeled image must be a (T, H, W) stack; the lookup table needs the
        columns FRAME, ID and SOURCE)
    Return: TrackMate spots file with cell labels updated to match labels in labeled image, with SOURCE column added
        - 'X_px' / 'Y_px': pixel coordinates used for the lookup
        - 'ID': label under the spot, or -1 if the spot lies outside the image stack
        - 'SOURCE': lookup value for (FRAME, ID), 'unknown' if there is none
        Rows whose POSITION_X, POSITION_Y or FRAME are not numeric are dropped.
    Raises:
        ValueError: if the labeled image is not a 3-D stack
    '''
    # Load data
    spots_df = pd.read_csv(trackmate_path)
    # Remove the three extra header rows TrackMate writes below the column names.
    # NOTE: reset_index() without drop=True keeps the old row number as an 'index'
    # column; it is retained so the output columns stay as they were.
    spots_df = spots_df.drop([0, 1, 2], axis=0).reset_index()

    # Cleanup data: coerce to numbers, invalid entries become NaN and are dropped
    numeric_cols = ['POSITION_X', 'POSITION_Y', 'FRAME']
    for col in numeric_cols:
        spots_df[col] = pd.to_numeric(spots_df[col], errors='coerce')
    spots_df = spots_df.dropna(subset=numeric_cols).copy()

    label_stack = tifffile.imread(label_image_path)
    if label_stack.ndim != 3:
        raise ValueError(
            f"Expected a (T, H, W) label stack, got shape {label_stack.shape}"
        )
    lookup_df = pd.read_csv(lookup_path)

    # Convert xy positions in TrackMate output from microns to pixels
    spots_df['X_px'] = (spots_df['POSITION_X'] / pixel_size).round().astype(int)
    spots_df['Y_px'] = (spots_df['POSITION_Y'] / pixel_size).round().astype(int)

    # FRAME is float64 whenever the raw column contained a NaN; a float cannot be
    # used as an array index, so convert explicitly instead of relying on the dtype.
    frames = spots_df['FRAME'].astype(int).to_numpy()
    ys = spots_df['Y_px'].to_numpy()
    xs = spots_df['X_px'].to_numpy()

    # Get label ID at each (X, Y, FRAME); anything outside the stack gets -1.
    # Negative coordinates are rejected explicitly, otherwise they would wrap
    # around to the opposite edge of the image.
    n_frames, height, width = label_stack.shape
    inside = (
        (frames >= 0) & (frames < n_frames)
        & (ys >= 0) & (ys < height)
        & (xs >= 0) & (xs < width)
    )
    ids = np.full(len(spots_df), -1, dtype=np.int64)
    ids[inside] = label_stack[frames[inside], ys[inside], xs[inside]]
    spots_df['ID'] = ids

    # Dictionary with (FRAME, ID) as keys and source as value
    lookup_dict = dict(zip(
        zip(lookup_df['FRAME'].astype(int).tolist(), lookup_df['ID'].astype(int).tolist()),
        lookup_df['SOURCE'].tolist(),
    ))
    # Default to 'unknown' if there is no match (background, out of bounds, ...)
    spots_df['SOURCE'] = [
        lookup_dict.get(key, 'unknown')
        for key in zip(frames.tolist(), ids.tolist())
    ]

    return spots_df

In [4]:
# Quality check for tracking result
def tracking_quality_check_transition(spots_df):
    """
    Flag tracks that contain an implausible Dead -> Live transition.

    Expects a TrackMate results DataFrame with 'TRACK_ID', 'FRAME' and 'SOURCE'
    columns. Each track is ordered by FRAME; a track is flagged when a 'Dead'
    (PI channel) spot occurs anywhere before its last 'Live' (iRFP670 channel) spot.

    Returns the list of flagged track IDs.
    """
    flagged = []

    for tid, track in spots_df.groupby('TRACK_ID'):
        ordered_sources = track.sort_values('FRAME')['SOURCE'].tolist()

        # Positions of every 'Live' spot along the time-ordered track
        live_positions = [pos for pos, src in enumerate(ordered_sources) if src == 'Live']
        if not live_positions:
            continue  # track never shows a 'Live' spot, nothing to compare against

        # Anything 'Dead' ahead of the final 'Live' spot is a reversal
        before_last_live = ordered_sources[:live_positions[-1]]
        if 'Dead' in before_last_live:
            flagged.append(tid)

    return flagged

In [5]:
# Quality check for tracking result
def tracking_quality_check_dead(spots_df):
    """
    Flag tracks that begin as 'Dead' after the first frame.

    Expects a TrackMate results DataFrame with 'TRACK_ID', 'FRAME' and 'SOURCE'
    columns. For each track the earliest entry (by FRAME) is inspected; the track
    is flagged when that entry has SOURCE 'Dead' (PI channel) and FRAME > 0.

    Returns the list of flagged track IDs.
    """
    # One row per track holding its earliest values.
    # NOTE(review): GroupBy.first() picks the first non-null value per column, so
    # this equals "the first row" only while FRAME and SOURCE contain no NaN.
    time_ordered = spots_df.sort_values(['TRACK_ID', 'FRAME'])
    track_starts = time_ordered.groupby('TRACK_ID').first().reset_index()

    # A track is bad when it starts dead and does not start in frame 0
    starts_dead = track_starts['SOURCE'].eq('Dead')
    starts_late = track_starts['FRAME'].gt(0)

    return track_starts.loc[starts_dead & starts_late, 'TRACK_ID'].tolist()

In [6]:
# Find correct path to save spots.csv with re-mapped labels
def find_output_folder(path, keyword):
    """
    Walk upwards from `path` and return the first folder whose name contains `keyword`.

    At each level (starting with `path` itself, then its parent, and so on up to the
    filesystem root) only the immediate sub-folders are inspected. If several
    sub-folders of the same level match, the alphabetically first one is returned
    so the result does not depend on directory listing order.

    Parameters: starting folder path, keyword to look for in folder names
    Returns: path (str) of the matching folder
    Raises:
        ValueError: if no matching folder is found on the way up to the root
    """
    current = path

    while True:
        # The first item yielded by os.walk describes `current` itself; a missing or
        # unreadable directory yields nothing and is treated as having no children.
        root, subdirs, _ = next(os.walk(current), (current, [], []))
        for name in sorted(subdirs):
            if keyword in name:
                return os.path.join(root, name)

        parent = os.path.dirname(current)
        if parent == current:
            raise ValueError(f"No folder containing keyword '{keyword}' found while walking up from: {path}")
        current = parent

In [7]:
# Batch concatenate
master_folder = '/home/vil945/live_cell_imaging/2025-06-18_live_cell_imaging'

# Recursively go through subfolders within master_folder.
# All pairing and saving happens INSIDE this loop so that every matching folder is
# processed (not just the last one found) and nothing runs when no folder matches.
for root, dirs, files in os.walk(master_folder):
    # Labeled images to be concatenated should be saved in folders that contain "filtered_segmentation" in the name
    if "filtered_segmentation" not in root:
        continue

    path = Path(root)
    # The output folder is created inside the segmentation folder, so its path also
    # contains "filtered_segmentation"; skip it when the cell is run again.
    if path.name == "concatenated_segmentation":
        continue

    tiff_files = list(path.glob("*.tif"))

    # Match pairs of iRFP670 and PI segmentations by matching index i within their file names "xy[i]_iRFP670", "xy[i]_PI"

    # Dictionaries: key = index i, value = file path
    iRFP_files = {}
    PI_files = {}

    for f in tiff_files:
        iRFP_match = re.search(r'xy(\d+)_iRFP670', f.name)
        PI_match = re.search(r'xy(\d+)_PI', f.name)
        if iRFP_match:
            iRFP_files[iRFP_match.group(1)] = f
        if PI_match:
            PI_files[PI_match.group(1)] = f

    # Only indices present for both channels; sorted numerically (xy2 before xy10)
    matched_indices = sorted(set(iRFP_files) & set(PI_files), key=int)
    if not matched_indices:
        continue

    save_dir = path / "concatenated_segmentation"
    save_dir.mkdir(exist_ok=True)

    for i in matched_indices:
        iRFP_file = iRFP_files[i]
        PI_file = PI_files[i]

        # Call concatenate label function
        combined_lbl, lookup_entries = concat_labels_generate_lookup(iRFP_file, PI_file)

        # Save results
        concatenated_name = f"xy{i}_target_concatenated.tif"
        lookup_name = f"xy{i}_lookup.csv"

        tifffile.imwrite(save_dir / concatenated_name, combined_lbl.astype(np.uint16))
        lookup_df = pd.DataFrame(lookup_entries)
        lookup_df.to_csv(save_dir / lookup_name, index=False)

In [11]:
# Batch relabel
# Before running this cell, run TrackMate on the concatenated label image, save the spots and tracks csv files in the "concatenated_segmentation" folder
master_folder = '/home/vil945/live_cell_imaging/2025-06-18_live_cell_imaging'

# DO NOT change anything beyond this line
# Recursively go through subfolders within master_folder.
# Matching and relabeling happen INSIDE this loop so that every
# "concatenated_segmentation" folder is processed with its own output folder
# (not just the last one found) and nothing runs when no folder matches.
for root, dirs, files in os.walk(master_folder):
    if "concatenated_segmentation" not in root:
        continue

    path = Path(root)

    # Navigate back up the hierarchy to find the folder to save the outputs
    anchor_folder = find_output_folder(path, 'tracking')

    output_folder = os.path.join(anchor_folder, 'combined_spots_relabeled')
    os.makedirs(output_folder, exist_ok=True)

    csv_files = list(path.glob("*.csv"))
    tiff_files = list(path.glob("*.tif"))

    # Match triplets of (1) spots csv, (2) lookup csv, (3) labels tiff by matching index i
    # within their file names "xy[i]_target_combined_spots", "xy[i]_lookup", "xy[i]_target_concatenated"

    # Dictionaries: key = index i, value = file path
    spots_files = {}
    lookup_files = {}
    concat_files = {}

    for f in csv_files:
        spots_match = re.search(r'xy(\d+)_target_combined_spots', f.name)
        lookup_match = re.search(r'xy(\d+)_lookup', f.name)
        if spots_match:
            spots_files[spots_match.group(1)] = f
        if lookup_match:
            lookup_files[lookup_match.group(1)] = f
    for f in tiff_files:
        concat_match = re.search(r'xy(\d+)_target_concatenated', f.name)
        if concat_match:
            concat_files[concat_match.group(1)] = f

    # Only indices for which all three files exist; sorted numerically (xy2 before xy10)
    matched_indices = sorted(set(spots_files) & set(lookup_files) & set(concat_files), key=int)

    for i in matched_indices:
        spots_file = spots_files[i]
        lookup_file = lookup_files[i]
        concat_file = concat_files[i]

        # Call remap label function
        spots_relabeled = spots_relabel(spots_file, concat_file, lookup_file)

        spots_relabeled_name = f"xy{i}_target_combined_spots_relabeled.csv"

        spots_relabeled.to_csv(os.path.join(output_folder, spots_relabeled_name), index=False)

In [13]:
# Batch quality check and exclusion of biologically implausible tracks
master_folder = '/home/vil945/live_cell_imaging/2025-06-18_live_cell_imaging'

# Maximum tolerated share of flagged tracks (in percent) for a file to be cleaned
MAX_ERROR_RATE_TRANSITION = 50  # tracks flagged by tracking_quality_check_transition
MAX_ERROR_RATE_DEAD = 80        # tracks flagged by tracking_quality_check_dead

# DO NOT change anything beyond this line
# Recursively go through subfolders within master_folder
for root, dirs, files in os.walk(master_folder):
    if "combined_spots_relabeled" in root:
        path = Path(root)
        spots_files = sorted(path.glob("*.csv"))  # sorted for a reproducible report order

        for spots_file in spots_files:
            spots_df = pd.read_csv(spots_file)

            # Without any track the error rates are undefined (division by zero)
            n_tracks = spots_df["TRACK_ID"].nunique()
            if n_tracks == 0:
                print(f"No tracks found in {spots_file}, skipping")
                continue

            bad_tracks_transition = tracking_quality_check_transition(spots_df) # Call quality check function to check for implausible Dead → Live transitions
            bad_tracks_dead = tracking_quality_check_dead(spots_df) # Call quality check function to check for tracks that start out dead
            error_rate_transition = len(bad_tracks_transition) / n_tracks * 100
            error_rate_dead = len(bad_tracks_dead) / n_tracks * 100

            if error_rate_transition <= MAX_ERROR_RATE_TRANSITION and error_rate_dead <= MAX_ERROR_RATE_DEAD:
                print(f"Error rate: {error_rate_transition:.2f}% (bad transitions) {error_rate_dead:.2f}% (all dead) passed, will exclude errornous tracks and proceed")

                clean_spots_df = spots_df[
                    ~spots_df["TRACK_ID"].isin(bad_tracks_transition) & 
                    ~spots_df["TRACK_ID"].isin(bad_tracks_dead)
                ].copy()
                clean_spots_df.reset_index(drop=True, inplace=True)
                # Overwrites the original spots csv file: running this cell again
                # reports the error rates of the already cleaned file.
                clean_spots_df.to_csv(spots_file, index=False)

            else:
                # Flags the high error rate, leaves the file untouched and moves on to the next one
                print(f"Error rate: {error_rate_transition:.2f}% (bad transitions) {error_rate_dead:.2f}% (all dead) failed, please check segmentation and/or tracking results and retry")
                

Error rate: 38.22% (bad transitions) 7.96% (all dead) passed, will exclude errornous tracks and proceed
Error rate: 15.38% (bad transitions) 8.33% (all dead) passed, will exclude errornous tracks and proceed
Error rate: 19.46% (bad transitions) 11.07% (all dead) passed, will exclude errornous tracks and proceed
Error rate: 14.80% (bad transitions) 22.38% (all dead) passed, will exclude errornous tracks and proceed
