In [1]:
# Imports
import glob
import os
from functools import reduce

import tifffile as tf
import pandas as pd
import numpy as np

import scipy.ndimage as ndi

import line_utils
import image_utils

In [2]:
def _workbook_target(workbook, header_row, image_directory, sheet_name="ToC", alias=None):
    """
    Build the description of a target that has its own workbook and image directory.

    The returned dict carries the keys "workbook", "workbook_sheet_name",
    "workbook_header_row" and "image_directory" (in that order), plus "alias"
    when alternative names are supplied.
    """
    entry = {
        "workbook": workbook,
        "workbook_sheet_name": sheet_name,
        "workbook_header_row": header_row,
        "image_directory": image_directory,
    }
    if alias is not None:
        entry["alias"] = alias
    return entry


# Describe all of our target proteins here, keyed by their canonical name.
# Any protein that does not have a specific workbook and image directory associated
# will be considered a general marker, available across all workbooks.
# An optional "alias" list holds the other spellings a target may appear under.
targets = {
    # --- general markers ---
    "MTs": {"alias": ["aTub", "a-tub", "atub", "tub", "tub-m", "tub-rt"]},
    "septin": {"alias": ["GFP"]},
    "DAPI": {},
    # --- targets with their own workbook and image directory ---
    "MKLP1": _workbook_target(
        "/Volumes/Ries_Ewers/01_Macros_Analysis/MKLP1_along.xlsx",
        4,
        "/Volumes/Ries_Ewers/MKLP1",
    ),
    "RacGAP1": _workbook_target(
        "/Volumes/Ries_Ewers/01_Macros_Analysis/RacGAP1_along.xlsx",
        16,
        "/Volumes/Ries_Ewers/RacGAP1",
    ),
    "anillin": _workbook_target(
        "/Volumes/Ries_Ewers/01_Macros_Analysis/20241030_anillin_along.xlsx",
        2,
        "/Volumes/Ries_Ewers/Anillin",
    ),
    "myoIIA": _workbook_target(
        "/Volumes/Ries_Ewers/01_Macros_Analysis/20241111_myosinIIA_along.xlsx",
        0,
        "/Volumes/Ries_Ewers/Myosin IIA",
    ),
    "myoIIB": _workbook_target(
        "/Volumes/Ries_Ewers/01_Macros_Analysis/20241107_myoIIB_along.xlsx",
        1,
        "/Volumes/Ries_Ewers/Myosin IIB",
        alias=["mypoIIB", "myosinIIB"],
    ),
    "Cit-K": _workbook_target(
        "/Volumes/Ries_Ewers/01_Macros_Analysis/20241017_CitK_along.xlsx",
        4,
        "/Volumes/Ries_Ewers/Citron Kinase",
    ),
    "CellMask": _workbook_target(
        "/Volumes/Ries_Ewers/01_Macros_Analysis/20241122_CellMasko_lineprofile.xlsx",
        1,
        "/Volumes/Ries_Ewers/CellMask Membrane",
        sheet_name="Tabelle1",
    ),
    "PRC1": _workbook_target(
        "/Volumes/Ries_Ewers/01_Macros_Analysis/PRC1_along.xlsx",
        1,
        "/Volumes/Ries_Ewers/PRC1",
    ),
}

# Column holding the stage label of each row, and the order in which the
# stages should appear along the pseudotime axis
time_key = "Stage"
time_order = [
    "CF",
    "RC",
    "CS",
    "RS",
    "SM",
    "BA",
    "A",
]

# Stages for which the septin ring locations are not fitted
time_do_not_fit = [
    "BA",
    "A",
]

# Number of channels per image (TODO: Auto detect)
n_ch = 4

# Wavelengths to look for in the file names.
# A sublist groups several labels together; its first element names the group.
# NOTE: The first element must be a number!
wvls = [
    488,
    [568, "orange"],
    [646, 647],
]

# Desired channel order of the output stack, given as keys of `targets`
desired_channel_order = [
    "MTs",
    "septin",
    "DAPI",
    "MKLP1",
    "RacGAP1",
    "anillin",
    "myoIIA",
    "myoIIB",
    "Cit-K",
    "CellMask",
    "PRC1",
]
# Shorter alternative:
# desired_channel_order = ["MTs", "septin", "DAPI", "MKLP1", "CellMask"]

# Side length of the cropped pseudotime region (should be roughly the line length)
length = 500


In [3]:
# Fail fast: every channel requested in desired_channel_order must be
# declared in the targets dictionary before any data is touched.
for key in desired_channel_order:
    if key not in targets:
        raise KeyError(f"Element {key} does not exist in targets dictionary!")

In [4]:
def target_names(targets, key):
    """
    Return every name a target can appear under.

    The result starts with `key` itself, followed by the entries of the
    target's "alias" list when it has one. A key that is not present in
    `targets` gives an empty list. Used when matching targets against
    labels found in file names.
    """
    if key not in targets:
        return []
    entry = targets[key]
    aliases = entry["alias"] if "alias" in entry else []
    return [key, *aliases]

In [5]:
# Load the table of contents
# Every requested target that declares a workbook contributes one sheet; each
# row is tagged with the target it came from before the sheets are merged.
_workbook_keys = ("workbook", "workbook_sheet_name", "workbook_header_row")
dfs = []
for k, v in targets.items():
    if k not in desired_channel_order:
        continue
    if any(field not in v for field in _workbook_keys):
        # No workbook configured for this target (general marker): nothing to load.
        # Checking the keys explicitly, rather than catching KeyError around the
        # read, keeps a KeyError raised while reading the sheet from being hidden.
        continue
    df = pd.read_excel(v['workbook'], sheet_name=v['workbook_sheet_name'], header=v['workbook_header_row'])
    df["target"] = k
    dfs.append(df)

if not dfs:
    # reduce() on an empty list would fail with a much less helpful TypeError
    raise ValueError("No workbook was loaded; check targets and desired_channel_order.")

# Outer-merge all sheets on whatever columns they have in common
metrics = reduce(lambda left, right: pd.merge(left, right, how='outer'), dfs)

# Get rid of rows with no line specified.
# .copy() makes this an independent frame, so the column edits below never
# act on a filtered view of the merged table.
metrics = metrics[~metrics['Y'].isna()].copy()

# merge Length into length
if 'length' not in metrics.columns:
    # All our length columns are capitalized, which we do not expect
    metrics = metrics.rename(columns={'Length': 'length'})
mask = metrics['length'].isna()
if 'Length' in metrics.columns:
    # Both spellings exist: fill the gaps in 'length' from 'Length'
    metrics.loc[mask, 'length'] = metrics.loc[mask, 'Length']

# Drop unused columns: any column that still has a missing value in some row
metrics = metrics.dropna(axis=1)

In [6]:
# Now let's find the original images...
# Create the column up front (all missing). Otherwise, when not a single image
# is matched (e.g. the image volume is not mounted), the column would never be
# created and the filter at the end would fail with a KeyError.
metrics = metrics.assign(filename=None)

for name, entries in metrics.groupby("target"):
    # Candidate image descriptors (*.nd) in this target's image directory
    image_files = glob.glob(os.path.join(targets[name]["image_directory"], "*.nd"))
    for i, ml in entries.iterrows():
        # Label without its extension; the first file whose path contains it wins
        file_stub = os.path.splitext(ml["Label"])[0]
        match = next((fn for fn in image_files if file_stub in fn), None)
        if match is not None:
            metrics.loc[i, "filename"] = match

# Keep only the rows for which an image was found; copy so later cells can
# add columns without writing into a filtered view.
metrics = metrics[~metrics['filename'].isna()].copy()

In [7]:
# Build one averaged, registered image per stage (pseudotime point).
#
# For every stage group:
#   1. first pass  - fit the septin peaks of each row and store them in `metrics`
#   2. second pass - normalize, rotate, crop, rescale and accumulate each image
#                    into `group_img` (axes: stage, channel, Y, X)

# Detach from any earlier filtered frame before adding columns
metrics = metrics.copy()

# Establish columns for septin peaks (X12, X22) and distance between them (dX2)
for _col in ('dX2', 'X12', 'X22'):
    metrics[_col] = np.nan

groups = metrics.groupby(time_key)

plot_stack = None  # not used in this cell; kept in case another cell reads it
n_groups = len(groups)
l2 = length // 2
group_img = np.zeros((n_groups, len(desired_channel_order), length, length))


def _channel_targets_for(filename):
    """
    Return the target label of every channel of the image behind `filename`.

    Labels are taken from the wavelengths found in the file name and ordered
    from high to low wavelength, to match images loaded with load_sorted=True.
    If fewer than n_ch labels are found, "DAPI" is appended once.
    """
    # find wavelengths in file name
    wvls_dict, binned_wvls = image_utils.extract_channel_targets_from_filename(filename, wvls=wvls)

    # Establish target names in this data set and sort from high to low to match image load
    labels = [wvls_dict[str(wvl)] for wvl in sorted(binned_wvls)[::-1]]

    # the last channel is always DAPI, if unknown
    if len(labels) < n_ch:
        labels.append("DAPI")
    return labels


def _channel_index(channel_targets, key):
    """Return the index of the first channel labelled with `key` or one of its aliases."""
    names = target_names(targets, key)
    # Raises IndexError when no channel carries this target
    return [idx for idx, label in enumerate(channel_targets) if label in names][0]


for group, (name, entries) in enumerate(groups):
    n_group = len(entries)
    print(f"{name}: {n_group} averaged")
    im_proj = {}  # not used in this cell; kept in case another cell reads it

    # In a first pass, fit the septin ring distances for registration
    for row_idx, ml in entries.iterrows():
        if ml[time_key] in time_do_not_fit:
            # Fitting makes no sense for this class; dX2 stays NaN
            continue

        # Get the image associated with this row and load it with the channels sorted from high to low
        im = image_utils.NDImage(ml["filename"], load_sorted=True)

        # get x, y, angle for this row
        x, y, angle = ml[["X", "Y", "Angle"]]

        channel_targets = _channel_targets_for(ml["filename"])

        # ... get the septin peaks
        mt_ch = _channel_index(channel_targets, "MTs")
        septin_ch = _channel_index(channel_targets, "septin")
        p0, p1, dX2 = line_utils.find_septin_peaks(im[:].mean(1).squeeze(), x, y, angle, length,
                                                    mt_ch=mt_ch,
                                                    septin_ch=septin_ch)

        metrics.loc[row_idx, ['X12', 'X22', 'dX2']] = [p0, p1, dX2]

    # The frame handed out by groupby is a snapshot taken before the fits above
    # were written to `metrics`, so its dX2 column is still all-NaN. Re-read the
    # rows of this group from `metrics`; otherwise the mean is NaN and the
    # rescaling below is silently skipped for every image.
    # (Relies on unique index labels, as the row-wise .loc writes above already do.)
    entries = metrics.loc[entries.index]

    # Now compute the average distance
    mean_dX2 = entries['dX2'].mean()

    # In our second pass, average these images
    for name2, entries2 in entries.groupby("target"):
        n_target = len(entries2)
        print(f"  {name2}: {n_target} averaged")
        for row_idx, ml in entries2.iterrows():
            # Get the image associated with this row
            im = image_utils.NDImage(ml["filename"], load_sorted=True)

            channel_targets = _channel_targets_for(ml["filename"])

            # Now find the resorting of the channels according to their target position:
            # channel_order       -> index of the channel inside this image
            # group_channel_order -> index of the same target inside group_img
            channel_order = []
            group_channel_order = []
            for j, ch in enumerate(desired_channel_order):
                for opt in target_names(targets, ch):
                    try:
                        channel_order.append(channel_targets.index(opt))
                        group_channel_order.append(j)
                    except ValueError:
                        # this name/alias is not a channel of this image
                        pass
            assert len(channel_order) == n_ch, "could not assign every channel of the image to a target"
            print(channel_targets, channel_order, group_channel_order)

            im = im[:].mean(1).squeeze()

            # Normalize every channel to unit total intensity
            im = im/im.sum(-1).sum(-1)[:, None, None]

            # get x, y, angle for this row
            x, y, angle = ml[["X", "Y", "Angle"]]

            # Rotate the image  # CYX
            im_rot = image_utils.pad_rot_and_trans_im(im, angle, x, y)

            # Crop the image to a (2*length) window around the center
            xc, yc = im_rot.shape[2]//2, im_rot.shape[1]//2
            im_crop = im_rot[:, (yc-length):(yc+length), (xc-length):(xc+length)]

            # rescale the image along X (last axis) only
            if np.isnan(ml["dX2"]):
                # no fitted distance for this row: leave the image as it is
                im_zoom = im_crop
            else:
                # NOTE(review): this enlarges images whose distance is already above
                # the group mean; if the goal is to bring every image to the mean
                # distance the factor may need to be mean_dX2/dX2 - confirm.
                # NOTE(review): a factor below 0.5 makes the zoomed image narrower
                # than `length`, so the crop below would come out too small.
                mag = ml["dX2"]/mean_dX2
                im_zoom = ndi.zoom(im_crop, (1, 1, mag))

            # Crop the image again, now to the final length x length window
            xc, yc = im_zoom.shape[2]//2, im_zoom.shape[1]//2
            im_crop2 = im_zoom[:, (yc-l2):(yc+l2), (xc-l2):(xc+l2)]

            # Add the image with a weighting 1/length of the group.
            # All matched channels but the last are divided by the size of the
            # stage group, the last one by the number of rows of this target
            # (presumably because only the last channel is target specific).
            weights = np.array([n_group] * (n_ch - 1) + [n_target])
            group_img[group, group_channel_order, ...] += im_crop2[channel_order] / weights[:, None, None]
        

A: 87 averaged
  CellMask: 21 averaged
['tub-m', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'DAPI'] [0, 2, 3, 1] [0, 1, 2, 9]
['aTub', 'CellMask', 'GFP', 'D

OSError: [Errno 6] Device not configured

In [None]:
# Stage labels in the order the groupby reports them (presumably the same
# order in which the stages were enumerated when filling group_img)
group_order = list(groups[time_key].unique().index)

# For every stage of time_order that is present, look up its position in group_order
_stage_position = {stage: pos for pos, stage in enumerate(group_order)}
group_img_sorted = [_stage_position[stage] for stage in time_order if stage in _stage_position]
print(group_img_sorted)

[2, 4, 3, 5, 6, 1, 0]


In [None]:
# The file name carries the first two characters of every channel name
channel_tag = "_".join(ch[:2] for ch in desired_channel_order)
stack_fn = f'pseudotime_images_{channel_tag}.ome.tif'

# Write the stages in pseudotime order as a TCYX stack
stack = group_img[group_img_sorted, ...]
tf.imwrite(stack_fn, stack, metadata={'axes': 'TCYX'}, dtype=group_img.dtype)