In [1]:
# Imports
import glob
import os
from functools import reduce

import tifffile as tf
import pandas as pd
import numpy as np

import scipy.ndimage as ndi

import line_utils
import image_utils

In [None]:
# Registry of all target proteins, keyed by target name.  DO NOT DELETE
#
# A protein that has no specific workbook and image directory associated is
# considered a general marker, available across all workbooks.  The optional
# "alias" list holds alternative names for the same target.
targets = {
    "MTs": {"alias": ["aTub", "a-tub", "atub", "tub", "tub-m", "tub-rt"]},
    "septin": {"alias": ["GFP"]},
    "DAPI": {},
    "MKLP1": {
        "workbook": r"H:\01_Macros_Analysis\20241125_MKLP1_along_2024_only.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 4,
        "image_directory": r"H:\MKLP1",
    },
    "RacGAP1": {
        "workbook": r"H:\01_Macros_Analysis\RacGAP1_along.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 16,
        "image_directory": r"H:\RacGAP1",
    },
    "anillin": {
        "workbook": r"H:\01_Macros_Analysis\20241125_anillin_along_2024_only.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 2,
        "image_directory": r"H:\Anillin",
    },
    "myoIIA": {
        "workbook": r"H:\01_Macros_Analysis\20241111_myosinIIA_along.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 0,
        "image_directory": r"H:\Myosin IIA",
    },
    "myoIIB": {
        "workbook": r"H:\01_Macros_Analysis\20241107_myoIIB_along.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 1,
        "image_directory": r"H:\Myosin IIB",
        "alias": ["mypoIIB", "myosinIIB"],
    },
    "Cit-K": {
        "workbook": r"H:\01_Macros_Analysis\20241017_CitK_along.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 4,
        "image_directory": r"H:\Citron Kinase",
    },
    "CellMask": {
        "workbook": r"H:\01_Macros_Analysis\20241125_CellMasko_2024_only.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 1,
        "image_directory": r"H:\CellMask Membrane",
    },
    "PRC1": {
        "workbook": r"H:\01_Macros_Analysis\PRC1_along.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 1,
        "image_directory": r"H:\PRC1",
    },
    "actin": {
        "workbook": r"H:\01_Macros_Analysis\20241125_Actin_profile.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 1,
        "image_directory": r"H:\Actin",
    },
    "Septin7": {
        "workbook": r"H:\01_Macros_Analysis\20241125_Septin7_profileline.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 0,
        "image_directory": r"H:\Septin7",
    },
    "Septin11": {
        "workbook": r"H:\01_Macros_Analysis\20241125_Septin11_profileline.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 0,
        "image_directory": r"H:\Septin11",
    },
    "Septin9": {
        "workbook": r"H:\01_Macros_Analysis\20241125_Septin9_profileline.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 0,
        "image_directory": r"H:\Septin9 Novus Bio",
    },
    "BORG4": {
        "workbook": r"H:\01_Macros_Analysis\20241125_BORG4_profileline.xlsx",
        "workbook_sheet_name": "Sheet1",
        "workbook_header_row": 2,
        "image_directory": r"H:\BORG4",
    },
    "IST1": {
        "workbook": r"H:\01_Macros_Analysis\20241126_IST1.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 1,
        "image_directory": r"H:\IST1",
    },
    "CHMP4B": {
        "workbook": r"H:\01_Macros_Analysis\20241127_CHMP4B.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 0,
        "image_directory": r"H:\CHMP4B",
    },
    "ALIXrb": {
        "workbook": r"H:\01_Macros_Analysis\20241128_ALIXrb.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 0,
        "image_directory": r"H:\ALIX rb",
        "alias": ["ALIX"],
    },
    "ALIXm": {
        "workbook": r"H:\01_Macros_Analysis\20241128_ALIXm.xlsx",
        "workbook_sheet_name": "Tabelle1",
        "workbook_header_row": 0,
        "image_directory": r"H:\ALIX m",
    },
    "Tsg101-ab83m": {
        "workbook": r"H:\01_Macros_Analysis\20241128_Tsg101_ab83.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 1,
        "image_directory": r"H:\TSG101 ab83",
    },
    "Tsg101": {
        "workbook": r"H:\01_Macros_Analysis\20241129_Tsg101.xlsx",
        "workbook_sheet_name": "ToC",
        "workbook_header_row": 0,
        "image_directory": r"H:\Tsg101",
    },
}

# Column holding the stage of each row, and the order in which stages are arranged
time_key = "Stage"
time_order = [
    "CF", "RC", "CS", "RS",
    "SM", "SM1", "SM2", "SM3",
    "BA", "BA1", "BA2", "BA3", "BA4",
    "A",
]
# Alternative without sub-stages:
# time_order = ["CF", "RC", "CS", "RS", "SM", "BA", "A"]

# Stages at which the septin ring locations are not fitted.
# CF is listed because no septin peaks are expected at the furrow.
time_do_not_fit = [
    "CF",
    "BA", "BA1", "BA2", "BA3", "BA4",
    "A",
]

# Number of channels per image (TODO: Auto detect)
n_ch = 4

# Wavelengths to look for in the file names.
# A sublist forms a group; its first element is the group name.
# NOTE: First element must be a number!
wvls = [
    488,
    [568, "orange"],
    [646, 647, 657],
]

# Desired channel order, given as keys of targets.
# Must include MTs, septin and DAPI.
desired_channel_order = [
    "MTs", "septin", "DAPI",
    "CellMask", "MKLP1", "RacGAP1", "PRC1", "Cit-K", "anillin",
    "myoIIA", "myoIIB", "actin",
    "Septin7", "Septin11", "Septin9", "BORG4",
    "Tsg101", "Tsg101-ab83m", "ALIXrb", "ALIXm", "IST1", "CHMP4B",
]
# Alternative channel orders:
# desired_channel_order = ["MTs", "septin", "DAPI", "CellMask", "MKLP1", "RacGAP1", "PRC1", "Cit-K", "anillin", "myoIIA", "myoIIB", "actin", "Septin7", "Septin11", "Septin9", "BORG4", "Tsg101", "ALIXrb", "ALIXm", "IST1", "CHMP4B"]
# desired_channel_order = ["MTs", "septin", "DAPI", "MKLP1", "RacGAP1", "anillin", "myoIIA", "myoIIB", "Cit-K", "CellMask", "PRC1", "actin"]
# desired_channel_order = ["MTs", "septin", "DAPI", "MKLP1", "RacGAP1", "anillin", "myoIIA", "Cit-K", "CellMask", "PRC1", "actin"]

# Length of the cropped pseudotime region (should be roughly the line length)
# length = 500
length = 1000

# Set to True to return a z-stack
z_stack = False
# Number of planes of the z-stack. In the worst case (one tubule is sandwiched at
# the top of the stack and the other at the bottom), this must be
# 2*<max stack length>-1
num_planes = 40

In [3]:
# Before we do anything, make sure every requested channel is a known target.
# A plain membership test raises a single, unchained KeyError for the first
# missing key.
for key in desired_channel_order:
    if key not in targets:
        raise KeyError(f"Element {key} does not exist in targets dictionary!")

In [4]:
def target_names(targets, key):
    """
    Return all names of the target ``key``: the key itself followed by its
    aliases, if it has any. Needed when searching through file names.

    A ``key`` that is not present in ``targets`` yields an empty list.
    """
    if key not in targets:
        return []
    return [key, *targets[key].get("alias", [])]

In [5]:
# Load the table of contents of every requested target that has a workbook
dfs = []
for k, v in targets.items():
    if k not in desired_channel_order:
        continue
    try:
        workbook = v['workbook']
        sheet_name = v['workbook_sheet_name']
        header_row = v['workbook_header_row']
    except KeyError:
        # No workbook settings: a general marker without a table of contents
        continue
    # Read outside of the try block, so that a KeyError raised while reading the
    # workbook is reported instead of silently dropping the target
    df = pd.read_excel(workbook, sheet_name=sheet_name, header=header_row)
    df["target"] = k
    dfs.append(df)

if not dfs:
    raise ValueError("No workbooks were loaded: none of the targets in desired_channel_order defines a workbook.")

# pd.merge without `on` joins on all columns the two frames have in common
metrics = reduce(lambda left, right: pd.merge(left, right, how='outer'), dfs)

# Get rid of rows with no line specified (copy, so the assignments below act on
# an independent frame)
metrics = metrics[metrics['Y'].notna()].copy()

# Merge Length into length
if 'Length' in metrics.columns:
    if 'length' in metrics.columns:
        # Fill the gaps of length with the values of Length
        metrics['length'] = metrics['length'].fillna(metrics['Length'])
    else:
        # All our length columns are capitalized, which we do not expect
        metrics = metrics.rename(columns={'Length': 'length'})

# Drop unused columns, i.e. every column that still contains a missing value
metrics = metrics.dropna(axis=1)

In [6]:
# Now let's find the original images...
# Create the column up front, so the filter at the end also works when not a
# single row could be matched to an image file
metrics["filename"] = None
for name, entries in metrics.groupby("target"):
    image_files = glob.glob(os.path.join(targets[name]["image_directory"], "*.nd"))
    # WARNING if no files are found in the directory
    if not image_files:
        print(f"WARNING!!!! No image files found for target {name}.")
    for i, ml in entries.iterrows():
        # Label without its extension and without anything up to the last 'MAX_'
        file_stub = os.path.splitext(ml["Label"])[0].split('MAX_')[-1]
        # Keep the first image file whose path contains the stub
        match = next((fn for fn in image_files if file_stub in fn), None)
        if match is not None:
            metrics.loc[i, "filename"] = match

# Only keep the rows for which an image file was found
metrics = metrics[metrics['filename'].notna()]

In [7]:
def _channel_targets_from_filename(filename):
    """Return the target names found in ``filename``, sorted from high to low wavelength."""
    # find wavelengths in file name
    wvls_dict, binned_wvls = image_utils.extract_channel_targets_from_filename(filename, wvls=wvls)

    # Establish target names in this data set and sort from high to low to match image load
    channel_targets = [wvls_dict[str(wvl)] for wvl in sorted(binned_wvls)[::-1]]

    # the last channel is always DAPI, if unknown
    if len(channel_targets) < n_ch:
        channel_targets.append("DAPI")
    return channel_targets


# Establish columns for septin peaks (X12, X22) and distance between them (dX2)
metrics['dX2'], metrics['X12'], metrics['X22'] = np.nan, np.nan, np.nan

groups = metrics.groupby(time_key)

plot_stack = None
n_groups = len(groups)
l2 = length // 2
num_planes = num_planes if z_stack else 1

# One averaged image per stage: TCZYX for z-stacks, TCYX otherwise.
# The shape is spelled out instead of squeezing a 5D array, because squeeze()
# would also remove the stage (or channel) axis if there is only one stage
# (or channel), which breaks the indexing below.
if z_stack:
    group_img = np.zeros((n_groups, len(desired_channel_order), num_planes, length, length))
else:
    group_img = np.zeros((n_groups, len(desired_channel_order), length, length))

for group, (name, entries) in enumerate(groups):
    n_group = len(entries)
    print(f"{name}: {n_group} averaged")

    # In a first pass, fit the septin ring distances for registration
    for i, ml in entries.iterrows():
        if ml[time_key] not in time_do_not_fit:
            # If we are in a class where it makes sense...
            print(f"  Septin ring fit for {os.path.basename(ml['filename'])}")

            # Get the image associated with this row and load it with the channels sorted from high to low
            im = image_utils.NDImage(ml["filename"], load_sorted=True)

            # get x, y, angle for this row
            x, y, angle = ml[["X", "Y", "Angle"]]

            # target names of the channels, in the order the image was loaded
            channel_targets = _channel_targets_from_filename(ml["filename"])

            # ... get the septin peaks
            mt_names = target_names(targets, "MTs")
            septin_names = target_names(targets, "septin")
            mt_ch = [c for c, tgt in enumerate(channel_targets) if tgt in mt_names][0]
            septin_ch = [c for c, tgt in enumerate(channel_targets) if tgt in septin_names][0]
            p0, p1, dX2 = line_utils.find_septin_peaks(im[:].mean(1).squeeze(), x, y, angle, length,
                                                        mt_ch=mt_ch,
                                                        septin_ch=septin_ch)

            metrics.loc[i, ['X12', 'X22', 'dX2']] = [p0, p1, dX2]

    # Now compute the average distance.
    # `entries` was split off from metrics before the fit results were stored, so
    # its dX2 column is still all NaN; read the fitted values back from metrics.
    mean_dX2 = metrics.loc[entries.index, 'dX2'].mean()

    # In our second pass, average these images
    for name2, entries2 in entries.groupby("target"):
        n_target = len(entries2)
        print(f"  {name2}: {n_target} averaged")
        for i, ml in entries2.iterrows():
            print(f"  Analyzing {os.path.basename(ml['filename'])}")

            # Get the image associated with this row
            im = image_utils.NDImage(ml["filename"], load_sorted=True)

            # target names of the channels, in the order the image was loaded
            channel_targets = _channel_targets_from_filename(ml["filename"])

            # Now find the resorting of the channels according to their target position:
            # channel_order indexes the channels of this image, group_channel_order
            # the matching channels of group_img
            channel_order = []
            group_channel_order = []
            mt_channel = 0
            for j, ch in enumerate(desired_channel_order):
                for opt in target_names(targets, ch):
                    if opt in channel_targets:
                        channel_order.append(channel_targets.index(opt))
                        group_channel_order.append(j)
                        if ch == "MTs":
                            mt_channel = channel_targets.index(opt)
            assert len(channel_order) == n_ch
            print(f"  channel_targets: {channel_targets} channel_order: {channel_order} group_channel_order: {group_channel_order}")

            if z_stack:
                if ml[time_key] not in time_do_not_fit:
                    # Grab the z-coordinate of the central bit of the tubule, cast to integer
                    z_coord = int(round(line_utils.find_central_pos(im[:].max(2).squeeze(), ml["X"], ch=mt_channel)))
                else:
                    # No fit at this stage: use the middle plane of the stack
                    z_coord = im[:].shape[-3] // 2
                metrics.loc[i, "z_coord"] = z_coord
                print(f"  im.shape: {im.shape} projection shape: {im[:].max(2).squeeze().shape} z_coord: {z_coord}")

                # Normalize by the sum over the last three axes
                im = im[:]/im[:].sum(-1).sum(-1).sum(-1)[:, None, None, None]
            else:
                im = im[:].mean(1).squeeze()

                # Normalize by the sum over the last two axes
                im = im/im.sum(-1).sum(-1)[:, None, None]

            # get x, y, angle for this row
            x, y, angle = ml[["X", "Y", "Angle"]]

            # Rotate the image  # CYX
            im_rot = image_utils.pad_rot_and_trans_im(im, angle, x, y)

            # Crop the image
            xc, yc = im_rot.shape[-1]//2, im_rot.shape[-2]//2
            im_crop = im_rot[..., (yc-length):(yc+length), (xc-length):(xc+length)]

            # rescale the image along x; the fitted distance lives in metrics,
            # not in the `ml` snapshot of the row
            row_dX2 = metrics.loc[i, "dX2"]
            if np.isnan(row_dX2):
                im_zoom = im_crop
            else:
                # NOTE(review): mag > 1 for rings that are further apart than the
                # group mean, which stretches them even more. Confirm that this is
                # the intended direction (mean_dX2/row_dX2 would scale to the mean).
                mag = row_dX2/mean_dX2
                if z_stack:
                    im_zoom = ndi.zoom(im_crop, (1, 1, 1, mag))
                else:
                    im_zoom = ndi.zoom(im_crop, (1, 1, mag))

            # Crop the image again
            xc, yc = im_zoom.shape[-1]//2, im_zoom.shape[-2]//2
            im_crop2 = im_zoom[..., (yc-l2):(yc+l2), (xc-l2):(xc+l2)]

            # Add the image with a weighting 1/length of the group: the first three
            # channels are divided by the size of the stage group, the fourth by
            # the number of images of this target.
            # Assumes the three general markers precede the target in
            # desired_channel_order - TODO confirm
            weights = np.array([n_group, n_group, n_group, n_target])
            if z_stack:
                # Place the stack so that plane z_coord lands on the central plane of group_img
                z_length = im_crop2.shape[-3]
                z_length2, num_planes2 = z_length // 2, num_planes // 2
                zc = num_planes2 + z_length2 - z_coord
                zl, zu = (zc-z_length2), (zc+(z_length-z_length2))

                group_img[group, group_channel_order, zl:zu, ...] += (im_crop2[channel_order]/weights[:, None, None, None])
            else:
                group_img[group, group_channel_order, ...] += (im_crop2[channel_order]/weights[:, None, None])
        

A: 22 averaged
  Cit-K: 11 averaged
  Analyzing 20240604_NRK49FS2GFP_UExM_GFP-488_Cit-K-568_a-tub-647_1.5h_02.nd
  loading  ['CSU635', 'CSU561', 'CSU491', 'CSU405 QUAD'] [0 1 2 3]
  channel_targets: ['a-tub', 'Cit-K', 'GFP', 'DAPI'] channel_order: [0, 2, 3, 1] group_channel_order: [0, 1, 2, 3]
  Analyzing 20240604_NRK49FS2GFP_UExM_GFP-488_Cit-K-568_a-tub-647_1.5h_03.nd
  loading  ['CSU635', 'CSU561', 'CSU491', 'CSU405 QUAD'] [0 1 2 3]
  channel_targets: ['a-tub', 'Cit-K', 'GFP', 'DAPI'] channel_order: [0, 2, 3, 1] group_channel_order: [0, 1, 2, 3]
  Analyzing 20240604_NRK49FS2GFP_UExM_GFP-488_Cit-K-568_a-tub-647_1.5h_05.nd
  loading  ['CSU635', 'CSU561', 'CSU491', 'CSU405 QUAD'] [0 1 2 3]
  channel_targets: ['a-tub', 'Cit-K', 'GFP', 'DAPI'] channel_order: [0, 2, 3, 1] group_channel_order: [0, 1, 2, 3]
  Analyzing 20240604_NRK49FS2GFP_UExM_GFP-488_Cit-K-568_a-tub-647_1.5h_08.nd
  loading  ['CSU635', 'CSU561', 'CSU491', 'CSU405 QUAD'] [0 1 2 3]
  channel_targets: ['a-tub', 'Cit-K', 'GFP'

In [8]:
# Stage names in the order the groups were iterated, i.e. the order along the
# first axis of group_img
group_order = list(groups[time_key].unique().keys())

# Positions of the stages within group_order, arranged according to time_order.
# Stages that do not appear in time_order are left out.
position_of = {stage: pos for pos, stage in enumerate(group_order)}
group_img_sorted = [position_of[stage] for stage in time_order if stage in position_of]
print(group_img_sorted)

[3, 2, 4, 5, 1, 0]


In [9]:
# The file name lists one short tag per channel: the first two characters of the
# channel name followed by its last character
channel_tags = [ch[0:2] + ch[-1] for ch in desired_channel_order]
ext = "_zstack.ome.tif" if z_stack else ".ome.tif"
stack_fn = "pseudotime_images_" + "_".join(channel_tags) + ext

# Write the stages in time order; z-stacks carry an additional Z axis
axes = 'TCZYX' if z_stack else 'TCYX'
tf.imwrite(stack_fn, group_img[group_img_sorted, ...], metadata={'axes': axes}, dtype=group_img.dtype)