In [None]:
import glob
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from dredFISH.Utils import basicu
from dredFISH.Utils import miscu
from dredFISH.Utils import powerplots
from dredFISH.Visualization import compile_tex
from matplotlib.collections import LineCollection

import importlib
# Re-execute the project modules so that edits made to their source files
# are picked up without restarting the kernel.
importlib.reload(powerplots)
importlib.reload(miscu)
importlib.reload(compile_tex)

In [None]:
def plot_basis_box(ftrs_mat, output=None):
    """
    Draw the same seaborn box plot of `ftrs_mat` in two stacked panels.

    The panels share the x axis. The top panel keeps the automatic y range;
    the bottom panel is restricted to [-3, 3] and carries the x label.

    Parameters
    ----------
    ftrs_mat
        Passed unchanged as `data` to `sns.boxplot`
        (presumably one column per basis -- confirm with the caller).
    output : optional
        When not None, the figure is saved via `powerplots.savefig_autodate`.
    """
    fig, axs = plt.subplots(2, 1, figsize=(10, 6), sharex=True)
    ax_full, ax_zoom = axs

    for panel in (ax_full, ax_zoom):
        sns.boxplot(data=ftrs_mat, ax=panel)
        panel.set_ylabel('zscore')

    # only the bottom panel gets the x label and the clipped y range
    ax_zoom.set_xlabel('basis')
    ax_zoom.set_ylim([-3, 3])

    if output is not None:
        powerplots.savefig_autodate(fig, output)
    plt.show()

In [None]:
def draw_control_points(points, ax):
    """
    Draw a polygon's vertices and its closed outline on `ax`.

    Every vertex is shown as a red marker labelled with its position in
    `points`. Consecutive vertices are joined by red line segments, and the
    last vertex is joined back to the first.

    Parameters
    ----------
    points : sequence of (x, y)
        Polygon vertices, in drawing order.
    ax
        Axes-like object providing `scatter`, `text` and `add_collection`.

    Returns
    -------
    list
        One `[start, end]` vertex pair per edge of the outline.
    """
    n_points = len(points)
    # the modulo wraps the final edge back to vertex 0, closing the outline
    line_segs = [[points[k], points[(k + 1) % n_points]] for k in range(n_points)]

    coords = np.asarray(points)
    ax.scatter(coords[:, 0], coords[:, 1], color='r')
    for k, vertex in enumerate(points):
        ax.text(vertex[0], vertex[1], k)

    ax.add_collection(LineCollection(line_segs, linewidth=1, colors='r'))
    return line_segs

In [None]:
def preview_section(meta, bbox, 
                    nuclei_signal_th=1500, 
                    title='', 
                    meta_output=None, 
                    bbox_output=None,
                    fig_output=None,
                   ):
    """
    Preview which cells of a section pass the signal and polygon filters.

    A cell is kept when its `nuclei_signal` is strictly greater than
    `nuclei_signal_th` and `miscu.is_in_polygon` reports it inside `bbox`.
    Two panels are drawn: all cells on the left; on the right, the cells
    above the signal threshold (black if kept, light gray otherwise)
    together with the polygon outline.

    Parameters
    ----------
    meta : pandas.DataFrame
        Per-cell table with at least the columns `stage_x`, `stage_y`
        and `nuclei_signal`.
    bbox : sequence of (x, y)
        Polygon vertices, in the same coordinates as `stage_x`/`stage_y`.
    nuclei_signal_th : number
        Cells with `nuclei_signal` <= this value are discarded.
    title : str
        Title of the left panel.
    meta_output : optional
        If not None, the kept rows of `meta` are written there as CSV
        (with header and index).
    bbox_output : optional
        If not None, `bbox` is written there as CSV (no header, no index).
    fig_output : optional
        If not None, the figure is saved via `powerplots.savefig_autodate`.

    Returns
    -------
    None
    """
    df = meta #.join(mtx)
    points = bbox

    # stage coordinates of every cell
    XY = df[['stage_x', 'stage_y']].values
    x = XY[:, 0]
    y = XY[:, 1]

    # cond = df['cytoplasm_size'] > 10
    cond = (df['nuclei_signal'] > nuclei_signal_th).values

    fig, axs = plt.subplots(1, 2, figsize=(8*2, 8))

    # left panel: every cell, unfiltered
    ax = axs[0]
    ax.scatter(x, y, s=1, edgecolor='none', color='black', rasterized=True)
    ax.set_title(title)
    ax.set_aspect('auto')

    # right panel: polygon outline plus the cells above the signal threshold
    ax = axs[1]
    draw_control_points(points, ax)
    res = miscu.is_in_polygon(points, XY[cond])

    # map "inside polygon" (relative to the thresholded cells) back to row positions in df
    idx_filtered = np.arange(len(XY))[cond][res]
    df_filtered = df.iloc[idx_filtered]

    colors = np.array(['lightgray', 'black'])
    ax.scatter(x[cond], y[cond], s=1, c=colors[(res).astype(int)], edgecolor='none', rasterized=True)
    ax.set_title(f'Filtered n={len(df_filtered):,}/{len(meta):,} cells')
    ax.set_aspect('equal')

    # Zoom onto the kept cells with a 10% margin. When nothing was kept the
    # min/max are NaN, which matplotlib rejects as axis limits, so the
    # automatic limits are left in place instead.
    if not df_filtered.empty:
        xmin = df_filtered['stage_x'].min()
        xmax = df_filtered['stage_x'].max()
        ymin = df_filtered['stage_y'].min()
        ymax = df_filtered['stage_y'].max()
        xpad = 0.1*(xmax-xmin)
        ypad = 0.1*(ymax-ymin)
        ax.set_xlim([xmin-xpad, xmax+xpad])
        ax.set_ylim([ymin-ypad, ymax+ypad])

    # save figure
    if fig_output is not None:
        powerplots.savefig_autodate(fig, fig_output)
    plt.show()

    # save filtered meta
    if meta_output is not None:
        print(meta_output)
        df_filtered.to_csv(meta_output, header=True, index=True)

    # save bounding box
    if bbox_output is not None:
        print(bbox_output)
        pd.DataFrame(points).to_csv(bbox_output, header=False, index=False)

# All sections

In [None]:
# Dataset location and the sub-folder that collects the QC figures.
path_dataset = '/bigstore/GeneralStorage/Data/dredFISH/Dataset4-t1'
path_fig = os.path.join(path_dataset, 'figures_pre')
if not os.path.isdir(path_fig):
    os.mkdir(path_fig)

# `root_dir` is the documented way to glob relative to a directory path
# (`dir_fd` is meant for an open directory file descriptor, not a path string).
# The returned names are relative to `path_dataset`.
# The two sorted lists are later indexed with the same section number, so the
# matrix and metadata files are assumed to pair up one-to-one in sorted order.
files_mtx = np.sort(glob.glob('*_matrix.csv', root_dir=path_dataset))
files_meta = np.sort(glob.glob('*_metadata.csv', root_dir=path_dataset))
files_mtx.shape, files_meta.shape

In [None]:
# Section number -> short section name, derived from each matrix file name by
# dropping the '_matrix.csv' suffix and shortening the long acquisition prefix.
sections = dict(enumerate(
    fname.replace('_matrix.csv', '')
         .replace('DPNMF_3B_4C_5A_6A_9C_10B_2022Aug01_Section_', '2022Aug01_')
    for fname in files_mtx
))
sections

# Prerun all sections
- Initialize each section's bounding box to the full extent of its cell coordinates.

In [None]:
# Pre-run: give every section a default bounding box that spans the full
# extent of its cell coordinates, preview it, and write the box to disk.
bboxes = {}
for sctn, sctn_name in sections.items():
    file_meta = files_meta[sctn]
    meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
    bbox_output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv')
    print(file_meta)

    # default box: axis-aligned rectangle around all cells of the section
    xmin, xmax = np.min(meta['stage_x']), np.max(meta['stage_x'])
    ymin, ymax = np.min(meta['stage_y']), np.max(meta['stage_y'])
    print(meta['nuclei_signal'].max())

    points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
    bboxes[sctn] = points

    # threshold of -1 disables the nuclei-signal filter for non-negative signals;
    # pass bbox_output=None instead to preview without writing the box
    preview_section(
        meta, points,
        nuclei_signal_th=-1,
        title=f"Sec{sctn}_{sctn_name}",
        bbox_output=bbox_output,
    )

# Refine bounding boxes
- Start from the pre-run bounding boxes, then override individual sections as needed.

In [None]:
# Shallow copy: refined boxes start out as the pre-run boxes and are then
# replaced per section, leaving `bboxes` itself untouched.
bboxes_refined = dict(bboxes)

In [None]:
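# Template for refining one section's bounding box: uncomment, set `sctn`,
# and edit `points` to the polygon vertices for that section.
# sctn = 0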
# sctn_name = sections[sctn]
# file_meta = files_meta[sctn]

# meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
# bbox_output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv') 
# print(file_meta)

# # refined
# points = [
#     (-6500, 20400), 
#     (-1000, 20600),
#     (0, 26000),
#     (-1000, 29000),
#     (-10000, 29000),
#     (-10000, 24000),
# ]
# bboxes_refined[sctn] = points

# preview_section(meta, points, 
#                 title=f"Sec{sctn}_{sctn_name}",
#                 # bbox_output=None,
#                 bbox_output=bbox_output,
#                )

# Recheck everything 

In [None]:
# Final pass: re-apply the (possibly refined) bounding box of every section and
# write the filtered metadata, the bounding box and the QC figure to disk.
for sctn, sctn_name in sections.items():
    file_meta = files_meta[sctn]
    points = bboxes_refined[sctn]

    # output locations for this section
    meta_output = os.path.join(path_dataset, file_meta.replace('metadata', 'metadata_filtered'))
    bbox_output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv')
    fig_output = os.path.join(path_fig, f'fig_qc_cells_sect{sctn}_{sctn_name}.pdf')

    meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)

    preview_section(
        meta, points,
        title=f"Sec{sctn}_{sctn_name}",
        nuclei_signal_th=-1,
        fig_output=fig_output,
        meta_output=meta_output,
        bbox_output=bbox_output,
    )

In [None]:
# Reload compile_tex so on-disk edits to it are used, then run its entry point
# on the dataset folder. NOTE(review): presumably this assembles the figures in
# 'figures_pre' into a report -- confirm against compile_tex.main.
importlib.reload(compile_tex)
compile_tex.main(
    path_dataset,
    subpth_res='figures_pre',
    title='dredFISH 23-section clean up',
    author='Wollman lab',
)