In [None]:
import glob
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from dredFISH.Utils import basicu
from dredFISH.Utils import miscu
from dredFISH.Utils import powerplots
from dredFISH.Visualization import compile_tex
from matplotlib.collections import LineCollection

import importlib
importlib.reload(powerplots)
importlib.reload(miscu)
importlib.reload(compile_tex)

# 12-section brain dataset

In [None]:
# Root folder of the dataset; the per-section CSV files are looked up directly inside it.
path_dataset = '/bigstore/GeneralStorage/Data/dredFISH/Dataset3-t1'
# Folder that receives the figures produced by this notebook.
path_fig = os.path.join(path_dataset, 'figures_pre')
if not os.path.isdir(path_fig):
    os.mkdir(path_fig)

# `root_dir` is the documented way to glob relative to a directory given as a
# path; `dir_fd` is meant for an already-open directory file descriptor.
# The matched names are relative to `path_dataset`, which is why later cells
# join them with `path_dataset` before reading.
files_mtx = np.sort(glob.glob('*_matrix.csv', root_dir=path_dataset))
files_meta = np.sort(glob.glob('*_metadata.csv', root_dir=path_dataset))
files_mtx.shape, files_meta.shape

In [None]:
# Map each position in `files_mtx` to a short section label: the matrix file
# name with the '_matrix.csv' suffix and the shared experiment prefix removed.
sections = dict(enumerate(
    fname.replace('_matrix.csv', '').replace('DPNMF_1A_2B_11A_12B_2022Jul28_Section_', '')
    for fname in files_mtx
))
sections

# One section -- minimal cleanup and preview

In [None]:
def plot_basis_box(ftrs_mat, output=None):
    """Draw the same seaborn box plot twice in two stacked panels.

    The panels share the x axis. Both are labelled 'zscore' on y; only the
    bottom panel gets the 'basis' x label and has its y range fixed to
    [-3, 3], while the top panel keeps the automatic y range.

    Parameters
    ----------
    ftrs_mat
        Passed unchanged as ``data`` to ``sns.boxplot``.
    output : optional
        When not None, the figure and this value are handed to
        ``powerplots.savefig_autodate`` before the figure is shown.
    """
    fig, (ax_full, ax_zoom) = plt.subplots(2, 1, figsize=(10, 6), sharex=True)

    for panel in (ax_full, ax_zoom):
        sns.boxplot(data=ftrs_mat, ax=panel)
        panel.set_ylabel('zscore')

    # Only the lower panel is annotated on x and zoomed on y.
    ax_zoom.set_xlabel('basis')
    ax_zoom.set_ylim([-3, 3])

    if output is not None:
        powerplots.savefig_autodate(fig, output)
    plt.show()

In [None]:
def draw_control_points(points, ax):
    """Draw a closed polygon through `points` on `ax` and number its vertices.

    Each vertex is plotted as a red marker and labelled with its position in
    `points`; consecutive vertices are joined by red segments, and the last
    vertex is joined back to the first.

    Parameters
    ----------
    points : sequence of (x, y)
        Polygon vertices, in drawing order.
    ax
        Axes-like object providing ``scatter``, ``text`` and ``add_collection``.

    Returns
    -------
    list
        One ``[start, end]`` vertex pair per edge, closing edge included.
    """
    n_pts = len(points)
    # The modulo wraps the final edge back to vertex 0.
    edges = [[points[k], points[(k + 1) % n_pts]] for k in range(n_pts)]

    coords = np.asarray(points)
    ax.scatter(coords[:, 0], coords[:, 1], color='r')
    for label, vertex in enumerate(points):
        ax.text(vertex[0], vertex[1], label)

    ax.add_collection(LineCollection(edges, linewidth=1, colors='r'))
    return edges

In [None]:
# Pick the section to inspect and the matrix / metadata files that belong to it.
sctn = 0
sctn_name = sections[sctn]
file_mtx = files_mtx[sctn]
file_meta = files_meta[sctn]
# Report the files selected by `sctn`, not a hard-coded index 0.
print(file_mtx, file_meta)

# Only the metadata table is loaded here; the matrix file is not read.
meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
df = meta #.join(mtx)

# XY: per-row stage coordinates as an (n_rows, 2) array and its two columns
XY = df[['stage_x', 'stage_y']].values
x = XY[:,0] 
y = XY[:,1] 

# Boolean mask of rows whose nuclei signal exceeds the threshold.
# cond = df['cytoplasm_size'] > 10
cond = df['nuclei_signal'] > 1500
cond

In [None]:
# Saving is disabled here; a target such as
# os.path.join(path_fig, f"fig1_xy_sect{sctn}_{sctn_name}.pdf") could be used instead.
output = None

# Every position in the section.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(x, y, s=1, edgecolor='none', color='black', rasterized=True)
ax.set(title=sctn_name, aspect='equal')
plt.show()

# Only the positions that pass the nuclei-signal mask `cond`.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(x[cond], y[cond], s=1, edgecolor='none', color='black', rasterized=True)
ax.set(title=sctn_name, aspect='equal')
plt.show()

In [None]:
# Hand-picked vertices of the bounding polygon, listed in drawing order.
points = [
    (-6500, 20400), (-1000, 20600), (0, 26000),
    (-1000, 29000), (-10000, 29000), (-10000, 24000),
]
# Polygon membership is evaluated only for positions that pass `cond`.
res = miscu.is_in_polygon(points, XY[cond])

# Placeholder; a figure path such as
# os.path.join(path_fig, f"fig1_xy_sect{sctn}_{sctn_name}.pdf") could go here.
output = ""

# All positions in gray with the polygon outline drawn on top.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(x, y, s=1, edgecolor='none', color='gray', rasterized=True)
draw_control_points(points, ax)
ax.set(title=sctn_name, aspect='equal')
plt.show()

In [None]:
# Sizes at each stage of the filtering.
(
    df.shape,              # full metadata table
    df.loc[cond].shape,    # rows passing the nuclei-signal mask
    XY.shape,              # all coordinates
    XY[cond][res].shape,   # masked coordinates selected by the polygon test
)

In [None]:
# Save the polygon vertices, one "x,y" row per vertex, without header or index.
# The file name follows the `bbox_sect{sctn}_{sctn_name}.csv` pattern used by
# every other cell that writes a bounding box, so the same section does not
# end up with two differently named bbox files.
output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv')
print(output)
pd.DataFrame(points).to_csv(output, header=False, index=False)

# All sections -- test `preview_section` on a few sections

In [None]:
def preview_section(meta, bbox, nuclei_signal_th=1500, title='', meta_output=None, bbox_output=None):
    """Preview one section before and after nuclei-signal / polygon filtering.

    The left panel shows every position in `meta`. The right panel shows the
    positions whose nuclei signal exceeds the threshold: black when
    ``miscu.is_in_polygon`` selects them for `bbox`, light gray otherwise,
    with the polygon outline drawn by ``draw_control_points``. The right panel
    is zoomed to the selected positions plus a 10% margin on each side.

    Parameters
    ----------
    meta : pandas.DataFrame
        Must provide the columns 'stage_x', 'stage_y' and 'nuclei_signal'.
    bbox : sequence of (x, y)
        Polygon vertices.
    nuclei_signal_th : number, optional
        Rows are kept only when 'nuclei_signal' is strictly greater than this.
    title : str, optional
        Title of the left panel.
    meta_output : optional
        When not None, the filtered rows are written there as CSV
        (with header and index).
    bbox_output : optional
        When not None, the polygon vertices are written there as CSV
        (no header, no index).

    Returns
    -------
    None
    """
    xy = meta[['stage_x', 'stage_y']].values
    x, y = xy[:, 0], xy[:, 1]

    # Plain boolean array so it can index numpy arrays and positions alike.
    passes_signal = (meta['nuclei_signal'] > nuclei_signal_th).values

    fig, (ax_all, ax_kept) = plt.subplots(1, 2, figsize=(8*2, 8))

    # Left panel: everything, unfiltered.
    ax_all.scatter(x, y, s=1, edgecolor='none', color='black', rasterized=True)
    ax_all.set_title(title)
    ax_all.set_aspect('auto')

    # Right panel: polygon outline plus the thresholded positions.
    draw_control_points(bbox, ax_kept)
    in_bbox = miscu.is_in_polygon(bbox, xy[passes_signal])

    # Positional row indices that pass the threshold AND the polygon test.
    kept_rows = np.flatnonzero(passes_signal)[in_bbox]
    df_filtered = meta.iloc[kept_rows]
    print(df_filtered.shape)
    xmin = df_filtered['stage_x'].min()
    xmax = df_filtered['stage_x'].max()
    ymin = df_filtered['stage_y'].min()
    ymax = df_filtered['stage_y'].max()

    # Index 0 -> rejected by the polygon test, index 1 -> selected.
    colors = np.array(['lightgray', 'black'])
    ax_kept.scatter(x[passes_signal], y[passes_signal], s=1,
                    c=colors[(in_bbox).astype(int)], edgecolor='none', rasterized=True)
    ax_kept.set_title('filtered by nuclei signal and defined bounding box')
    ax_kept.set_aspect('equal')
    print(xmin, xmax, ymin, ymax)

    # With nothing selected the extents are NaN, and matplotlib refuses NaN
    # axis limits; keep the automatic view so the polygon can still be
    # inspected and adjusted instead of aborting the preview.
    if np.all(np.isfinite([xmin, xmax, ymin, ymax])):
        ax_kept.set_xlim([xmin-0.1*(xmax-xmin), xmax+0.1*(xmax-xmin)])
        ax_kept.set_ylim([ymin-0.1*(ymax-ymin), ymax+0.1*(ymax-ymin)])
    else:
        print('no positions pass both filters; keeping automatic axis limits')
    plt.show()

    # save filtered meta
    if meta_output is not None:
        print(meta_output)
        df_filtered.to_csv(meta_output, header=True, index=True)

    # save bounding box
    if bbox_output is not None:
        print(bbox_output)
        pd.DataFrame(bbox).to_csv(bbox_output, header=False, index=False)

In [None]:
# Bind `df` to the same DataFrame object as `meta` (an alias, not a copy).
df = meta

In [None]:
# Section 0: preview with a hand-drawn bounding polygon.
sctn = 0
sctn_name, file_meta = sections[sctn], files_meta[sctn]

# Polygon vertices, in drawing order.
points = [
    (-6500, 20400), (-1000, 20600), (0, 26000),
    (-1000, 29000), (-10000, 29000), (-10000, 24000),
]

meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
bbox_output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv')
print(file_meta)

# Passing bbox_output makes preview_section write the polygon to disk;
# use bbox_output=None for a dry run.
preview_section(meta, points, title=f"Sec{sctn}_{sctn_name}", bbox_output=bbox_output)

In [None]:
# Section 1: preview with a hand-drawn bounding polygon.
sctn = 1
sctn_name, file_meta = sections[sctn], files_meta[sctn]

# Polygon vertices, in drawing order.
points = [
    (-6500, 20400), (-2000, 20700), (-2000, 14000),
    (-12000, 14000), (-12000, 22000), (-8000, 22000),
]

meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
bbox_output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv')
print(file_meta)

# Passing bbox_output makes preview_section write the polygon to disk;
# use bbox_output=None for a dry run.
preview_section(meta, points, title=f"Sec{sctn}_{sctn_name}", bbox_output=bbox_output)

In [None]:
# Section 4: preview with the default rectangle around all positions.
sctn = 4
sctn_name, file_meta = sections[sctn], files_meta[sctn]

meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
bbox_output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv')
print(file_meta)

# Default polygon: the axis-aligned rectangle spanning the full range of
# stage_x / stage_y in this section's metadata.
xmin, xmax = np.min(meta['stage_x']), np.max(meta['stage_x'])
ymin, ymax = np.min(meta['stage_y']), np.max(meta['stage_y'])
points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]

# Passing bbox_output makes preview_section write the polygon to disk;
# use bbox_output=None for a dry run.
preview_section(
    meta,
    points,
    nuclei_signal_th=1500,
    title=f"Sec{sctn}_{sctn_name}",
    bbox_output=bbox_output,
)

# Prerun all sections
- initialize every section with a default bounding box spanning its full x/y extent

In [None]:
# Default bounding box for every section, keyed by section index.
bboxes = {}
for sctn, sctn_name in sections.items():
    file_meta = files_meta[sctn]

    meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
    bbox_output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv')
    print(file_meta)

    # Default polygon: rectangle spanning the full stage_x / stage_y range.
    xmin, xmax = np.min(meta['stage_x']), np.max(meta['stage_x'])
    ymin, ymax = np.min(meta['stage_y']), np.max(meta['stage_y'])
    points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
    bboxes[sctn] = points

    # Passing bbox_output makes preview_section write the default polygon to
    # disk for this section; use bbox_output=None for a dry run.
    preview_section(meta, points, title=f"Sec{sctn}_{sctn_name}", bbox_output=bbox_output)

# Refine bounding boxes
- initialize the refined boxes as a copy of the defaults, then override individual sections

In [None]:
# Start from a shallow copy of the default boxes; the cells below replace
# individual entries, leaving `bboxes` itself untouched.
bboxes_refined = dict(bboxes)

In [None]:
# Section 0: replace the default box with a hand-drawn polygon.
sctn = 0
sctn_name, file_meta = sections[sctn], files_meta[sctn]

meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
bbox_output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv')
print(file_meta)

# Refined polygon vertices, in drawing order.
points = [
    (-6500, 20400), (-1000, 20600), (0, 26000),
    (-1000, 29000), (-10000, 29000), (-10000, 24000),
]
bboxes_refined[sctn] = points

# Passing bbox_output makes preview_section write the polygon to disk;
# use bbox_output=None for a dry run.
preview_section(meta, points, title=f"Sec{sctn}_{sctn_name}", bbox_output=bbox_output)

In [None]:
# Section 1: replace the default box with a hand-drawn polygon.
sctn = 1
sctn_name, file_meta = sections[sctn], files_meta[sctn]

meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
bbox_output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv')
print(file_meta)

# Refined polygon vertices, in drawing order.
points = [
    (-6500, 20400), (-2000, 20700), (-2000, 14000),
    (-12000, 14000), (-12000, 22000), (-8000, 22000),
]
bboxes_refined[sctn] = points

# Passing bbox_output makes preview_section write the polygon to disk;
# use bbox_output=None for a dry run.
preview_section(meta, points, title=f"Sec{sctn}_{sctn_name}", bbox_output=bbox_output)

In [None]:
# Section 3: replace the default box with a hand-set rectangle.
sctn = 3
sctn_name, file_meta = sections[sctn], files_meta[sctn]

meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
bbox_output = os.path.join(path_dataset, f'bbox_sect{sctn}_{sctn_name}.csv')
print(file_meta)

# Refined rectangle vertices, in drawing order.
points = [
    (-14000, -27748), (-5500, -27748),
    (-5500, -22733), (-14000, -22733),
]
bboxes_refined[sctn] = points

# Passing bbox_output makes preview_section write the polygon to disk;
# use bbox_output=None for a dry run.
preview_section(meta, points, title=f"Sec{sctn}_{sctn_name}", bbox_output=bbox_output)

In [None]:
# importlib.reload(compile_tex)
# compile_tex.main(path_dataset, title='dredFISH 12-section check', author='Wollman lab')