In [None]:
import glob
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from dredFISH.Utils import basicu
from dredFISH.Utils import powerplots
from dredFISH.Visualization import compile_tex

import importlib
# Re-execute the project modules so that edits made to them on disk are
# picked up without restarting the kernel.
importlib.reload(powerplots)
importlib.reload(compile_tex)

# 12-section brain dataset

In [None]:
# Directory that is searched for the per-section '*_matrix.csv' and
# '*_metadata.csv' files.
path_dataset = '/bigstore/GeneralStorage/Data/dredFISH/Dataset3-t1'

# Figures are written to a 'figures' subdirectory, created on first use.
path_fig = os.path.join(path_dataset, 'figures')
if not os.path.isdir(path_fig):
    os.mkdir(path_fig)

# `root_dir` is the documented way to glob relative to a directory given by
# path (`dir_fd` is meant for an open directory file descriptor). The returned
# names are relative to `path_dataset`, i.e. bare file names.
# Both lists are sorted independently; later cells pair them by position.
files_mtx = np.sort(glob.glob('*_matrix.csv', root_dir=path_dataset))
files_meta = np.sort(glob.glob('*_metadata.csv', root_dir=path_dataset))
files_mtx.shape, files_meta.shape

In [None]:
# Map each position in `files_mtx` to a short section label, obtained by
# removing the '_matrix.csv' suffix text and the shared experiment prefix
# from the matrix file name.
sections = dict(enumerate(
    f.replace('_matrix.csv', '').replace('DPNMF_1A_2B_11A_12B_2022Jul28_Section_', '')
    for f in files_mtx
))
sections

# 1 section -- minimal clean up and show

In [None]:
def plot_basis_box(ftrs_mat, output=None):
    """Draw the same per-column box plot of `ftrs_mat` in two stacked panels.

    Both panels share the x axis and are labelled 'zscore' on y. The top
    panel keeps the automatic y range; the bottom panel is restricted to
    [-3, 3] and carries the 'basis' x label.

    Parameters
    ----------
    ftrs_mat
        Data handed to `sns.boxplot(data=...)`.
        NOTE(review): presumably a (cells x basis) array of normalised
        values -- confirm against the caller.
    output : optional
        If not None, the figure is passed to `powerplots.savefig_autodate`
        together with this value before being shown.
    """
    fig, (ax_full, ax_zoom) = plt.subplots(2, 1, figsize=(10, 3*2), sharex=True)

    for panel in (ax_full, ax_zoom):
        sns.boxplot(data=ftrs_mat, ax=panel)
        panel.set_ylabel('zscore')

    # Only the bottom panel gets the x label and the clipped y range.
    ax_zoom.set_xlabel('basis')
    ax_zoom.set_ylim([-3, 3])

    if output is not None:
        powerplots.savefig_autodate(fig, output)
    plt.show()

In [None]:
# Pick one section by its position in `sections` / `files_mtx` / `files_meta`.
sctn = 0
sctn_name = sections[sctn]
file_mtx = files_mtx[sctn]
file_meta = files_meta[sctn]
# Report the files that were actually selected, so the message stays correct
# when `sctn` is changed.
print(file_mtx, file_meta)

# Names given to the 24 matrix columns: 'br0' ... 'br23'. Built once and
# reused for renaming and for selecting the columns back out of `df`.
basis_cols = np.char.add('br', np.arange(24).astype(str))

mtx = pd.read_csv(os.path.join(path_dataset, file_mtx), sep=',', index_col=0)
mtx.columns = basis_cols
meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
# Index-aligned join of the metadata with the renamed matrix columns.
df = meta.join(mtx)

# Keep only rows whose 'nuclei_signal' exceeds 1500.
# cond = df['cytoplasm_size'] > 10
cond = df['nuclei_signal'] > 1500
# .copy() makes the filtered frame independent, so adding the 'b{i}' columns
# below does not trigger pandas' SettingWithCopyWarning.
df = df.loc[cond].copy()
mtx = df[basis_cols]

# norm
# NOTE(review): presumably normalises per cell and per basis as the flags
# suggest -- confirm against basicu.normalize_fishdata.
ftrs_mat = basicu.normalize_fishdata(mtx.values, norm_cell=True, norm_basis=True)
# Store each normalised column next to the raw one as 'b0', 'b1', ...
for i in range(ftrs_mat.shape[1]):
    df[f'b{i}'] = ftrs_mat[:,i]

# XY
XY = df[['stage_x', 'stage_y']].values
x = XY[:,0]
y = XY[:,1]

In [None]:
# Figure 1: scatter of the stage x/y coordinates of the selected section.
output = os.path.join(path_fig, f"fig1_xy_sect{sctn}_{sctn_name}.pdf")
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(x, y, color='black', edgecolor='none', s=1, rasterized=True)
ax.set(title=sctn_name, aspect='equal')
powerplots.savefig_autodate(fig, output)
plt.show()

# Figure 2: box plots of the normalised feature matrix.
output = os.path.join(path_fig, f"fig2_basis_box_sect{sctn}_{sctn_name}.pdf")
plot_basis_box(ftrs_mat, output=output)

# Figure 3: spatial plot produced by powerplots.plot_basis_spatial from `df`,
# with the colour range fixed to [-1, 1].
output = os.path.join(path_fig, f"fig3_basis_xy_sect{sctn}_{sctn_name}.pdf")
powerplots.plot_basis_spatial(
    df,
    xcol='stage_x',
    ycol='stage_y',
    vmin=-1,
    vmax=1,
    output=output,
)

# all sections

In [None]:
# Display the section index -> label mapping that the loop below iterates over.
sections

In [None]:
%%time
# Names given to the 24 matrix columns: 'br0' ... 'br23'. They do not depend
# on the section, so they are built once outside the loop.
basis_cols = np.char.add('br', np.arange(24).astype(str))

# Repeat the single-section workflow (load, filter, normalise, three figures)
# for every entry of `sections`.
for sctn, sctn_name in sections.items():
    file_mtx = files_mtx[sctn]
    file_meta = files_meta[sctn]
    print(file_mtx, file_meta)

    mtx = pd.read_csv(os.path.join(path_dataset, file_mtx), sep=',', index_col=0)
    mtx.columns = basis_cols
    meta = pd.read_csv(os.path.join(path_dataset, file_meta), sep=',', index_col=0)
    # Index-aligned join of the metadata with the renamed matrix columns.
    df = meta.join(mtx)

    # remove artifacts: keep only rows whose 'nuclei_signal' exceeds 1500
    # cond = df['cytoplasm_size'] > 10
    cond = df['nuclei_signal'] > 1500
    # .copy() makes the filtered frame independent, so adding the 'b{i}'
    # columns below does not trigger pandas' SettingWithCopyWarning.
    df = df.loc[cond].copy()
    mtx = df[basis_cols]

    # norm
    # NOTE(review): presumably normalises per cell and per basis as the flags
    # suggest -- confirm against basicu.normalize_fishdata.
    ftrs_mat = basicu.normalize_fishdata(mtx.values, norm_cell=True, norm_basis=True)
    # Store each normalised column next to the raw one as 'b0', 'b1', ...
    for i in range(ftrs_mat.shape[1]):
        df[f'b{i}'] = ftrs_mat[:,i]

    # XY
    XY = df[['stage_x', 'stage_y']].values
    x = XY[:,0]
    y = XY[:,1]

    # Figure 1: scatter of the stage x/y coordinates of this section.
    output = os.path.join(path_fig, f"fig1_xy_sect{sctn}_{sctn_name}.pdf")
    fig, ax = plt.subplots(figsize=(10,10))
    ax.scatter(x, y, s=1, edgecolor='none', color='black', rasterized=True)
    ax.set_title(sctn_name)
    ax.set_aspect('equal')
    powerplots.savefig_autodate(fig, output)
    plt.show()
    # Release the figure explicitly so that figures do not pile up across
    # iterations on backends where show() does not close them.
    plt.close(fig)

    # Figure 2: box plots of the normalised feature matrix.
    output = os.path.join(path_fig, f"fig2_basis_box_sect{sctn}_{sctn_name}.pdf")
    plot_basis_box(ftrs_mat, output=output)

    # Figure 3: spatial plot from powerplots.plot_basis_spatial, colour range
    # fixed to [-1, 1].
    output = os.path.join(path_fig, f"fig3_basis_xy_sect{sctn}_{sctn_name}.pdf")
    powerplots.plot_basis_spatial(df, xcol='stage_x', ycol='stage_y', vmin=-1, vmax=1, output=output)

In [None]:
# Reload compile_tex first so that any edits to the module on disk are used.
importlib.reload(compile_tex)
# NOTE(review): presumably collects the figures saved under `path_dataset`
# into a TeX report with the given title and author -- confirm against
# compile_tex.main.
compile_tex.main(path_dataset, title='dredFISH 12-section check', author='Wollman lab')