# Prep data -- check the data and make it conform to the format expected by TMGs and the downstream pipeline
- 


In [None]:
import os
import glob
import numpy as np
import pandas as pd
import subprocess
import anndata
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Root directory holding the dredFISH datasets.
rootdir = "/bigstore/GeneralStorage/Data/dredFISH/"
# IPython shell escape: long listing of `rootdir` (hidden entries included,
# human-readable sizes), sorted by modification time with the oldest first.
!ls -alhtr $rootdir

In [None]:
# ddir = "/bigstore/GeneralStorage/Data/dredFISH/DPNMF-FR_7C_PFA+Methanol_PFA_Methanol_2022Nov7/orig"
# ddir = "/bigstore/GeneralStorage/Data/dredFISH/DPNMF-FR_Z1_Z2_9A_Z3_Z4_6C_2022Nov15/orig"
ddir = "/bigstore/GeneralStorage/Data/dredFISH/DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Dec12_strip_tol/orig"

outdir = os.path.dirname(ddir) #+"_v2"
print(ddir)
print(outdir)

# !ls $ddir/*.csv
f_mets = np.sort(glob.glob("*_metadata.csv", dir_fd=ddir))
f_mats = np.sort(glob.glob("*_matrix.csv",   dir_fd=ddir))
samples = np.array([f[:-len("_metadata.csv")].replace(',', '_') for f in f_mets])
assert len(f_mets) == len(f_mats)
assert len(f_mets) == len(samples)
len(samples), f_mets, f_mats, samples

In [None]:
# Make sure the output directory exists before anything is written into it.
if not os.path.isdir(outdir):
    print(f"making {outdir}")
    # makedirs also creates any missing parent directories, and exist_ok tolerates
    # the directory being created between the check above and this call.
    os.makedirs(outdir, exist_ok=True)

# Housekeeping

In [None]:
# How the stage coordinates are re-expressed by the per-sample loop below:
#   "PCA"       -> replace (stage_x, stage_y) by their 2-component PCA projection
#   "centering" -> subtract the per-sample mean from (stage_x, stage_y)
option = "centering"
# option = "PCA"

In [None]:
# Per-sample prep: drop low-DAPI rows, re-express the stage coordinates according
# to `option`, plot a QC summary, and write the filtered matrix/metadata tables to
# <outdir>/<samp>/.

# Fail before touching any file: an unrecognised option would otherwise leave
# XYnew undefined (or silently reuse the previous sample's coordinates).
if option not in ('PCA', 'centering'):
    raise ValueError(f"unknown option {option!r}; expected 'PCA' or 'centering'")

# QC threshold on the 'dapi' column; rows must be strictly above it to be kept.
cond_th = 2000

# `samples` was built from `f_mets` in the same order, so the two can be zipped.
for f_met, samp in zip(f_mets, samples):
    print(samp)
    # Read by the on-disk name: `samp` had commas replaced by underscores when it
    # was built, so it may not match the input file prefix.
    raw_name = f_met[:-len("_metadata.csv")]
    f = os.path.join(ddir, f"{raw_name}_matrix.csv")
    mat = pd.read_csv(f, index_col=0)

    f = os.path.join(ddir, f_met)
    meta = pd.read_csv(f, index_col=0)
    # array_equal returns False on a length mismatch instead of raising or
    # broadcasting, so this also catches tables of different sizes.
    if not np.array_equal(mat.index.values, meta.index.values):
        raise ValueError(f"{samp}: matrix and metadata row indices do not match")

    # QC
    cond = meta['dapi'] > cond_th

    # select subset
    metasub = meta[cond].copy()
    matsub  = mat [cond].copy()

    # keep the original stage coordinates, then overwrite stage_x/stage_y with
    # the transformed ones (tmp_x/tmp_y are kept as columns in the output too)
    metasub[['stage_x_orig', 'stage_y_orig']] = metasub[['stage_x', 'stage_y']]
    if option == 'PCA':
        XYnew = PCA(n_components=2).fit_transform(metasub[['stage_x', 'stage_y']].values)
    elif option == 'centering':
        XYnew = metasub[['stage_x', 'stage_y']].values
        XYnew = XYnew - np.mean(XYnew, axis=0)

    metasub['tmp_x'] =  XYnew[:,0]
    metasub['tmp_y'] =  XYnew[:,1]
    metasub[['stage_x', 'stage_y']] = metasub[['tmp_x', 'tmp_y']]

    # plot: DAPI histogram | all rows coloured by QC pass/fail | kept rows in the
    # new coordinates
    fig, axs = plt.subplots(1,3,figsize=(3*6,1*5))
    ax = axs[0]
    sns.histplot(meta['dapi'], bins=np.linspace(0,10000,100), ax=ax)
    ax.set_title(samp)

    ax = axs[1]
    ax.scatter(meta['stage_x'].values,
               meta['stage_y'].values,
               c=cond.astype(int),
               s=1,
               edgecolor='none',
               )
    ax.set_aspect('equal')

    ax = axs[2]
    ax.scatter(metasub['stage_x'].values,
               metasub['stage_y'].values,
               s=1,
               edgecolor='none',
               )
    ax.set_aspect('equal')
    plt.show()

    # make dir
    _dir = os.path.join(outdir, samp)
    if not os.path.isdir(_dir):
        print(f"making {_dir}")
        # creates missing parents and tolerates a concurrent creation
        os.makedirs(_dir, exist_ok=True)

    # write the filtered tables under the (comma-free) sample name
    fout_mat  = os.path.join(outdir, samp, f"{samp}_matrix.csv")
    print(fout_mat)
    matsub.to_csv(fout_mat)

    fout_meta = os.path.join(outdir, samp, f"{samp}_metadata.csv")
    print(fout_meta)
    metasub.to_csv(fout_meta)

    # # change mod
    # subprocess.run(['chmod', '444', fout_mat])
    # subprocess.run(['chmod', '444', fout_meta])
    

# Check x and y (and rotate if needed)

In [None]:
# Entry point used by the cells below to plot / save each prepared sample.
from dredFISH.Visualization import viz_cell_layer
import importlib
import dredFISH
# Re-execute the package, then the subpackage, then the module, in that order
# (outermost first) -- presumably so edits to the dredFISH source are picked up
# without restarting the kernel.
importlib.reload(dredFISH)
importlib.reload(dredFISH.Visualization)
importlib.reload(dredFISH.Visualization.viz_cell_layer)

In [None]:
# Run mode handed to viz_cell_layer.main. Options noted by the author:
#   'preview'       -- plot XY only; bypassing TMGs (need a notebook to see; save nothing)
#   'preview-save'  -- plot XY only, save results in prep for TMGs
#   'view'          -- go from the beginning to the end
#   'analysis-only' -- analysis only; no plots
mode = 'preview'

# Per-section line used to split a section into hemi-coronal halves. Every line
# spans the same second-coordinate range, so only the first coordinate of each
# endpoint is tabulated (presumably points are (x, y) -- confirm against
# viz_cell_layer). Each dict value is a list holding one line of two endpoints.
_SPLIT_LO, _SPLIT_HI = -7500, 7500
_split_line_ends = (
    ('Section13', (  500, -1500)),
    ('Section14', ( 1000, -1000)),
    ('Section17', (    0,  -200)),
    ('Section18', (-1000, -1000)),
    ('Section1',  (-1200, -1200)),
    ('Section21', (-1500,   500)),
    ('Section22', (-4000,  -500)),
    ('Section23', (-1000,   500)),
    ('Section24', ( -500,  2000)),
    ('Section2',  ( -500,  -500)),
    ('Section5',  ( 2500, -2000)),
    ('Section7',  ( -100,  1500)),
    ('Section8',  ( 1000, -1000)),
)
sample_split_lines = {
    section: [[(at_lo, _SPLIT_LO), (at_hi, _SPLIT_HI)]]
    for section, (at_lo, at_hi) in _split_line_ends
}

# Per-section rotation angle; None for every section in this dataset.
sample_rotate_thetas = dict.fromkeys(sample_split_lines)

# Per-section list of bounding points; None unless listed explicitly below.
sample_bounding_points = dict.fromkeys(sample_split_lines)
sample_bounding_points['Section22'] = [
    [-11000, -8000],
    [  7000, -8000],
    [  7000,     0],
    [  6000,  4000],
    [  6000,  7000],
    [-11000,  7000],
]
sample_bounding_points['Section24'] = [
    [ -8000, -8000],
    [ 10000, -8000],
    [ 10000,  8000],
    [ -8000,  8000],
    [ -8000,     0],
    [ -7000, -4000],
]
sample_bounding_points['Section5'] = [
    [ -8000, -7000],
    [  8000, -7000],
    [  8000,  7000],
    [ -8000,  7000],
]
sample_bounding_points['Section8'] = [
    [ -8000, -6000],
    [ 10000, -6000],
    [ 10000,  8000],
    [ -8000,  8000],
]

# TMG
# Run viz_cell_layer on every prepared sample with the current `mode`.
# (To try a subset, index `samples`, e.g. samples[[7]].)
for samp in samples:
    # per-sample directory: <outdir>/<samp>
    basepth = os.path.join(outdir, samp)
    # the second-to-last '_'-separated token of the sample name is the key
    # into the per-section settings
    _samp = samp.rsplit('_', 2)[-2]
    print(_samp)

    # line that splits the section into hemi-coronal halves
    split_lines = sample_split_lines[_samp]
    theta = sample_rotate_thetas[_samp]
    bounding_points = sample_bounding_points[_samp]

    viz_kwargs = {
        'split_lines': split_lines,
        'rotate_theta': theta,
        'bounding_points': bounding_points,
        'title': samp,
    }
    viz_cell_layer.main(mode, basepth, **viz_kwargs)
    # break

In [None]:
# Run mode handed to viz_cell_layer.main. Options noted by the author:
#   'preview'       -- plot XY only; bypassing TMGs (need a notebook to see; save nothing)
#   'preview-save'  -- plot XY only, save results in prep for TMGs
#   'view'          -- go from the beginning to the end
#   'analysis-only' -- analysis only; no plots
mode = 'preview-save'

# Same per-sample pass as the preview cell, now in 'preview-save' mode.
# (To try a subset, index `samples`, e.g. samples[[7]].)
for samp in samples:
    # per-sample directory: <outdir>/<samp>
    basepth = os.path.join(outdir, samp)
    name_parts = samp.split('_')
    # second-to-last token of the sample name keys the per-section settings
    _samp = name_parts[-2]
    print(_samp)

    # line that splits the section into hemi-coronal halves
    split_lines = sample_split_lines[_samp]
    theta = sample_rotate_thetas[_samp]
    bounding_points = sample_bounding_points[_samp]

    viz_cell_layer.main(
        mode,
        basepth,
        split_lines=split_lines,
        rotate_theta=theta,
        bounding_points=bounding_points,
        title=samp,
    )
    # break