# Prep data -- check the data and make it conform to the format expected by TMGs and the downstream pipeline
- 


In [None]:
import os
import glob
import numpy as np
import pandas as pd
import subprocess
import anndata

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
ddir = "/bigstore/GeneralStorage/Data/dredFISH/DPNMF-FR_7C_PFA+Methanol_PFA_Methanol_2022Nov07/orig"
outdir = os.path.dirname(ddir)
print(ddir)
print(outdir)

# !ls $ddir/*.csv
f_mets = np.sort(glob.glob("*_metadata.csv", dir_fd=ddir))
f_mats = np.sort(glob.glob("*_matrix.csv",   dir_fd=ddir))
samples = np.array([f[:-len("_metadata.csv")] for f in f_mets])
assert len(f_mets) == len(f_mats)
assert len(f_mets) == len(samples)
len(samples), f_mets, f_mats, samples

In [None]:
# Create the output directory the first time this notebook is run.
outdir_missing = not os.path.isdir(outdir)
if outdir_missing:
    print(f"making {outdir}")
    os.mkdir(outdir)

# Housekeeping

In [None]:
# Copy a sample's matrix and metadata CSVs from ddir into outdir/<sample>/,
# negating stage_y in the metadata on the way. Only samples[[1]] is processed
# here, and the trailing `break` stops after the first iteration regardless.
for samp in samples[[1]]:
    print(samp)

    # Load the matrix, then the metadata (`f` ends up holding the metadata path).
    f = os.path.join(ddir, f"{samp}_matrix.csv")
    mat = pd.read_csv(f, index_col=0)
    f = os.path.join(ddir, f"{samp}_metadata.csv")
    meta = pd.read_csv(f, index_col=0)

    # Both tables must list the same rows in the same order.
    assert np.all(mat.index.values==meta.index.values)

    # Back up the original stage coordinates ...
    meta['stage_x_orig'] = meta['stage_x']
    meta['stage_y_orig'] = meta['stage_y']
    # ... build tmp_x / tmp_y (x unchanged, y negated) ...
    meta['tmp_x'] =  meta['stage_x_orig']
    meta['tmp_y'] = -meta['stage_y_orig']
    # ... and overwrite stage_x / stage_y with them.
    meta['stage_x'] = meta['tmp_x']
    meta['stage_y'] = meta['tmp_y']

    # Per-sample output directory.
    _dir = os.path.join(outdir, samp)
    if not os.path.isdir(_dir):
        print(f"making {_dir}")
        os.mkdir(_dir)

    # Write both tables under the per-sample directory, keeping the file names.
    fout_mat = os.path.join(_dir, f"{samp}_matrix.csv")
    print(fout_mat)
    mat.to_csv(fout_mat)

    fout_meta = os.path.join(_dir, f"{samp}_metadata.csv")
    print(fout_meta)
    meta.to_csv(fout_meta)

#     # change mod (disabled): make the copies read-only
#     subprocess.run(['chmod', '444', fout_mat])
#     subprocess.run(['chmod', '444', fout_meta])

    break

In [None]:
# Display the metadata table left bound by the copy loop (the sample it processed).
meta

In [None]:
# Distribution of the `size` column of the metadata.
sns.histplot(meta['size'])

In [None]:
# Distribution of the `polyt` column of the metadata.
sns.histplot(meta['polyt'])

In [None]:
# Distribution of the `dapi` column of the metadata (automatic binning).
sns.histplot(meta['dapi'])

In [None]:
# `dapi` vs `size`, one small (s=1) marker per metadata row.
sns.scatterplot(data=meta, x='dapi', y='size', s=1)

In [None]:
# `polyt` vs `size`, one small (s=1) marker per metadata row.
sns.scatterplot(data=meta, x='polyt', y='size', s=1)

In [None]:
# Same `dapi` histogram restricted to fixed bins: 100 evenly spaced edges
# (99 bins) covering 0..10000.
sns.histplot(meta['dapi'], bins=np.linspace(0,10000,100))

In [None]:
# Boolean mask over metadata rows: True where `dapi` is below 2000.
cond = meta['dapi'] < 2000

In [None]:
# Spatial map of the rows: position is (stage_x, stage_y) and colour encodes
# the `cond` mask (1 where dapi < 2000, 0 otherwise).
plt.scatter(meta['stage_x'].values,
            meta['stage_y'].values,
            c=cond.astype(int), # mask cast to 0/1 so it maps onto the colormap
            s=1,
            edgecolor='none',
           )

# Checking x and y (and rotating them)

In [None]:
# Import the visualization entry point, then reload the package, the
# subpackage and the module (outermost first) -- presumably so that edits to
# the dredFISH source are picked up without restarting the kernel.
from dredFISH.Visualization import viz_cell_layer
import importlib
import dredFISH
importlib.reload(dredFISH)
importlib.reload(dredFISH.Visualization)
importlib.reload(dredFISH.Visualization.viz_cell_layer)

In [None]:
# Modes accepted by viz_cell_layer.main, as noted by the original author:
#   'preview'       -- plot XY only; bypassing TMGs (need a notebook to see; save nothing)
#   'preview-save'  -- plot XY only, save results in prep for TMGs
#   'view'          -- go from the beginning to the end
#   'analysis-only' -- analysis only; no plots
mode = 'preview'

# One split line per sample, in the order of `samples`, written as a
# (start, end) pair of points -- presumably (x, y) in the flipped stage
# coordinates; confirm against viz_cell_layer.
_split_line_endpoints = [
    ((12500, -2500), (12500, -14000)),
    ((14000, -2000), (11000, -16000)),
    ((13500, -2000), (11500, -14000)),
    ((15000, -2500), (10000, -15000)),
    ((7000, -2500), (9000, -17500)),
    ((13500, -2000), (10500, -14000)),

    ((2500, -10000), (17500, -15000)),  # weird ones
    ((2500, -7500), (17500, -15000)),
    ((2500, -7500), (17500, -16000)),

    ((12000, -2000), (12000, -14000)),
]

# sample name -> list holding a single [start, end] line
sample_split_lines = {
    samples[i]: [[start, end]]
    for i, (start, end) in enumerate(_split_line_endpoints)
}

# TMG: preview a single sample (change the index, e.g. [[3]], to look at another)
for samp in samples[[1]]:
    # line used to split the section into hemi-coronal sections
    split_lines = sample_split_lines[samp]
    # per-sample directory under outdir
    basepth = os.path.join(outdir, samp)

    viz_cell_layer.main(mode, basepth, split_lines)
    break

In [None]:
# Modes accepted by viz_cell_layer.main, as noted by the original author:
#   'preview'       -- plot XY only; bypassing TMGs (need a notebook to see; save nothing)
#   'preview-save'  -- plot XY only, save results in prep for TMGs
#   'view'          -- go from the beginning to the end
#   'analysis-only' -- analysis only; no plots
mode = 'preview-save'

# Split-line table rebuilt here so this cell can run on its own: one
# (start, end) pair of points per sample, in the order of `samples`
# (presumably (x, y) in the flipped stage coordinates -- confirm against
# viz_cell_layer).
_split_line_endpoints = [
    ((12500, -2500), (12500, -14000)),
    ((14000, -2000), (11000, -16000)),
    ((13500, -2000), (11500, -14000)),
    ((15000, -2500), (10000, -15000)),
    ((7000, -2500), (9000, -17500)),
    ((13500, -2000), (10500, -14000)),

    ((2500, -10000), (17500, -15000)),  # weird ones
    ((2500, -7500), (17500, -15000)),
    ((2500, -7500), (17500, -16000)),

    ((12000, -2000), (12000, -14000)),
]

# sample name -> list holding a single [start, end] line
sample_split_lines = {
    samples[i]: [[start, end]]
    for i, (start, end) in enumerate(_split_line_endpoints)
}

# TMG: run every sample in 'preview-save' mode
for samp in samples:
    # line used to split the section into hemi-coronal sections
    split_lines = sample_split_lines[samp]
    # per-sample directory under outdir
    basepth = os.path.join(outdir, samp)

    viz_cell_layer.main(mode, basepth, split_lines)