In [None]:
import scanpy as sc
import numpy as np
import spin
import gc
import os
import pandas as pd

In [None]:
# Keys handed to the spin plotting helpers (`xkey` / `ykey`) and the `adata.obs`
# column that holds the sub-clustering (`subregion_key`).
xkey = 'row'
ykey = 'col'
topregion_key = 'topregion'
subregion_key = 'subregion'
region_key = 'region'
topregions = list(range(17))
# Passed as `dpi=` to every figure written by this notebook.
dpi = 200

# Root directory under which all parameter-selection figures are written.
basepath = '/stanley/WangLab/kamal/figures/atlas/param_selection/'
# makedirs(exist_ok=True) also creates missing parent directories and is a
# no-op when the directory already exists, so the cell is safe to re-run.
os.makedirs(basepath, exist_ok=True)

# 0: Cortex 1

## Num nbrs

In [None]:
# Top-level region handled by the cells that follow; the value is interpolated
# into both the input file names and the output directory.
topregion = '0'
regionpath = os.path.join(basepath, f'{topregion}/')
# Create the per-region output directory (and any missing parents); re-running is a no-op.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# File-name suffixes of the neighbour titration: '_20nbrs' ... '_60nbrs'.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(nbrpath, exist_ok=True)

# For every neighbour setting, save one tissue plot and one latent (UMAP) plot
# coloured by the sub-clustering stored in the corresponding file.
for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # Colours are assigned by decreasing cluster size. The mapping is built once
    # and shared by both plots (it was previously recomputed identically
    # between the two calls).
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    palette = sc.pl.palettes.default_102
    cmap = {region: palette[i] for i, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of this region.
respath = os.path.join(regionpath, 'res_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(respath, exist_ok=True)

In [None]:
# Pin the neighbour setting explicitly. Previously this cell relied on `num_nbrs`
# left over from the titration loop (its last value, '_60nbrs'), which is hidden
# state. '_40nbrs' is the setting selected in the resolution cell below.
# NOTE(review): confirm that the 40-neighbour file is the one meant to be inspected.
num_nbrs = '_40nbrs'
adata_og = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

In [None]:
# Display the `.uns` mapping of the object loaded into `adata_og`.
adata_og.uns

In [None]:
# Re-cluster the 40-neighbour file with Leiden at resolution `res` and save a
# tissue plot and a latent (UMAP) plot coloured by the new clusters.
num_nbrs = '_40nbrs'
res = 1.7  # commented-out alternative in an earlier version: 1.0

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

# The new labels overwrite the existing `subregion_key` column; the neighbour
# graph is looked up under the same key.
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

In [None]:
# Save the object currently bound to `adata` as this region's final sub-clustered file.
adata.write(
    f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
)

## Plot individual subregions

In [None]:
# Output directory for the per-subregion figures of this region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Load the sub-clustering that was selected and saved by the resolution step.
# Re-reading the nbr_titration file here (as before) silently discarded that
# re-clustering, so the per-subregion plots did not match the final file. The
# other regions plot the re-clustered object as well.
adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad')

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells

# One tissue plot and one latent plot per subregion, largest subregion first.
for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(adata, [subregion], subregion_key, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_regions_latent(adata, 'X_umap_spin_sub', [subregion], subregion_key, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Gene markers

In [None]:
# CSV file that receives the top marker genes of this region's subregions.
markerpath = os.path.join(
    regionpath,
    'top_10_gene_markers_per_subcluster.csv',
)

In [None]:
# Rank markers on the clustering that was selected and saved by the resolution
# step. Re-reading the nbr_titration file (as before) produced markers for
# clusters that differ from the ones in the final file.
adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad')

# Rank genes for every subregion using `adata.X` (use_raw=False).
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10

# One row per subregion, holding the first `num_markers` gene names of its ranking.
markers = np.zeros((len(subregions),num_markers), dtype=object)
for i, label in enumerate(subregions):
    markers[i] = sc.get.rank_genes_groups_df(adata, group=[label])['names'][:num_markers].values

# Index the table by the actual subregion labels. The previous default
# RangeIndex only recorded the size rank, although the column is named
# 'subregion' in the CSV.
marker_df = pd.DataFrame(
    data=markers,
    index=pd.Index(subregions, name='subregion'),
    columns=[f'rank{i+1}' for i in range(num_markers)],
)
marker_df.to_csv(markerpath)

## Filter thresh

In [None]:
# Output directory for the size-threshold figures of this region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Filter the clustering that was selected and saved by the resolution step.
# Re-reading the nbr_titration file (as before) applied the threshold to
# clusters that differ from the ones in the final file.
adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad')
print(f'Cluster size = {len(adata)}')
print(adata.obs[subregion_key].value_counts())

# Keep only subregions with strictly more than `thresh` cells.
thresh = 1000
counts = adata.obs[subregion_key].value_counts()
good_regions = counts.index[(counts > thresh).to_numpy()]
adata.obs['keep'] = np.isin(adata.obs[subregion_key], good_regions)

# Plots selecting the cells flagged for removal (keep == False).
tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# Subset with the mask computed above instead of evaluating np.isin a second time.
adata = adata[adata.obs['keep'].to_numpy()]

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[i] for i, region in enumerate(regions)}

# Plots of the cells that remain, coloured by subregion.
tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                    colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

### Plot region

In [None]:
# Show a single subregion on the tissue; no `path` argument is passed here.
subregion = '14'
spin.plot_regions_tissue(
    adata, [subregion], subregion_key,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, title=subregion,
)

# 1: Midbrain

## Num nbrs

In [None]:
# Top-level region handled by the cells that follow; the value is interpolated
# into both the input file names and the output directory.
topregion = '1'
regionpath = os.path.join(basepath, f'{topregion}/')
# Create the per-region output directory (and any missing parents); re-running is a no-op.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# File-name suffixes of the neighbour titration: '_20nbrs' ... '_60nbrs'.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(nbrpath, exist_ok=True)

# For every neighbour setting, save one tissue plot and one latent (UMAP) plot
# coloured by the sub-clustering stored in the corresponding file.
for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # Colours are assigned by decreasing cluster size. The mapping is built once
    # and shared by both plots (it was previously recomputed identically
    # between the two calls).
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    palette = sc.pl.palettes.default_102
    cmap = {region: palette[i] for i, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of this region.
respath = os.path.join(regionpath, 'res_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbour setting used by the resolution cells that follow, and the matching input file.
num_nbrs = '_60nbrs'
adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

In [None]:
# Re-cluster the current neighbour file with Leiden at resolution `res`.
# The plot calls at the bottom are commented out, so nothing is saved here.
res = 0.7

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

# spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
# spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
#                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Re-cluster the current neighbour file with Leiden at resolution `res` and save
# a tissue plot and a latent (UMAP) plot coloured by the new clusters.
res = 1

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

In [None]:
# Re-cluster the current neighbour file with Leiden at resolution `res` and save
# a tissue plot and a latent (UMAP) plot coloured by the new clusters.
res = 0.7

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

In [None]:
# Re-cluster the current neighbour file with Leiden at resolution `res` and save
# a tissue plot and a latent (UMAP) plot coloured by the new clusters.
res = 0.4

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

In [None]:
# Re-cluster the current neighbour file with Leiden at resolution `res` and save
# a tissue plot and a latent (UMAP) plot coloured by the new clusters.
res = 0.55

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

In [None]:
# Re-cluster the current neighbour file with Leiden at resolution `res` and save
# a tissue plot and a latent (UMAP) plot coloured by the new clusters.
res = 0.45

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

In [None]:
# Re-cluster the current neighbour file with Leiden at resolution `res` and save
# a tissue plot and a latent (UMAP) plot coloured by the new clusters.
res = 0.35

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

In [None]:
# Re-cluster the current neighbour file with Leiden at resolution `res` and save
# a tissue plot and a latent (UMAP) plot coloured by the new clusters.
# The object left in `adata` by this cell is what the following write cell saves.
res = 0.7

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

In [None]:
# Save the object currently bound to `adata` as this region's final sub-clustered file.
adata.write(
    f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
)

## Filter thresh

In [None]:
# Output directory for the size-threshold figures of this region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(threshpath, exist_ok=True)

In [None]:
print(f'Cluster size = {len(adata)}')
print(adata.obs[subregion_key].value_counts())

# Keep only subregions with strictly more than `thresh` cells.
thresh = 1000
counts = adata.obs[subregion_key].value_counts()
good_regions = counts.index[(counts > thresh).to_numpy()]
adata.obs['keep'] = np.isin(adata.obs[subregion_key], good_regions)

# Plots selecting the cells flagged for removal (keep == False).
tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# Take an independent copy rather than a view. The marker-ranking cell further
# down writes its results into this object, and writing into a view makes
# AnnData copy it implicitly and emit a warning.
adata = adata[adata.obs['keep'].to_numpy()].copy()

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[i] for i, region in enumerate(regions)}

# Plots of the cells that remain, coloured by subregion.
tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                    colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Output directory for the per-subregion figures of this region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue plot and one latent plot per subregion, largest subregion first.
subregions = adata.obs[subregion_key].value_counts().index

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(
        adata, [subregion], subregion_key,
        xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
    )
    spin.plot_regions_latent(
        adata, 'X_umap_spin_sub', [subregion], subregion_key,
        s=0.5, legend=True, path=latent_path, dpi=dpi,
    )

## Gene markers

In [None]:
# CSV file that receives the top marker genes of this region's subregions.
markerpath = os.path.join(
    regionpath,
    'top_10_gene_markers_per_subcluster.csv',
)

In [None]:
# Rank genes for every subregion using `adata.X` (use_raw=False).
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10

# One row per subregion, holding the first `num_markers` gene names of its ranking.
markers = np.zeros((len(subregions),num_markers), dtype=object)
for i, label in enumerate(subregions):
    markers[i] = sc.get.rank_genes_groups_df(adata, group=[label])['names'][:num_markers].values

# Index the table by the actual subregion labels. The previous default
# RangeIndex only recorded the size rank, although the column is named
# 'subregion' in the CSV.
marker_df = pd.DataFrame(
    data=markers,
    index=pd.Index(subregions, name='subregion'),
    columns=[f'rank{i+1}' for i in range(num_markers)],
)
marker_df.to_csv(markerpath)

# 2: Cb 1

## Num nbrs

In [None]:
# Top-level region handled by the cells that follow; the value is interpolated
# into both the input file names and the output directory.
topregion = '2'
regionpath = os.path.join(basepath, f'{topregion}/')
# Create the per-region output directory (and any missing parents); re-running is a no-op.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# File-name suffixes of the neighbour titration: '_20nbrs' ... '_60nbrs'.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]
# The 30-neighbour setting is skipped for this region.
# NOTE(review): presumably no titration file exists for it; confirm.
num_nbrss.remove('_30nbrs')

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(nbrpath, exist_ok=True)

# For every neighbour setting, save one tissue plot and one latent (UMAP) plot
# coloured by the sub-clustering stored in the corresponding file.
for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # Colours are assigned by decreasing cluster size. The mapping is built once
    # and shared by both plots (it was previously recomputed identically
    # between the two calls).
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    palette = sc.pl.palettes.default_102
    cmap = {region: palette[i] for i, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of this region.
respath = os.path.join(regionpath, 'res_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(respath, exist_ok=True)

In [None]:
# Re-cluster the 20-neighbour file with Leiden at resolution `res` and save a
# tissue plot and a latent (UMAP) plot coloured by the new clusters.
num_nbrs = '_20nbrs'
res = 0.3

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

In [None]:
# Save the object currently bound to `adata` as this region's final sub-clustered file.
adata.write(
    f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
)

## Plot individual subregions

In [None]:
# Output directory for the per-subregion figures of this region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue plot and one latent plot per subregion, largest subregion first.
subregions = adata.obs[subregion_key].value_counts().index

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(
        adata, [subregion], subregion_key,
        xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
    )
    spin.plot_regions_latent(
        adata, 'X_umap_spin_sub', [subregion], subregion_key,
        s=0.5, legend=True, path=latent_path, dpi=dpi,
    )

## Gene markers

In [None]:
# CSV file that receives the top marker genes of this region's subregions.
markerpath = os.path.join(
    regionpath,
    'top_10_gene_markers_per_subcluster.csv',
)

In [None]:
# Rank genes for every subregion using `adata.X` (use_raw=False).
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10

# One row per subregion, holding the first `num_markers` gene names of its ranking.
markers = np.zeros((len(subregions),num_markers), dtype=object)
for i, label in enumerate(subregions):
    markers[i] = sc.get.rank_genes_groups_df(adata, group=[label])['names'][:num_markers].values

# Index the table by the actual subregion labels. The previous default
# RangeIndex only recorded the size rank, although the column is named
# 'subregion' in the CSV.
marker_df = pd.DataFrame(
    data=markers,
    index=pd.Index(subregions, name='subregion'),
    columns=[f'rank{i+1}' for i in range(num_markers)],
)
marker_df.to_csv(markerpath)

## Filter thresh

In [None]:
# Output directory for the size-threshold figures of this region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Report subregion sizes, drop subregions at or below the size threshold, and
# save plots of both the removed cells and the remaining ones.
print(f'Cluster size = {len(adata)}')
print(adata.obs[subregion_key].value_counts())

thresh = 1000
counts = adata.obs[subregion_key].value_counts()
# Labels of the subregions with strictly more than `thresh` cells.
good_regions = counts.index[(counts > thresh).to_numpy()]
adata.obs['keep'] = np.isin(adata.obs[subregion_key], good_regions)

# Plots selecting the cells flagged for removal (keep == False).
tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
spin.plot_regions_tissue(
    adata, [False], 'keep',
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_regions_latent(
    adata, 'X_umap_spin_sub', [False], 'keep',
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

# Same boolean mask as the 'keep' column written above.
adata = adata[adata.obs['keep'].to_numpy()]

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

# Plots of the cells that remain, coloured by subregion.
tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

# 3: Str

## Num nbrs

In [None]:
# Top-level region handled by the cells that follow; the value is interpolated
# into both the input file names and the output directory.
topregion = '3'
regionpath = os.path.join(basepath, f'{topregion}/')
# Create the per-region output directory (and any missing parents); re-running is a no-op.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# File-name suffixes of the neighbour titration: '_20nbrs' ... '_60nbrs'.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(nbrpath, exist_ok=True)

# For every neighbour setting, save one tissue plot and one latent (UMAP) plot
# coloured by the sub-clustering stored in the corresponding file.
for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # Colours are assigned by decreasing cluster size. The mapping is built once
    # and shared by both plots (it was previously recomputed identically
    # between the two calls).
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    palette = sc.pl.palettes.default_102
    cmap = {region: palette[i] for i, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of this region.
respath = os.path.join(regionpath, 'res_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbour setting used by the resolution cell that follows, and the matching input file.
num_nbrs = '_60nbrs'
adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

In [None]:
# Re-cluster the current neighbour file with Leiden at resolution `res` and save
# a tissue plot and a latent (UMAP) plot coloured by the new clusters.
res = 0.5

adata = sc.read_h5ad(
    '/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/'
    f'atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad'
)

# Resolution with any '.' removed, for use in the output file names.
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f'tissue_colored_by_region{num_nbrs}_res{res_tag}.png')
latent_path = os.path.join(respath, f'latent_colored_by_region{num_nbrs}_res{res_tag}.png')

sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

In [None]:
# Save the object currently bound to `adata` as this region's final sub-clustered file.
adata.write(
    f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
)

## Plot individual subregions

In [None]:
# Output directory for the per-subregion figures of this region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue plot and one latent plot per subregion, largest subregion first.
subregions = adata.obs[subregion_key].value_counts().index

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(
        adata, [subregion], subregion_key,
        xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
    )
    spin.plot_regions_latent(
        adata, 'X_umap_spin_sub', [subregion], subregion_key,
        s=0.5, legend=True, path=latent_path, dpi=dpi,
    )

## Gene markers

In [None]:
# CSV file that receives the top marker genes of this region's subregions.
markerpath = os.path.join(
    regionpath,
    'top_10_gene_markers_per_subcluster.csv',
)

In [None]:
# Rank genes for every subregion using `adata.X` (use_raw=False).
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10

# One row per subregion, holding the first `num_markers` gene names of its ranking.
markers = np.zeros((len(subregions),num_markers), dtype=object)
for i, label in enumerate(subregions):
    markers[i] = sc.get.rank_genes_groups_df(adata, group=[label])['names'][:num_markers].values

# Index the table by the actual subregion labels. The previous default
# RangeIndex only recorded the size rank, although the column is named
# 'subregion' in the CSV.
marker_df = pd.DataFrame(
    data=markers,
    index=pd.Index(subregions, name='subregion'),
    columns=[f'rank{i+1}' for i in range(num_markers)],
)
marker_df.to_csv(markerpath)

## Filter thresh

In [None]:
# Output directory for the size-threshold figures of this region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# Creates missing parents as well and does nothing if the directory exists.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Report subregion sizes, drop subregions at or below the size threshold, and
# save plots of both the removed cells and the remaining ones.
print(f'Cluster size = {len(adata)}')
print(adata.obs[subregion_key].value_counts())

thresh = 1000
counts = adata.obs[subregion_key].value_counts()
# Labels of the subregions with strictly more than `thresh` cells.
good_regions = counts.index[(counts > thresh).to_numpy()]
adata.obs['keep'] = np.isin(adata.obs[subregion_key], good_regions)

# Plots selecting the cells flagged for removal (keep == False).
tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
spin.plot_regions_tissue(
    adata, [False], 'keep',
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_regions_latent(
    adata, 'X_umap_spin_sub', [False], 'keep',
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

# Same boolean mask as the 'keep' column written above.
adata = adata[adata.obs['keep'].to_numpy()]

# Assign palette colours in order of decreasing cluster size.
palette = sc.pl.palettes.default_102
regions = adata.obs[subregion_key].value_counts().index
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

# Plots of the cells that remain, coloured by subregion.
tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
embedding = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(
    adata, colors=adata.obs[subregion_key], cmap=cmap,
    xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
)
spin.plot_latent(
    adata, embedding[:, 0], embedding[:, 1],
    colors=adata.obs[subregion_key], cmap=cmap,
    s=0.5, legend=True, path=latent_path, dpi=dpi,
)

# 4: Ob 1

## Num nbrs

In [None]:
# Top-level region handled by this section; the label is part of the input file
# names and of the output folder created here.
topregion = '4'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Neighbour-count titration: for every candidate setting, load the matching file and
# save a tissue map and a latent-space scatter coloured by subregion.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]  # '_20nbrs' ... '_60nbrs'

nbrpath = os.path.join(regionpath, 'nbr_selection/')
os.makedirs(nbrpath, exist_ok=True)

palette = sc.pl.palettes.default_102

for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One colour per subregion, built once and shared by both plots. Modulo indexing
    # reuses colours instead of raising IndexError when subregions outnumber the palette.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Folder for the clustering-resolution figures of this top-level region.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(respath, exist_ok=True)

In [None]:
# Re-cluster this region at one Leiden resolution and save the tissue / latent views;
# the neighbour setting and the resolution are both encoded in the figure names.
num_nbrs = '_30nbrs'
res = 0.1

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

res_tag = str(res).replace('.', '')  # e.g. 0.1 -> '01'
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# key_added is the existing subregion column, so the labels loaded from file are overwritten.
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Modulo indexing reuses colours instead of raising IndexError when clusters outnumber the palette.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current AnnData object as the final sub-clustered file for this top-level region.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
# Create the output folder if it is missing so the write cannot fail on a fresh setup.
os.makedirs(os.path.dirname(final_path), exist_ok=True)
adata.write(final_path)

## Plot individual subregions

In [None]:
# Folder for the per-subregion figures of this top-level region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# One tissue map and one latent-space scatter per subregion, visiting subregions
# from most to fewest cells.
num_nbrs = '_30nbrs'
# Load the file this section saved to 'final/' instead of the titration file: the
# titration file still carries the subregion labels from before the Leiden resolution
# step, so plots and markers would not match the saved result (the other sections
# keep working on the re-clustered object).
# NOTE(review): confirm that the re-clustered labels are the ones intended here.
adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad')

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(adata, [subregion], subregion_key, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_regions_latent(adata, 'X_umap_spin_sub', [subregion], subregion_key, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Gene markers

In [None]:
# CSV file (inside this region's figure folder) that receives the top marker genes per subregion.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion and store the top marker names in a CSV with one row per
# subregion (largest subregion first) and one column per rank.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10
markers = np.zeros((len(subregions),num_markers), dtype=object)

# Take the first num_markers gene names reported for each subregion.
for row, subregion in enumerate(subregions):
    markers[row] = sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values

# Label every row with its subregion. Rows follow cell-count order, so the default
# 0..n-1 index would only be a row position, not the subregion the markers belong to.
marker_df = pd.DataFrame(data=markers, index=subregions, columns=[f'rank{i+1}' for i in range(num_markers)])
marker_df.index.name = 'subregion'
marker_df.to_csv(markerpath)

## Filter thresh

In [None]:
# Folder for the size-threshold diagnostic figures of this top-level region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no check-then-create
# race, and missing parent folders are created as well.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Remove subregions with too few cells: plot the cells that will be dropped, filter
# them out, then re-plot the remaining subregions.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

thresh = 1000  # a subregion is kept only if it has more than this many cells
# Boolean selection on the counts; avoids indexing an Index with the tuple np.where returns.
good_regions = counts[counts > thresh].index
adata.obs['keep'] = np.isin(adata.obs[subregion_key], good_regions)

tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
# Region list [False] on the 'keep' column: presumably highlights the cells about to be removed.
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# Reuse the mask computed above. .copy() turns the subset from a view into an
# independent AnnData, so later steps that write to it do not trigger an implicit copy.
adata = adata[adata.obs['keep'].to_numpy()].copy()

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Modulo indexing reuses colours instead of raising IndexError when regions outnumber the palette.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

# 5: Fiber tracts

## Num nbrs

In [None]:
# Top-level region handled by this section; the label is part of the input file
# names and of the output folder created here.
topregion = '5'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Neighbour-count titration: for every candidate setting, load the matching file and
# save a tissue map and a latent-space scatter coloured by subregion.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]  # '_20nbrs' ... '_60nbrs'

nbrpath = os.path.join(regionpath, 'nbr_selection/')
os.makedirs(nbrpath, exist_ok=True)

palette = sc.pl.palettes.default_102

for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One colour per subregion, built once and shared by both plots. Modulo indexing
    # reuses colours instead of raising IndexError when subregions outnumber the palette.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Folder for the clustering-resolution figures of this top-level region.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbour setting used for this region; it selects which titration file the resolution cell loads.
num_nbrs = '_50nbrs'

In [None]:
# Re-cluster this region at one Leiden resolution and save the tissue / latent views;
# the neighbour setting and the resolution are both encoded in the figure names.
res = 0.2

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

res_tag = str(res).replace('.', '')  # e.g. 0.2 -> '02'
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# key_added is the existing subregion column, so the labels loaded from file are overwritten.
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Modulo indexing reuses colours instead of raising IndexError when clusters outnumber the palette.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current AnnData object as the final sub-clustered file for this top-level region.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
# Create the output folder if it is missing so the write cannot fail on a fresh setup.
os.makedirs(os.path.dirname(final_path), exist_ok=True)
adata.write(final_path)

## Filter thresh

In [None]:
# Folder for the size-threshold diagnostic figures of this top-level region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no check-then-create
# race, and missing parent folders are created as well.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Remove subregions with too few cells: plot the cells that will be dropped, filter
# them out, then re-plot the remaining subregions.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

thresh = 1000  # a subregion is kept only if it has more than this many cells
# Boolean selection on the counts; avoids indexing an Index with the tuple np.where returns.
good_regions = counts[counts > thresh].index
adata.obs['keep'] = np.isin(adata.obs[subregion_key], good_regions)

tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
# Region list [False] on the 'keep' column: presumably highlights the cells about to be removed.
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# Reuse the mask computed above. .copy() turns the subset from a view into an
# independent AnnData, so later steps that write to it (e.g. the gene ranking that
# follows in this section) do not trigger an implicit copy.
adata = adata[adata.obs['keep'].to_numpy()].copy()

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Modulo indexing reuses colours instead of raising IndexError when regions outnumber the palette.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Folder for the per-subregion figures of this top-level region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue map and one latent-space scatter per subregion, each called with a
# single-entry region list; subregions are visited from most to fewest cells.
subregions = adata.obs[subregion_key].value_counts().index

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(
        adata, [subregion], subregion_key,
        xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
    )
    spin.plot_regions_latent(
        adata, 'X_umap_spin_sub', [subregion], subregion_key,
        s=0.5, legend=True, path=latent_path, dpi=dpi,
    )

## Gene markers

In [None]:
# CSV file (inside this region's figure folder) that receives the top marker genes per subregion.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion and store the top marker names in a CSV with one row per
# subregion (largest subregion first) and one column per rank.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10
markers = np.zeros((len(subregions),num_markers), dtype=object)

# Take the first num_markers gene names reported for each subregion.
for row, subregion in enumerate(subregions):
    markers[row] = sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values

# Label every row with its subregion. Rows follow cell-count order, so the default
# 0..n-1 index would only be a row position, not the subregion the markers belong to.
marker_df = pd.DataFrame(data=markers, index=subregions, columns=[f'rank{i+1}' for i in range(num_markers)])
marker_df.index.name = 'subregion'
marker_df.to_csv(markerpath)

In [None]:
# Tissue map coloured by the expression values of a single gene.
gene = 'NTSR1'
# obs_vector yields the gene's values as a 1-D array whether X is dense or sparse;
# X.flatten() is only available when X is a dense ndarray.
spin.plot_tissue(adata, colors=adata.obs_vector(gene), cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

In [None]:
# Tissue map coloured by the expression values of a single gene.
gene = 'GKN3'
# obs_vector yields the gene's values as a 1-D array whether X is dense or sparse;
# X.flatten() is only available when X is a dense ndarray.
spin.plot_tissue(adata, colors=adata.obs_vector(gene), cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

In [None]:
# Tissue map coloured by the expression values of a single gene.
gene = 'HRH2'
# obs_vector yields the gene's values as a 1-D array whether X is dense or sparse;
# X.flatten() is only available when X is a dense ndarray.
spin.plot_tissue(adata, colors=adata.obs_vector(gene), cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

# 6: Meninges

## Num nbrs

In [None]:
# Top-level region handled by this section; the label is part of the input file
# names and of the output folder created here.
topregion = '6'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Neighbour-count titration: for every candidate setting, load the matching file and
# save a tissue map and a latent-space scatter coloured by subregion.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]  # '_20nbrs' ... '_60nbrs'

nbrpath = os.path.join(regionpath, 'nbr_selection/')
os.makedirs(nbrpath, exist_ok=True)

palette = sc.pl.palettes.default_102

for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One colour per subregion, built once and shared by both plots. Modulo indexing
    # gives the same colours as the doubled palette and keeps working beyond it.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Folder for the clustering-resolution figures of this top-level region.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbour setting used for this region; it selects which titration file the resolution cell loads.
num_nbrs = '_30nbrs'

In [None]:
# Re-cluster this region at one Leiden resolution and save the tissue / latent views;
# the neighbour setting and the resolution are both encoded in the figure names.
res = 0.2

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

res_tag = str(res).replace('.', '')  # e.g. 0.2 -> '02'
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# key_added is the existing subregion column, so the labels loaded from file are overwritten.
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Modulo indexing reuses colours instead of raising IndexError when clusters outnumber the palette.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current AnnData object as the final sub-clustered file for this top-level region.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
# Create the output folder if it is missing so the write cannot fail on a fresh setup.
os.makedirs(os.path.dirname(final_path), exist_ok=True)
adata.write(final_path)

## Filter thresh

In [None]:
# Folder for the size-threshold diagnostic figures of this top-level region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no check-then-create
# race, and missing parent folders are created as well.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Remove subregions with too few cells: plot the cells that will be dropped, filter
# them out, then re-plot the remaining subregions.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

thresh = 1000  # a subregion is kept only if it has more than this many cells
# Boolean selection on the counts; avoids indexing an Index with the tuple np.where returns.
good_regions = counts[counts > thresh].index
adata.obs['keep'] = np.isin(adata.obs[subregion_key], good_regions)

tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
# Region list [False] on the 'keep' column: presumably highlights the cells about to be removed.
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# Reuse the mask computed above. .copy() turns the subset from a view into an
# independent AnnData, so later steps that write to it (e.g. the gene ranking that
# follows in this section) do not trigger an implicit copy.
adata = adata[adata.obs['keep'].to_numpy()].copy()

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Modulo indexing reuses colours instead of raising IndexError when regions outnumber the palette.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Folder for the per-subregion figures of this top-level region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue map and one latent-space scatter per subregion, each called with a
# single-entry region list; subregions are visited from most to fewest cells.
subregions = adata.obs[subregion_key].value_counts().index

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(
        adata, [subregion], subregion_key,
        xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
    )
    spin.plot_regions_latent(
        adata, 'X_umap_spin_sub', [subregion], subregion_key,
        s=0.5, legend=True, path=latent_path, dpi=dpi,
    )

## Gene markers

In [None]:
# CSV file (inside this region's figure folder) that receives the top marker genes per subregion.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion and store the top marker names in a CSV with one row per
# subregion (largest subregion first) and one column per rank.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10
markers = np.zeros((len(subregions),num_markers), dtype=object)

# Take the first num_markers gene names reported for each subregion.
for row, subregion in enumerate(subregions):
    markers[row] = sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values

# Label every row with its subregion. Rows follow cell-count order, so the default
# 0..n-1 index would only be a row position, not the subregion the markers belong to.
marker_df = pd.DataFrame(data=markers, index=subregions, columns=[f'rank{i+1}' for i in range(num_markers)])
marker_df.index.name = 'subregion'
marker_df.to_csv(markerpath)

In [None]:
# Tissue map coloured by the expression values of a single gene.
gene = 'CST3'
# obs_vector yields the gene's values as a 1-D array whether X is dense or sparse;
# X.flatten() is only available when X is a dense ndarray.
spin.plot_tissue(adata, colors=adata.obs_vector(gene), cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

# 7: Cortex 2

## Num nbrs

In [None]:
# Top-level region handled by this section; the label is part of the input file
# names and of the output folder created here.
topregion = '7'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Neighbour-count titration: for every candidate setting, load the matching file and
# save a tissue map and a latent-space scatter coloured by subregion.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]  # '_20nbrs' ... '_60nbrs'

nbrpath = os.path.join(regionpath, 'nbr_selection/')
os.makedirs(nbrpath, exist_ok=True)

palette = sc.pl.palettes.default_102

for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One colour per subregion, built once and shared by both plots. Modulo indexing
    # gives the same colours as the doubled palette and keeps working beyond it.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Folder for the clustering-resolution figures of this top-level region.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbour setting used for this region; it selects which titration file the resolution cell loads.
num_nbrs = '_30nbrs'

In [None]:
# Re-cluster this region at one Leiden resolution and save the tissue / latent views;
# the neighbour setting and the resolution are both encoded in the figure names.
res = 0.6

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

res_tag = str(res).replace('.', '')  # e.g. 0.6 -> '06'
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# key_added is the existing subregion column, so the labels loaded from file are overwritten.
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Modulo indexing reuses colours instead of raising IndexError when clusters outnumber the palette.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current AnnData object as the final sub-clustered file for this top-level region.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
# Create the output folder if it is missing so the write cannot fail on a fresh setup.
os.makedirs(os.path.dirname(final_path), exist_ok=True)
adata.write(final_path)

## Gene markers

In [None]:
# CSV file (inside this region's figure folder) that receives the top marker genes per subregion.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion and store the top marker names in a CSV with one row per
# subregion (largest subregion first) and one column per rank.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10
markers = np.zeros((len(subregions),num_markers), dtype=object)

# Take the first num_markers gene names reported for each subregion.
for row, subregion in enumerate(subregions):
    markers[row] = sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values

# Label every row with its subregion. Rows follow cell-count order, so the default
# 0..n-1 index would only be a row position, not the subregion the markers belong to.
marker_df = pd.DataFrame(data=markers, index=subregions, columns=[f'rank{i+1}' for i in range(num_markers)])
marker_df.index.name = 'subregion'
marker_df.to_csv(markerpath)

In [None]:
# Tissue map coloured by the expression values of a single gene.
gene = 'ALCAM'
# obs_vector yields the gene's values as a 1-D array whether X is dense or sparse;
# X.flatten() is only available when X is a dense ndarray.
spin.plot_tissue(adata, colors=adata.obs_vector(gene), cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

## Filter thresh

In [None]:
# Folder for the size-threshold diagnostic figures of this top-level region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no check-then-create
# race, and missing parent folders are created as well.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Remove subregions with too few cells: plot the cells that will be dropped, filter
# them out, then re-plot the remaining subregions.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

thresh = 1000  # a subregion is kept only if it has more than this many cells
# Boolean selection on the counts; avoids indexing an Index with the tuple np.where returns.
good_regions = counts[counts > thresh].index
adata.obs['keep'] = np.isin(adata.obs[subregion_key], good_regions)

tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
# Region list [False] on the 'keep' column: presumably highlights the cells about to be removed.
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# Reuse the mask computed above. .copy() turns the subset from a view into an
# independent AnnData, so later steps that write to it do not trigger an implicit copy.
adata = adata[adata.obs['keep'].to_numpy()].copy()

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Modulo indexing reuses colours instead of raising IndexError when regions outnumber the palette.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Folder for the per-subregion figures of this top-level region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue map and one latent-space scatter per subregion, each called with a
# single-entry region list; subregions are visited from most to fewest cells.
subregions = adata.obs[subregion_key].value_counts().index

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(
        adata, [subregion], subregion_key,
        xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
    )
    spin.plot_regions_latent(
        adata, 'X_umap_spin_sub', [subregion], subregion_key,
        s=0.5, legend=True, path=latent_path, dpi=dpi,
    )

# 8: Ob 2

## Num nbrs

In [None]:
# Top-level region handled by this section; the label is part of the input file
# names and of the output folder created here.
topregion = '8'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Neighbour-count titration: for every candidate setting, load the matching file and
# save a tissue map and a latent-space scatter coloured by subregion.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]  # '_20nbrs' ... '_60nbrs'

nbrpath = os.path.join(regionpath, 'nbr_selection/')
os.makedirs(nbrpath, exist_ok=True)

palette = sc.pl.palettes.default_102

for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One colour per subregion, built once and shared by both plots. Modulo indexing
    # gives the same colours as the doubled palette and keeps working beyond it.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Folder for the clustering-resolution figures of this top-level region.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbour setting used for this region; it selects which titration file the resolution cell loads.
num_nbrs = '_20nbrs'

In [None]:
# Re-cluster this region at one Leiden resolution and save the tissue / latent views;
# the neighbour setting and the resolution are both encoded in the figure names.
res = 0.2

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

res_tag = str(res).replace('.', '')  # e.g. 0.2 -> '02'
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# key_added is the existing subregion column, so the labels loaded from file are overwritten.
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Modulo indexing reuses colours instead of raising IndexError when clusters outnumber the palette.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current AnnData object as the final sub-clustered file for this top-level region.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
# Create the output folder if it is missing so the write cannot fail on a fresh setup.
os.makedirs(os.path.dirname(final_path), exist_ok=True)
adata.write(final_path)

## Gene markers

In [None]:
# CSV file (inside this region's figure folder) that receives the top marker genes per subregion.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion and store the top marker names in a CSV with one row per
# subregion (largest subregion first) and one column per rank.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10
markers = np.zeros((len(subregions),num_markers), dtype=object)

# Take the first num_markers gene names reported for each subregion.
for row, subregion in enumerate(subregions):
    markers[row] = sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values

# Label every row with its subregion. Rows follow cell-count order, so the default
# 0..n-1 index would only be a row position, not the subregion the markers belong to.
marker_df = pd.DataFrame(data=markers, index=subregions, columns=[f'rank{i+1}' for i in range(num_markers)])
marker_df.index.name = 'subregion'
marker_df.to_csv(markerpath)

In [None]:
# Tissue map coloured by the expression values of a single gene.
gene = 'KCTD12'
# obs_vector yields the gene's values as a 1-D array whether X is dense or sparse;
# X.flatten() is only available when X is a dense ndarray.
spin.plot_tissue(adata, colors=adata.obs_vector(gene), cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

In [None]:
# Tissue map coloured by the expression values of a single gene.
gene = 'VTN'
# obs_vector yields the gene's values as a 1-D array whether X is dense or sparse;
# X.flatten() is only available when X is a dense ndarray.
spin.plot_tissue(adata, colors=adata.obs_vector(gene), cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

## Filter thresh

In [None]:
# Folder for the size-threshold diagnostic figures of this top-level region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no check-then-create
# race, and missing parent folders are created as well.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Remove subregions with too few cells: plot the cells that will be dropped, filter
# them out, then re-plot the remaining subregions.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

thresh = 1000  # a subregion is kept only if it has more than this many cells
# Boolean selection on the counts; avoids indexing an Index with the tuple np.where returns.
good_regions = counts[counts > thresh].index
adata.obs['keep'] = np.isin(adata.obs[subregion_key], good_regions)

tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
# Region list [False] on the 'keep' column: presumably highlights the cells about to be removed.
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# Reuse the mask computed above. .copy() turns the subset from a view into an
# independent AnnData, so later steps that write to it do not trigger an implicit copy.
adata = adata[adata.obs['keep'].to_numpy()].copy()

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Modulo indexing reuses colours instead of raising IndexError when regions outnumber the palette.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Folder for the per-subregion figures of this top-level region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue map and one latent-space scatter per subregion, each called with a
# single-entry region list; subregions are visited from most to fewest cells.
subregions = adata.obs[subregion_key].value_counts().index

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(
        adata, [subregion], subregion_key,
        xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi,
    )
    spin.plot_regions_latent(
        adata, 'X_umap_spin_sub', [subregion], subregion_key,
        s=0.5, legend=True, path=latent_path, dpi=dpi,
    )

# 9: Thalamus

## Num nbrs

In [None]:
# Top-level region handled by this section; the label is part of the input file
# names and of the output folder created here.
topregion = '9'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Neighbour-count titration: for every candidate setting, load the matching file and
# save a tissue map and a latent-space scatter coloured by subregion.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]  # '_20nbrs' ... '_60nbrs'

nbrpath = os.path.join(regionpath, 'nbr_selection/')
os.makedirs(nbrpath, exist_ok=True)

palette = sc.pl.palettes.default_102

for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One colour per subregion, built once and shared by both plots. Modulo indexing
    # gives the same colours as the doubled palette and keeps working beyond it.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of this topregion.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbor-count suffix used for this topregion from here on; it selects which nbr_titration file is loaded.
num_nbrs = '_60nbrs'

In [None]:
# Leiden resolution applied to this topregion.
res = 0.5

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

# File-name tag: the resolution with its decimal point removed (0.5 -> '05').
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# The Leiden labels are stored in adata.obs[subregion_key].
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colors to subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current adata for this topregion under the 'final' data directory.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
adata.write(final_path)

## Gene markers

In [None]:
# Output CSV path for the per-subcluster marker-gene table of this topregion.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion with scanpy and save the top-ranked gene names as a CSV table.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10
markers = np.zeros((len(subregions),num_markers), dtype=object)

for i, subregion in enumerate(subregions):
    # Row i holds the first num_markers gene names returned for this subregion.
    top_markers = sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values
    markers[i] = top_markers

# Label each row with its actual subregion id. Rows follow the cell-count ordering of
# `subregions`, so a default 0..n-1 index would match the ids only by coincidence.
marker_df = pd.DataFrame(data=markers, index=list(subregions), columns=[f'rank{i+1}' for i in range(num_markers)])
marker_df.index.name = 'subregion'
marker_df.to_csv(markerpath)

In [None]:
# Color the tissue by the adata.X values of a single gene (no output path is passed).
gene = 'RGS16'
expression = adata[:,gene].X.flatten()
spin.plot_tissue(adata, colors=expression, cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

## Filter thresh

In [None]:
# Output directory for the size-threshold figures of this topregion.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Report subregion sizes, then drop every subregion that does not exceed the size threshold.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

# A subregion is kept only if it has strictly more than `thresh` cells.
thresh = 1000
good_regions = counts.index[(counts > thresh).to_numpy()]
keep_mask = np.isin(adata.obs[subregion_key], good_regions)
adata.obs['keep'] = keep_mask

# Plot the cells flagged keep == False, i.e. the ones removed below.
tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# From here on `adata` contains only the cells of the kept subregions.
adata = adata[keep_mask]

# Assign palette colors to the remaining subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Output directory for the per-subregion figures of this topregion.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue plot and one latent plot per subregion, largest subregion first.
subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells

for i, subregion in enumerate(subregions):
    selected = [subregion]
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(adata, selected, subregion_key, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_regions_latent(adata, 'X_umap_spin_sub', selected, subregion_key, s=0.5, legend=True, path=latent_path, dpi=dpi)

# 10: Cb 2

## Num nbrs

In [None]:
# Select the topregion handled by the following cells and make sure its output directory exists.
topregion = '10'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Candidate neighbor-count suffixes, '_20nbrs' through '_60nbrs'.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(nbrpath, exist_ok=True)

# Loop-invariant color list (the 102-color scanpy palette repeated twice).
palette = sc.pl.palettes.default_102*2

for num_nbrs in num_nbrss:

    # Load the nbr_titration file of this topregion for the current neighbor count.
    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One color per subregion, assigned in order of decreasing cell count. The mapping is
    # built once and shared by both plots; the index wraps around the palette so that more
    # subregions than colors reuse colors instead of raising IndexError.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[rank % len(palette)] for rank, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of this topregion.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbor-count suffix used for this topregion from here on; it selects which nbr_titration file is loaded.
num_nbrs = '_20nbrs'

In [None]:
# Leiden resolution applied to this topregion.
res = 0.25

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

# File-name tag: the resolution with its decimal point removed (0.25 -> '025').
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# The Leiden labels are stored in adata.obs[subregion_key].
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colors to subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current adata for this topregion under the 'final' data directory.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
adata.write(final_path)

## Gene markers

In [None]:
# Output CSV path for the per-subcluster marker-gene table of this topregion.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion with scanpy and save the top-ranked gene names as a CSV table.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10
markers = np.zeros((len(subregions),num_markers), dtype=object)

for i, subregion in enumerate(subregions):
    # Row i holds the first num_markers gene names returned for this subregion.
    top_markers = sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values
    markers[i] = top_markers

# Label each row with its actual subregion id. Rows follow the cell-count ordering of
# `subregions`, so a default 0..n-1 index would match the ids only by coincidence.
marker_df = pd.DataFrame(data=markers, index=list(subregions), columns=[f'rank{i+1}' for i in range(num_markers)])
marker_df.index.name = 'subregion'
marker_df.to_csv(markerpath)

In [None]:
# Color the tissue by the adata.X values of a single gene (no output path is passed).
gene = 'NRGN'
expression = adata[:,gene].X.flatten()
spin.plot_tissue(adata, colors=expression, cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

## Filter thresh

In [None]:
# Output directory for the size-threshold figures of this topregion.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Report subregion sizes, then drop every subregion that does not exceed the size threshold.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

# A subregion is kept only if it has strictly more than `thresh` cells.
thresh = 1000
good_regions = counts.index[(counts > thresh).to_numpy()]
keep_mask = np.isin(adata.obs[subregion_key], good_regions)
adata.obs['keep'] = keep_mask

# Plot the cells flagged keep == False, i.e. the ones removed below.
tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# From here on `adata` contains only the cells of the kept subregions.
adata = adata[keep_mask]

# Assign palette colors to the remaining subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Output directory for the per-subregion figures of this topregion.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue plot and one latent plot per subregion, largest subregion first.
subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells

for i, subregion in enumerate(subregions):
    selected = [subregion]
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(adata, selected, subregion_key, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_regions_latent(adata, 'X_umap_spin_sub', selected, subregion_key, s=0.5, legend=True, path=latent_path, dpi=dpi)

# 11: Choroid

## Num nbrs

In [None]:
# Select the topregion handled by the following cells and make sure its output directory exists.
topregion = '11'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Candidate neighbor-count suffixes, '_20nbrs' through '_60nbrs'.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(nbrpath, exist_ok=True)

# Loop-invariant color list (the 102-color scanpy palette repeated twice).
palette = sc.pl.palettes.default_102*2

for num_nbrs in num_nbrss:

    # Load the nbr_titration file of this topregion for the current neighbor count.
    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One color per subregion, assigned in order of decreasing cell count. The mapping is
    # built once and shared by both plots; the index wraps around the palette so that more
    # subregions than colors reuse colors instead of raising IndexError.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[rank % len(palette)] for rank, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of this topregion.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbor-count suffix used for this topregion from here on; it selects which nbr_titration file is loaded.
num_nbrs = '_60nbrs'

In [None]:
# Leiden resolution applied to this topregion.
res = 0.38
# res = 0.4

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

# File-name tag: the resolution with its decimal point removed (0.38 -> '038').
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# The Leiden labels are stored in adata.obs[subregion_key].
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colors to subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current adata for this topregion under the 'final' data directory.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
adata.write(final_path)

## Filter thresh

In [None]:
# Output directory for the size-threshold figures of this topregion.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Report subregion sizes, then drop every subregion that does not exceed the size threshold.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

# A subregion is kept only if it has strictly more than `thresh` cells.
thresh = 500
good_regions = counts.index[(counts > thresh).to_numpy()]
keep_mask = np.isin(adata.obs[subregion_key], good_regions)
adata.obs['keep'] = keep_mask

# Plot the cells flagged keep == False, i.e. the ones removed below.
tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# From here on `adata` contains only the cells of the kept subregions.
adata = adata[keep_mask]

# Assign palette colors to the remaining subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Output directory for the per-subregion figures of this topregion.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue plot and one latent plot per subregion, largest subregion first.
subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells

for i, subregion in enumerate(subregions):
    selected = [subregion]
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(adata, selected, subregion_key, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_regions_latent(adata, 'X_umap_spin_sub', selected, subregion_key, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Gene markers

In [None]:
# Output CSV path for the per-subcluster marker-gene table of this topregion.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion with scanpy and save the top-ranked gene names as a CSV table.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10
markers = np.zeros((len(subregions),num_markers), dtype=object)

for i, subregion in enumerate(subregions):
    # Row i holds the first num_markers gene names returned for this subregion.
    top_markers = sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values
    markers[i] = top_markers

# Label each row with its actual subregion id. Rows follow the cell-count ordering of
# `subregions`, so a default 0..n-1 index would match the ids only by coincidence.
marker_df = pd.DataFrame(data=markers, index=list(subregions), columns=[f'rank{i+1}' for i in range(num_markers)])
marker_df.index.name = 'subregion'
marker_df.to_csv(markerpath)

In [None]:
# Color the tissue by the adata.X values of a single gene (no output path is passed).
gene = 'TMEM212'
expression = adata[:,gene].X.flatten()
spin.plot_tissue(adata, colors=expression, cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

# 12: DG

## Num nbrs

In [None]:
# Select the topregion handled by the following cells and make sure its output directory exists.
topregion = '12'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Candidate neighbor-count suffixes, '_20nbrs' through '_60nbrs'.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(nbrpath, exist_ok=True)

# Loop-invariant color list (the 102-color scanpy palette repeated twice).
palette = sc.pl.palettes.default_102*2

for num_nbrs in num_nbrss:

    # Load the nbr_titration file of this topregion for the current neighbor count.
    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One color per subregion, assigned in order of decreasing cell count. The mapping is
    # built once and shared by both plots; the index wraps around the palette so that more
    # subregions than colors reuse colors instead of raising IndexError.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[rank % len(palette)] for rank, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of this topregion.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbor-count suffix used for this topregion from here on; it selects which nbr_titration file is loaded.
num_nbrs = '_30nbrs'

In [None]:
# Leiden resolution applied to this topregion.
res = 0.3
# res = 0.4

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

# File-name tag: the resolution with its decimal point removed (0.3 -> '03').
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# The Leiden labels are stored in adata.obs[subregion_key].
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colors to subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current adata for this topregion under the 'final' data directory.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
adata.write(final_path)

## Filter thresh

In [None]:
# Output directory for the size-threshold figures of this topregion.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Report subregion sizes, then drop every subregion that does not exceed the size threshold.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

# A subregion is kept only if it has strictly more than `thresh` cells.
thresh = 1000
good_regions = counts.index[(counts > thresh).to_numpy()]
keep_mask = np.isin(adata.obs[subregion_key], good_regions)
adata.obs['keep'] = keep_mask

# Plot the cells flagged keep == False, i.e. the ones removed below.
tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# From here on `adata` contains only the cells of the kept subregions.
adata = adata[keep_mask]

# Assign palette colors to the remaining subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Output directory for the per-subregion figures of this topregion.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue plot and one latent plot per subregion, largest subregion first.
subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells

for i, subregion in enumerate(subregions):
    selected = [subregion]
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(adata, selected, subregion_key, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_regions_latent(adata, 'X_umap_spin_sub', selected, subregion_key, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Gene markers

In [None]:
# Output CSV path for the per-subcluster marker-gene table of this topregion.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion with scanpy and save the top-ranked gene names as a CSV table.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10
markers = np.zeros((len(subregions),num_markers), dtype=object)

for i, subregion in enumerate(subregions):
    # Row i holds the first num_markers gene names returned for this subregion.
    top_markers = sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values
    markers[i] = top_markers

# Label each row with its actual subregion id. Rows follow the cell-count ordering of
# `subregions`, so a default 0..n-1 index would match the ids only by coincidence.
marker_df = pd.DataFrame(data=markers, index=list(subregions), columns=[f'rank{i+1}' for i in range(num_markers)])
marker_df.index.name = 'subregion'
marker_df.to_csv(markerpath)

In [None]:
# Color the tissue by the adata.X values of a single gene (no output path is passed).
gene = 'CLU'
expression = adata[:,gene].X.flatten()
spin.plot_tissue(adata, colors=expression, cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

# 13: CA

## Num nbrs

In [None]:
# Select the topregion handled by the following cells and make sure its output directory exists.
topregion = '13'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Candidate neighbor-count suffixes, '_20nbrs' through '_60nbrs'.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(nbrpath, exist_ok=True)

# Loop-invariant color list (the 102-color scanpy palette repeated twice).
palette = sc.pl.palettes.default_102*2

for num_nbrs in num_nbrss:

    # Load the nbr_titration file of this topregion for the current neighbor count.
    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One color per subregion, assigned in order of decreasing cell count. The mapping is
    # built once and shared by both plots; the index wraps around the palette so that more
    # subregions than colors reuse colors instead of raising IndexError.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[rank % len(palette)] for rank, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of this topregion.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbor-count suffix used for this topregion from here on; it selects which nbr_titration file is loaded.
num_nbrs = '_30nbrs'

In [None]:
# Leiden resolution applied to this topregion.
res = 0.15

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

# File-name tag: the resolution with its decimal point removed (0.15 -> '015').
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# The Leiden labels are stored in adata.obs[subregion_key].
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colors to subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current adata for this topregion under the 'final' data directory.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
adata.write(final_path)

## Filter thresh

In [None]:
# Output directory for the size-threshold figures of this topregion.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Report subregion sizes, then drop every subregion that does not exceed the size threshold.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

# A subregion is kept only if it has strictly more than `thresh` cells.
thresh = 1000
good_regions = counts.index[(counts > thresh).to_numpy()]
keep_mask = np.isin(adata.obs[subregion_key], good_regions)
adata.obs['keep'] = keep_mask

# Plot the cells flagged keep == False, i.e. the ones removed below.
tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# From here on `adata` contains only the cells of the kept subregions.
adata = adata[keep_mask]

# Assign palette colors to the remaining subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Output directory for the per-subregion figures of this topregion.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# Save one tissue plot and one latent plot per subregion, largest subregion first.
subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells

for i, subregion in enumerate(subregions):
    selected = [subregion]
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    spin.plot_regions_tissue(adata, selected, subregion_key, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_regions_latent(adata, 'X_umap_spin_sub', selected, subregion_key, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Gene markers

In [None]:
# Output CSV path for the per-subcluster marker-gene table of this topregion.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion with scanpy and save the top-ranked gene names as a CSV table.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10
markers = np.zeros((len(subregions),num_markers), dtype=object)

for i, subregion in enumerate(subregions):
    # Row i holds the first num_markers gene names returned for this subregion.
    top_markers = sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values
    markers[i] = top_markers

# Label each row with its actual subregion id. Rows follow the cell-count ordering of
# `subregions`, so a default 0..n-1 index would match the ids only by coincidence.
marker_df = pd.DataFrame(data=markers, index=list(subregions), columns=[f'rank{i+1}' for i in range(num_markers)])
marker_df.index.name = 'subregion'
marker_df.to_csv(markerpath)

In [None]:
# Color the tissue by the adata.X values of a single gene (no output path is passed).
gene = 'VAMP1'
expression = adata[:,gene].X.flatten()
spin.plot_tissue(adata, colors=expression, cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

# 14: Midbrain 2

## Num nbrs

In [None]:
# Select the topregion handled by the following cells and make sure its output directory exists.
topregion = '14'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# Candidate neighbor-count suffixes, '_20nbrs' through '_60nbrs'.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]
# '_30nbrs' is skipped for this topregion (presumably no such file exists -- TODO confirm).
num_nbrss.remove('_30nbrs')

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(nbrpath, exist_ok=True)

# Loop-invariant color list (the 102-color scanpy palette repeated twice).
palette = sc.pl.palettes.default_102*2

for num_nbrs in num_nbrss:

    # Load the nbr_titration file of this topregion for the current neighbor count.
    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    # One color per subregion, assigned in order of decreasing cell count. The mapping is
    # built once and shared by both plots; the index wraps around the palette so that more
    # subregions than colors reuse colors instead of raising IndexError.
    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    cmap = {region: palette[rank % len(palette)] for rank, region in enumerate(regions)}

    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, adata.obsm['X_umap_spin_sub'][:,0], adata.obsm['X_umap_spin_sub'][:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of this topregion.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(respath, exist_ok=True)

In [None]:
# Neighbor-count suffix used for this topregion from here on; it selects which nbr_titration file is loaded.
num_nbrs = '_50nbrs'

In [None]:
# Leiden resolution applied to this topregion.
res = 0.03

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

# File-name tag: the resolution with its decimal point removed (0.03 -> '003').
res_tag = str(res).replace('.', '')
tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# The Leiden labels are stored in adata.obs[subregion_key].
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

# Assign palette colors to subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the current adata for this topregion under the 'final' data directory.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
adata.write(final_path)

## Filter thresh

In [None]:
# Output directory for the size-threshold figures of this topregion.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# exist_ok avoids the check-then-create race and also creates missing parent directories.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Report subregion sizes, then drop every subregion that does not exceed the size threshold.
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

# A subregion is kept only if it has strictly more than `thresh` cells.
thresh = 500
good_regions = counts.index[(counts > thresh).to_numpy()]
keep_mask = np.isin(adata.obs[subregion_key], good_regions)
adata.obs['keep'] = keep_mask

# Plot the cells flagged keep == False, i.e. the ones removed below.
tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# From here on `adata` contains only the cells of the kept subregions.
adata = adata[keep_mask]

# Assign palette colors to the remaining subregions in order of decreasing cell count.
regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
cmap = {region: palette[rank] for rank, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
umap = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap[:,0], umap[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Output directory for the one-figure-per-subregion plots of the current top region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# For every subregion (largest first), call the spin region plotters with that subregion
# as the only selected region and save the two figures under `subplotpath`.
subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    selected = [subregion]
    spin.plot_regions_tissue(adata, selected, subregion_key, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_regions_latent(adata, 'X_umap_spin_sub', selected, subregion_key, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Gene markers

In [None]:
# CSV destination for the per-subregion marker-gene table written by the next cell.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion and export the top `num_markers` gene names of each
# subregion as one CSV row.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10

# One array of gene names per subregion, in the order the ranking table lists them.
markers = [
    sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values
    for subregion in subregions
]

# Index rows by the actual subregion labels: the column named 'subregion' in the CSV
# previously held the row position (0..n-1), which only matches the label by coincidence.
marker_df = pd.DataFrame(
    data=markers,
    index=pd.Index(subregions, name='subregion'),
    columns=[f'rank{i+1}' for i in range(num_markers)],
)
marker_df.to_csv(markerpath)

In [None]:
# Color the tissue plot by the values of one gene's column of adata.X.
# No `path` argument is passed here, unlike the other plot calls in this notebook.
gene = 'VAMP1'
gene_values = adata[:,gene].X.flatten()
spin.plot_tissue(adata, colors=gene_values, cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

# 15: Spinal cord

## Num nbrs

In [None]:
# Select the top region analysed in the following sections and make its figure directory.
topregion = '15'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# For each neighbor-count titration run of this top region, plot the subregion labels
# stored in the file on the tissue coordinates and on the UMAP embedding.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]
num_nbrss.remove('_30nbrs')  # 30-neighbor run is excluded here (reason not recorded — TODO confirm)

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(nbrpath, exist_ok=True)

palette = sc.pl.palettes.default_102

for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    # One color map shared by both plots. Indexing modulo the palette length gives the
    # same colors as the former `default_102*2` for up to 204 regions and never overflows.
    cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

    umap_coords = adata.obsm['X_umap_spin_sub']
    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, umap_coords[:,0], umap_coords[:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of the current top region.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(respath, exist_ok=True)

In [None]:
# Filename suffix selecting which neighbor-titration .h5ad is loaded by the resolution cell
# below; it is also embedded in the names of the figures that cell saves.
num_nbrs = '_20nbrs'

In [None]:
# Re-cluster the selected neighbor-titration run at one Leiden resolution and save
# tissue / latent scatter plots colored by the resulting subregions.
res = 0.3
res_tag = str(res).replace('.', '')  # resolution with the dot stripped, for use in filenames

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# Cluster on the neighbor graph registered under `subregion_key`; labels are written to
# adata.obs[subregion_key], replacing any column of that name loaded from the file.
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Index modulo the palette length so more clusters than colors cannot raise IndexError.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

umap_coords = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap_coords[:,0], umap_coords[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the re-clustered AnnData for this top region. In notebook order this cell precedes
# the "Filter thresh" section, so the file is written before small subregions are dropped.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
adata.write(final_path)

## Filter thresh

In [None]:
# Output directory for the size-threshold filtering figures of the current top region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Drop subregions whose cell count does not exceed `thresh`, saving plots of the cells
# flagged for removal and of the subregions that remain.
thresh = 500
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

# Labels of the subregions with strictly more than `thresh` cells.
good_regions = counts[counts > thresh].index
keep_mask = np.isin(adata.obs[subregion_key], good_regions)
adata.obs['keep'] = keep_mask

tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
# Region list [False] on the 'keep' column selects the cells that are about to be removed.
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=0.5, legend=True, path=latent_path, dpi=dpi)

# .copy() makes `adata` a standalone object instead of a view onto the unfiltered data,
# so later in-place steps operate on real storage.
adata = adata[keep_mask].copy()

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Index modulo the palette length so more subregions than colors cannot raise IndexError.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
umap_coords = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap_coords[:,0], umap_coords[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Output directory for the one-figure-per-subregion plots of the current top region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# For every subregion (largest first), call the spin region plotters with that subregion
# as the only selected region and save the two figures under `subplotpath`.
subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    selected = [subregion]
    spin.plot_regions_tissue(adata, selected, subregion_key, xkey=xkey, ykey=ykey, s=0.5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_regions_latent(adata, 'X_umap_spin_sub', selected, subregion_key, s=0.5, legend=True, path=latent_path, dpi=dpi)

## Gene markers

In [None]:
# CSV destination for the per-subregion marker-gene table written by the next cell.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion and export the top `num_markers` gene names of each
# subregion as one CSV row.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10

# One array of gene names per subregion, in the order the ranking table lists them.
markers = [
    sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values
    for subregion in subregions
]

# Index rows by the actual subregion labels: the column named 'subregion' in the CSV
# previously held the row position (0..n-1), which only matches the label by coincidence.
marker_df = pd.DataFrame(
    data=markers,
    index=pd.Index(subregions, name='subregion'),
    columns=[f'rank{i+1}' for i in range(num_markers)],
)
marker_df.to_csv(markerpath)

In [None]:
# Color the tissue plot by the values of one gene's column of adata.X.
# No `path` argument is passed here, unlike the other plot calls in this notebook.
gene = 'SYT2'
gene_values = adata[:,gene].X.flatten()
spin.plot_tissue(adata, colors=gene_values, cmap='viridis', xkey=xkey, ykey=ykey, s=0.5, figscale=20)

# 16: Cortex 3

## Num nbrs

In [None]:
# Select the top region analysed in the following sections and make its figure directory.
topregion = '16'
regionpath = os.path.join(basepath, f'{topregion}/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(regionpath, exist_ok=True)

In [None]:
# For each neighbor-count titration run of this top region, plot the subregion labels
# stored in the file on the tissue coordinates and on the UMAP embedding.
num_nbrss = [f'_{i*10}nbrs' for i in range(2,7)]

nbrpath = os.path.join(regionpath, 'nbr_selection/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(nbrpath, exist_ok=True)

palette = sc.pl.palettes.default_102

for num_nbrs in num_nbrss:

    adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

    tissue_path = os.path.join(nbrpath, f'tissue_colored_by_region{num_nbrs}.png')
    latent_path = os.path.join(nbrpath, f'latent_colored_by_region{num_nbrs}.png')

    regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
    # One color map shared by both plots. Indexing modulo the palette length gives the
    # same colors as the former `default_102*2` for up to 204 regions and never overflows.
    cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

    umap_coords = adata.obsm['X_umap_spin_sub']
    spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_latent(adata, umap_coords[:,0], umap_coords[:,1],
                     colors=adata.obs[subregion_key], cmap=cmap, s=5, legend=True, path=latent_path, dpi=dpi)

## Resolution

In [None]:
# Output directory for the resolution-selection figures of the current top region.
respath = os.path.join(regionpath, 'res_selection/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(respath, exist_ok=True)

In [None]:
# Filename suffix selecting which neighbor-titration .h5ad is loaded by the resolution cell
# below; it is also embedded in the names of the figures that cell saves.
num_nbrs = '_30nbrs'

In [None]:
# Re-cluster the selected neighbor-titration run at one Leiden resolution and save
# tissue / latent scatter plots colored by the resulting subregions.
res = 0.08
res_tag = str(res).replace('.', '')  # resolution with the dot stripped, for use in filenames

adata = sc.read_h5ad(f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/nbr_titration/atlas_spin_concat_nocombat_subclustered_topregion{topregion}{num_nbrs}.h5ad')

tissue_path = os.path.join(respath, f"tissue_colored_by_region{num_nbrs}_res{res_tag}.png")
latent_path = os.path.join(respath, f"latent_colored_by_region{num_nbrs}_res{res_tag}.png")

# Cluster on the neighbor graph registered under `subregion_key`; labels are written to
# adata.obs[subregion_key], replacing any column of that name loaded from the file.
sc.tl.leiden(adata, resolution=res, key_added=subregion_key, neighbors_key=subregion_key)

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Index modulo the palette length so more clusters than colors cannot raise IndexError.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

umap_coords = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap_coords[:,0], umap_coords[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=5, legend=True, path=latent_path, dpi=dpi)

In [None]:
# Save the re-clustered AnnData for this top region. In notebook order this cell precedes
# the "Filter thresh" section, so the file is written before small subregions are dropped.
final_path = f'/stanley/WangLab/kamal/data/integrated/spatial/atlas/final/topregion{topregion}_subclustered.h5ad'
adata.write(final_path)

## Filter thresh

In [None]:
# Output directory for the size-threshold filtering figures of the current top region.
threshpath = os.path.join(regionpath, 'thresh_selection/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(threshpath, exist_ok=True)

In [None]:
# Drop subregions whose cell count does not exceed `thresh`, saving plots of the cells
# flagged for removal and of the subregions that remain.
thresh = 500
counts = adata.obs[subregion_key].value_counts()
print(f'Cluster size = {len(adata)}')
print(counts)

# Labels of the subregions with strictly more than `thresh` cells.
good_regions = counts[counts > thresh].index
keep_mask = np.isin(adata.obs[subregion_key], good_regions)
adata.obs['keep'] = keep_mask

tissue_path = os.path.join(threshpath, 'tissue_colored_by_removed_cells.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_removed_cells.png')
# Region list [False] on the 'keep' column selects the cells that are about to be removed.
spin.plot_regions_tissue(adata, [False], 'keep', xkey=xkey, ykey=ykey, s=5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_regions_latent(adata, 'X_umap_spin_sub', [False], 'keep', s=5, legend=True, path=latent_path, dpi=dpi)

# .copy() makes `adata` a standalone object instead of a view onto the unfiltered data,
# so later in-place steps operate on real storage.
adata = adata[keep_mask].copy()

regions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
palette = sc.pl.palettes.default_102
# Index modulo the palette length so more subregions than colors cannot raise IndexError.
cmap = {region: palette[i % len(palette)] for i, region in enumerate(regions)}

tissue_path = os.path.join(threshpath, 'tissue_colored_by_regions_filtered.png')
latent_path = os.path.join(threshpath, 'latent_colored_by_regions_filtered.png')
umap_coords = adata.obsm['X_umap_spin_sub']
spin.plot_tissue(adata, colors=adata.obs[subregion_key], cmap=cmap, xkey=xkey, ykey=ykey, s=5, figscale=20, path=tissue_path, dpi=dpi)
spin.plot_latent(adata, umap_coords[:,0], umap_coords[:,1],
                 colors=adata.obs[subregion_key], cmap=cmap, s=5, legend=True, path=latent_path, dpi=dpi)

## Plot individual subregions

In [None]:
# Output directory for the one-figure-per-subregion plots of the current top region.
subplotpath = os.path.join(regionpath, 'subregion_plots/')
# exist_ok=True replaces the check-then-create pattern: no race, and re-running is a no-op.
os.makedirs(subplotpath, exist_ok=True)

In [None]:
# For every subregion (largest first), call the spin region plotters with that subregion
# as the only selected region and save the two figures under `subplotpath`.
subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells

for subregion in subregions:
    tissue_path = os.path.join(subplotpath, f'tissue_colored_by_subregion{subregion}.png')
    latent_path = os.path.join(subplotpath, f'latent_colored_by_subregion{subregion}.png')
    selected = [subregion]
    spin.plot_regions_tissue(adata, selected, subregion_key, xkey=xkey, ykey=ykey, s=5, figscale=20, path=tissue_path, dpi=dpi)
    spin.plot_regions_latent(adata, 'X_umap_spin_sub', selected, subregion_key, s=5, legend=True, path=latent_path, dpi=dpi)

## Gene markers

In [None]:
# CSV destination for the per-subregion marker-gene table written by the next cell.
markerpath = os.path.join(regionpath, 'top_10_gene_markers_per_subcluster.csv')

In [None]:
# Rank genes per subregion and export the top `num_markers` gene names of each
# subregion as one CSV row.
sc.tl.rank_genes_groups(adata, groupby=subregion_key, use_raw=False)

subregions = adata.obs[subregion_key].value_counts().index # sorted by number of cells
num_markers = 10

# One array of gene names per subregion, in the order the ranking table lists them.
markers = [
    sc.get.rank_genes_groups_df(adata, group=[subregion])['names'][:num_markers].values
    for subregion in subregions
]

# Index rows by the actual subregion labels: the column named 'subregion' in the CSV
# previously held the row position (0..n-1), which only matches the label by coincidence.
marker_df = pd.DataFrame(
    data=markers,
    index=pd.Index(subregions, name='subregion'),
    columns=[f'rank{i+1}' for i in range(num_markers)],
)
marker_df.to_csv(markerpath)

In [None]:
# Color the tissue plot by the values of one gene's column of adata.X.
# No `path` argument is passed here, unlike the other plot calls in this notebook.
gene = 'PLCH1'
gene_values = adata[:,gene].X.flatten()
spin.plot_tissue(adata, colors=gene_values, cmap='viridis', xkey=xkey, ykey=ykey, s=5, figscale=20)