In [74]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
import anndata

from dredFISH.Analysis import basicu
from dredFISH.Analysis import powerplots

# prep analysis related metadata

In [81]:
# Read the Allen cell-type structures.

# Location of the Allen scRNA-seq matrix (.h5ad).
scrna_path = '/bigstore/GeneralStorage/fangming/projects/dredfish/data/rna/scrna_ss_ctxhippo_a_exon_DPNMF_matrix.h5ad'

# Open the matrix in read-only backed mode.
# `anndata.read` is a deprecated alias of `anndata.read_h5ad`; call the
# explicit reader so the cell keeps working on current anndata releases.
scrna = anndata.read_h5ad(scrna_path, backed='r')
scrna

AnnData object with n_obs × n_vars = 73347 × 24 backed at '/bigstore/GeneralStorage/fangming/projects/dredfish/data/rna/scrna_ss_ctxhippo_a_exon_DPNMF_matrix.h5ad'
    obs: 'donor_sex_id', 'donor_sex_label', 'donor_sex_color', 'region_id', 'region_label', 'region_color', 'platform_label', 'cluster_order', 'cluster_label', 'cluster_color', 'subclass_order', 'subclass_label', 'subclass_color', 'neighborhood_id', 'neighborhood_label', 'neighborhood_color', 'class_order', 'class_label', 'class_color', 'exp_component_name', 'external_donor_name_label', 'full_genotype_label', 'facs_population_plan_label', 'injection_roi_label', 'injection_materials_label', 'injection_method_label', 'injection_type_label', 'full_genotype_id', 'full_genotype_color', 'external_donor_name_id', 'external_donor_name_color', 'facs_population_plan_id', 'facs_population_plan_color', 'injection_materials_id', 'injection_materials_color', 'injection_method_id', 'injection_method_color', 'injection_roi_id', 'injection_r

In [82]:
# Cell-type annotation columns of `scrna.obs`, ordered from the coarsest
# level to the finest one.
celltype_levels = [
    'class_label',
    'neighborhood_label',
    'subclass_label',
    # 'supertype_label',
    'cluster_label',
]

# Shape of the unique-label array at every level, in the order listed above.
tuple(scrna.obs[level].unique().shape for level in celltype_levels)

((3,), (8,), (42,), (382,))

In [93]:
# Number of cells in every (class, neighborhood, subclass, cluster) combination
# that actually occurs in the data.
# `observed=True` makes pandas enumerate only the combinations that are present
# instead of the full cartesian product of all categories, and avoids the
# FutureWarning recent pandas emits when `observed` is left implicit.
celltype_sizes = scrna.obs.groupby(celltype_levels, observed=True).size()
# Observed groups always have at least one row, so this filter is a no-op here;
# it is retained so the result never contains empty combinations.
celltype_sizes = celltype_sizes[celltype_sizes != 0]
celltype_sizes

class_label   neighborhood_label  subclass_label  cluster_label
GABAergic     CGE                 Lamp5           5_Lamp5 Lhx6     160
                                                  6_Lamp5 Lhx6     236
                                                  7_Lamp5 Lhx6     393
                                                  8_Lamp5 Lhx6     157
                                                  9_Lamp5 Lhx6      23
                                                                  ... 
Non-Neuronal  Other               SMC-Peri        381_SMC-Peri       5
                                                  382_SMC-Peri      59
                                  VLMC            383_VLMC          50
                                                  384_VLMC          64
                                                  385_VLMC           6
Length: 382, dtype: int64

In [108]:
# Manual display order of the neighborhood labels.
neighbor_manual_order = np.array([
    'CGE', 'MGE',
    'L2/3 IT', 'L4/5/6 IT Car3', 'NP/CT/L6b', 'PT',
    'DG/SUB/CA',
    'Other',
])

# Map every neighborhood label to the list of subclass labels found in it
# (labels are kept in order of first appearance within each group).
neighbors = {
    label: members.unique().tolist()
    for label, members in scrna.obs.groupby('neighborhood_label')['subclass_label']
}

# Show the mapping arranged in the manual neighborhood order.
pd.Series(neighbors).loc[neighbor_manual_order]

CGE                                              [Vip, Lamp5, Sncg]
MGE                                         [Pvalb, Sst, Sst Chodl]
L2/3 IT           [L2/3 IT CTX, L2/3 IT PPP, L2/3 IT RHP, L2/3 I...
L4/5/6 IT Car3    [L5 IT CTX, L4/5 IT CTX, L6 IT CTX, Car3, L5/6...
NP/CT/L6b         [L6 CT CTX, L5/6 NP CTX, L6b CTX, L6b/CT ENT, ...
PT                                  [L5 PT CTX, L4 RSP-ACA, L5 PPP]
DG/SUB/CA                  [DG, CA1-ProS, SUB-ProS, CA3, CA2-IG-FC]
Other             [Oligo, Astro, Endo, SMC-Peri, Micro-PVM, CR, ...
dtype: object

In [136]:
def _positions(order, labels):
    """Return a Series giving the 0-based position in `order` of each entry of `labels`."""
    lookup = pd.Series(np.arange(len(order)), index=order)
    return lookup.loc[labels]


# Manual ordering of the subclass labels; blank lines and comments separate
# the visual groups.
subclass_manual_order = np.array([
    # MGE neighborhood
    'Pvalb', 'Sst', 'Sst Chodl',

    # CGE neighborhood
    'Vip', 'Sncg', 'Lamp5',

    'Meis2',

    'L2/3 IT CTX', 'L4/5 IT CTX', 'L5 IT CTX', 'L6 IT CTX', 'Car3',

    'L2 IT ENTm', 'L2 IT ENTl', 'L2/3 IT ENTl', 'L2/3 IT PPP', 'L2/3 IT RHP',
    'L3 IT ENT', 'L5/6 IT TPE-ENT', 'L6 IT ENTl',

    # NP/CT/L6b neighborhood
    'L6 CT CTX', 'CT SUB', 'L6b CTX', 'L6b/CT ENT', 'L5/6 NP CTX', 'NP PPP', 'NP SUB',

    # PT neighborhood
    'L4 RSP-ACA', 'L5 PPP', 'L5 PT CTX',

    # DG/SUB/CA neighborhood
    'SUB-ProS', 'CA1-ProS', 'CA2-IG-FC', 'CA3', 'DG',

    'CR',

    'Astro', 'Endo', 'Micro-PVM', 'SMC-Peri', 'VLMC', 'Oligo',
])

# Manual ordering of the column indices (exported later under the key 'l3_bits').
col_manual_order = np.array([
    4,        # Pvalb
    1, 2,     # inhibitory

    5, 17, 12, 22, 23, 14, 20,

    16,

    10, 13, 9,

    7,        # Pan L6: NP, CT, L6b, Sub
    3,        # L6b/CT
    15,       # NP
    8,        # PT
    0,
    11, 19,   # pan hippo
    18,
    21,       # non-neuronal
    6,
])

# Positions (within the subclass order) of the labels chosen as split points.
splitat = _positions(
    subclass_manual_order,
    ['Meis2', 'Car3', 'L6 IT ENTl', 'NP SUB', 'L5 PT CTX', 'DG', 'CR'],
)

# Positions (within the column order) of the columns chosen as split points.
splitat_v = _positions(col_manual_order, [2, 9])

In [137]:
# One "husl" color per subclass, assigned in the manual subclass order.
# Each value is a length-3 numpy array of RGB components.
subclass_palette_new = dict(
    zip(
        subclass_manual_order,
        np.array(sns.color_palette("husl", len(subclass_manual_order))),
    )
)

# Display the palette swatches.
sns.color_palette("husl", len(subclass_manual_order))

In [138]:
# Collect the analysis metadata in a JSON-serializable dict and write it to disk.
#   l2_clsts   : neighborhood labels in manual order
#   l2_to_l3   : neighborhood label -> list of its subclass labels
#   l3_clsts   : subclass labels in manual order
#   l3_bits    : column indices in manual order (cast to plain Python ints for JSON)
#   l3_palette : subclass label -> hex color string
#   l3_hlines  : positions in `l3_clsts` used as split points
#   l3_vlines  : positions in `l3_bits` used as split points
a = dict()
a['l2_clsts'] = list(neighbor_manual_order)
a['l2_to_l3'] = {i: list(j) for i, j in neighbors.items()}

a['l3_clsts'] = list(subclass_manual_order)
a['l3_bits'] = [int(i) for i in list(col_manual_order)]
# Scale each channel to an integer and clamp it to 0..255: a channel equal to
# 1.0 would otherwise become 256, which is outside the 8-bit range
# (presumably what `powerplots.rgb_to_hex` expects — TODO confirm).
# Channels below 1.0 produce exactly the same integers as before.
a['l3_palette'] = {clst: powerplots.rgb_to_hex(*np.clip((np.asarray(color)*256).astype(int), 0, 255))
                       for clst, color in subclass_palette_new.items()}
a['l3_hlines'] = splitat.tolist()
a['l3_vlines'] = splitat_v.tolist()

output = '/bigstore/GeneralStorage/fangming/projects/dredfish/data_dump/analysis_meta_Mar31.json'
with open(output, 'w') as fh:
    json.dump(a, fh)

In [139]:
# Round-trip check: read the JSON file back from `output` and display it.
with open(output, 'r') as meta_file:
    b = json.loads(meta_file.read())
b


{'l2_clsts': ['CGE',
  'MGE',
  'L2/3 IT',
  'L4/5/6 IT Car3',
  'NP/CT/L6b',
  'PT',
  'DG/SUB/CA',
  'Other'],
 'l2_to_l3': {'CGE': ['Vip', 'Lamp5', 'Sncg'],
  'DG/SUB/CA': ['DG', 'CA1-ProS', 'SUB-ProS', 'CA3', 'CA2-IG-FC'],
  'L2/3 IT': ['L2/3 IT CTX',
   'L2/3 IT PPP',
   'L2/3 IT RHP',
   'L2/3 IT ENTl',
   'L2 IT ENTl',
   'L3 IT ENT',
   'L2 IT ENTm'],
  'L4/5/6 IT Car3': ['L5 IT CTX',
   'L4/5 IT CTX',
   'L6 IT CTX',
   'Car3',
   'L5/6 IT TPE-ENT',
   'L6 IT ENTl'],
  'MGE': ['Pvalb', 'Sst', 'Sst Chodl'],
  'NP/CT/L6b': ['L6 CT CTX',
   'L5/6 NP CTX',
   'L6b CTX',
   'L6b/CT ENT',
   'CT SUB',
   'NP PPP',
   'NP SUB'],
  'Other': ['Oligo',
   'Astro',
   'Endo',
   'SMC-Peri',
   'Micro-PVM',
   'CR',
   'Meis2',
   'VLMC'],
  'PT': ['L5 PT CTX', 'L4 RSP-ACA', 'L5 PPP']},
 'l3_clsts': ['Pvalb',
  'Sst',
  'Sst Chodl',
  'Vip',
  'Sncg',
  'Lamp5',
  'Meis2',
  'L2/3 IT CTX',
  'L4/5 IT CTX',
  'L5 IT CTX',
  'L6 IT CTX',
  'Car3',
  'L2 IT ENTm',
  'L2 IT ENTl',
  'L2/3 IT 

In [112]:
# Sanity check: show the type of the first 'l3_bits' entry after the JSON round trip.
type(b['l3_bits'][0])

int

In [71]:
# Display the reloaded metadata dict.
# NOTE(review): this cell's execution count is lower than those of the cells
# above, so its saved output may come from an earlier run — re-run to refresh.
b

{'l3_clsts': ['Lamp5',
  'Sncg',
  'Vip',
  'Pvalb',
  'Sst',
  'Sst Chodl',
  'Meis2',
  'L2/3 IT CTX',
  'L4/5 IT CTX',
  'L5 IT CTX',
  'L6 IT CTX',
  'L2 IT ENTm',
  'L2 IT ENTl',
  'L2/3 IT ENTl',
  'L3 IT ENT',
  'L5/6 IT TPE-ENT',
  'L6 IT ENTl',
  'L2/3 IT PPP',
  'L2/3 IT RHP',
  'Car3',
  'L6 CT CTX',
  'CT SUB',
  'L6b CTX',
  'L6b/CT ENT',
  'L5/6 NP CTX',
  'NP PPP',
  'NP SUB',
  'L4 RSP-ACA',
  'L5 PPP',
  'L5 PT CTX',
  'SUB-ProS',
  'CA1-ProS',
  'CA2-IG-FC',
  'CA3',
  'DG',
  'CR',
  'Astro',
  'Endo',
  'Micro-PVM',
  'Oligo',
  'SMC-Peri',
  'VLMC'],
 'l3_bits': [1,
  2,
  4,
  5,
  17,
  12,
  22,
  23,
  14,
  20,
  0,
  16,
  10,
  13,
  9,
  7,
  3,
  15,
  8,
  19,
  11,
  6,
  21,
  18],
 'l3_palette': {'Lamp5': '#F77089',
  'Sncg': '#F87373',
  'Vip': '#F87553',
  'Pvalb': '#F07D32',
  'Sst': '#DF8732',
  'Sst Chodl': '#D28E31',
  'Meis2': '#C69431',
  'L2/3 IT CTX': '#BC9831',
  'L4/5 IT CTX': '#B29C31',
  'L5 IT CTX': '#A89F31',
  'L6 IT CTX': '#9DA231',
 