In [1]:
# Put ../src/ on the import path. This has to run before the imports below
# that are presumably resolved from that directory (config, refcm) -- confirm.
import sys
sys.path.append('../src/')

import config
import logging
import scanpy as sc
from refcm import RefCM

# Start logging at DEBUG level via the project's config helper; the
# "[module] [LEVEL] : ..." lines in the cell outputs below come from this.
config.start_logging(logging.DEBUG)

In [2]:
# Load IPython's autoreload extension and select mode 2, so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Read every dataset from ../data/<name>.h5ad into a dict keyed by its name.
# Insertion order (MTG, ALM, VISp) is the order later loops iterate in.
ds = {
    s: sc.read_h5ad(f'../data/{s}.h5ad')
    for s in ('MTG', 'ALM', 'VISp')
}

[h5py._conv      ] [DEBUG   ] : Creating converter from 3 to 5


In [4]:
# NOTE(review): this import sits mid-notebook; it would normally live in the
# import cell at the top.
from embeddings import PCAEmbedder, HVGEmbedder

# Two alternative embedders, passed to RefCM in later cells.
# NOTE(review): the meaning of the positional 100 is not visible here
# (presumably a gene / component count) -- confirm against embeddings.py.
hvg_embedder = HVGEmbedder(100)
pca_embedder = PCAEmbedder(100, log_norm=False)

In [10]:
# Query / reference dataset names; also read by later cells.
q = 'MTG'
ref = 'ALM'

# RefCM instance using the HVG embedder, with both cache flags switched off.
# NOTE(review): the captured log still reports "Loading cached mapping costs
# from cache.json" for this run -- confirm that cache_load=False is honoured.
rcm = RefCM(
    cache_load=False,
    cache_save=False,
    embedder=hvg_embedder,
    discovery_threshold=None,
)

# Annotate the query against the reference; 'labels34' is passed for both
# datasets (presumably the label column of each -- confirm).
m = rcm.annotate(
    ds[q], q,
    ds[ref], ref,
    'labels34', 'labels34',
)

# Evaluate the resulting matching, then display its cost matrix.
m.eval('labels34')
m.display_matching_costs('labels34')

[refcm           ] [INFO    ] : NOTE: raw counts expected in anndata .X attributes.
[refcm           ] [DEBUG   ] : Loading cached mapping costs from cache.json.
[embeddings      ] [DEBUG   ] : Using 184 genes.
[refcm           ] [DEBUG   ] : Computing Wasserstein distances.
|████████████████| [100.00% ] : 00:18
[refcm           ] [DEBUG   ] : [[-0.94473291 -0.29525313 -0.1539428  ... -0.27813676 -0.30104014
  -0.31006409]
 [-0.18154023 -0.42922778 -0.11317159 ... -0.31747242 -0.36014148
  -0.32413976]
 [-0.06045168 -0.03543559 -0.08044876 ... -0.06032466 -0.07057711
  -0.10326209]
 ...
 [-0.21090269 -0.30372513 -0.11061631 ... -0.48983639 -0.55857203
  -0.44799415]
 [-0.23421052 -0.31528279 -0.14005787 ... -0.4936881  -0.58409793
  -0.49668477]
 [-0.2770079  -0.32123574 -0.15858277 ... -0.46666054 -0.55822165
  -0.51240534]]
[refcm           ] [DEBUG   ] : starting LP optimization
[refcm           ] [DEBUG   ] : optimization terminated w. status "Optimal"
[matchings       ] [DEBUG   ]

In [19]:
# NOTE(review): mid-notebook import; it would normally live in the import cell.
import numpy as np

# Gene names present in both the query and the reference dataset.
gs = np.intersect1d(ds[q].var_names, ds[ref].var_names)

# Stack the query rows on top of the reference rows, restricted to the shared
# genes. Kept as the cell's last expression so the array is displayed.
# NOTE(review): the displayed values (0.6931472, 1.0986123, ...) look like
# log1p-transformed counts, while RefCM logs that raw counts are expected in
# .X -- confirm.
np.concatenate([ds[name][:, gs].X for name in (q, ref)])

array([[0.       , 0.       , 0.6931472, ..., 5.726847 , 0.       ,
        5.9269257],
       [0.       , 0.       , 0.       , ..., 5.8348107, 0.       ,
        5.2470236],
       [4.6634393, 1.0986123, 0.       , ..., 5.693732 , 1.0986123,
        3.5263605],
       ...,
       [0.       , 0.       , 0.       , ..., 4.330734 , 0.       ,
        6.1862082],
       [1.0986123, 0.       , 0.       , ..., 3.8501472, 0.       ,
        5.117994 ],
       [0.       , 0.       , 0.       , ..., 3.3672957, 0.       ,
        5.942799 ]], dtype=float32)

In [20]:
# Shape of the stacked (query + reference) shared-gene matrix, derived from
# named objects instead of IPython's `_` (last output). `_` only holds that
# matrix if the previous cell was the most recently executed one, so the old
# form broke on re-runs and out-of-order execution.
# Rows: all cells of both datasets; columns: the shared genes in `gs`.
(ds[q].n_obs + ds[ref].n_obs, len(gs))

(22183, 16024)

In [13]:
# Query / reference dataset names (same pair as the HVG run).
q = 'MTG'
ref = 'ALM'

# RefCM instance using the PCA embedder, with both cache flags switched off.
# NOTE(review): the captured log still reports "Loading cached mapping costs
# from cache.json", and the logged cost matrix is almost entirely 1.0 --
# confirm both are expected for this embedder.
rcm = RefCM(
    cache_load=False,
    cache_save=False,
    embedder=pca_embedder,
    discovery_threshold=None,
)

# Annotate the query against the reference; 'labels34' is passed for both
# datasets (presumably the label column of each -- confirm).
m = rcm.annotate(
    ds[q], q,
    ds[ref], ref,
    'labels34', 'labels34',
)

# Evaluate the resulting matching, then display its cost matrix.
m.eval('labels34')
m.display_matching_costs('labels34')

[refcm           ] [INFO    ] : NOTE: raw counts expected in anndata .X attributes.
[refcm           ] [DEBUG   ] : Loading cached mapping costs from cache.json.
[embeddings      ] [DEBUG   ] : PCA fitting with 100 complete.
[refcm           ] [DEBUG   ] : Computing Wasserstein distances.
|████████████████| [100.00% ] : 01:01
[refcm           ] [DEBUG   ] : [[ 1.          1.          1.         ...  1.          1.
   1.        ]
 [ 1.          1.          1.         ...  1.          1.
   1.        ]
 [ 1.         -0.04038051  1.         ...  1.          1.
   1.        ]
 ...
 [ 1.          1.          1.         ...  1.          1.
   1.        ]
 [ 1.          1.          1.         ...  1.          1.
   1.        ]
 [ 1.          1.          1.         ...  1.          1.
   1.        ]]
[refcm           ] [DEBUG   ] : starting LP optimization
[refcm           ] [DEBUG   ] : optimization terminated w. status "Optimal"
[matchings       ] [DEBUG   ] : [31m[-][0m Exc L2/3 IT       

In [6]:
# A single RefCM instance with default settings apart from discovery_threshold=0.
rcm = RefCM(discovery_threshold=0)

# Annotate every ordered (query, reference) pair of distinct datasets, in the
# dict's insertion order, and evaluate each matching on 'labels34'.
# The enumerate() index of the original loop was never used and is dropped.
for q_id in ds:
    for ref_id in ds:
        if ref_id == q_id:
            # A dataset is never annotated against itself.
            continue
        m = rcm.annotate(ds[q_id], q_id, ds[ref_id], ref_id, 'labels34', 'labels34')
        m.eval('labels34')

[refcm           ] [INFO    ] : NOTE: raw counts expected in anndata .X attributes.
|████████████████| [100.00% ] : 00:38
[matchings       ] [INFO    ] : mapped MTG                  to ALM                 
[matchings       ] [INFO    ] : (34 common cell types)
[matchings       ] [INFO    ] : 34/34 correct mappings
[matchings       ] [INFO    ] : 0 /34 incorrect mappings
|████████████████| [100.00% ] : 00:43
[matchings       ] [INFO    ] : mapped MTG                  to VISp                
[matchings       ] [INFO    ] : (34 common cell types)
[matchings       ] [INFO    ] : 34/34 correct mappings
[matchings       ] [INFO    ] : 0 /34 incorrect mappings
|████████████████| [100.00% ] : 00:36
[matchings       ] [INFO    ] : mapped ALM                  to MTG                 
[matchings       ] [INFO    ] : (34 common cell types)
[matchings       ] [INFO    ] : 34/34 correct mappings
[matchings       ] [INFO    ] : 0 /34 incorrect mappings
|████████████████| [100.00% ] : 01:01
[matchings 