In [1]:
# Cell type annotation with celltypist
from anndata import read_zarr
import celltypist
from celltypist import models
import scanpy as sc
from os.path import join
import numpy as np
from vitessce.data_utils import (
    VAR_CHUNK_SIZE,
)

In [2]:
# Print the working directory: every path defined in the following cells is relative to it.
!pwd

/Users/mkeller/research/dbmi/vitessce/paper-figures/visium/src


In [18]:
# Root of the publication-page checkout, two levels above the working directory.
PUBLICATION_DIR = join("..", "..", "hubmap-publication-page")

# Data files referenced by the Vitessce configuration are placed here.
BASE_DIR = join(PUBLICATION_DIR, "data")
# Output directory for this vignette's exported files (visium.json, description.md).
VIGNETTE_DIR = join(PUBLICATION_DIR, "vignettes", "vignette_02")

In [4]:
# Directory holding the processed Zarr stores that are copied into BASE_DIR below.
PROCESSED_DIR = join(
    "..",
    "data",
    "processed",
)

In [5]:
# Copy the processed Zarr stores (spot-level AnnData and OME-Zarr image) into BASE_DIR.
#
# shutil.copytree replaces the previous `!cp -r SRC DST` shell calls: when DST already
# exists (i.e. on a re-run), `cp -r` nests a second copy at DST/<name> instead of
# refreshing DST. With dirs_exist_ok=True the copy is repeatable, creates missing parent
# directories, and does not depend on a POSIX shell.
# Note: files that exist in the destination but no longer in the source are left in place.
import shutil

for store_name in (
    "human_lymph_node_10x_visium.h5ad.zarr",
    "human_lymph_node_10x_visium.ome.zarr",
):
    shutil.copytree(
        join(PROCESSED_DIR, store_name),
        join(BASE_DIR, store_name),
        dirs_exist_ok=True,
    )

In [6]:
# Load the AnnData Zarr store from BASE_DIR and display its summary.
adata_store_path = join(BASE_DIR, "human_lymph_node_10x_visium.h5ad.zarr")
adata = read_zarr(adata_store_path)
adata

AnnData object with n_obs × n_vars = 3861 × 19685
    obs: 'clusters'
    var: 'highly_variable'
    obsm: 'X_hvg', 'X_pca', 'X_umap', 'segmentations', 'spatial'
    varm: 'PCs'

In [7]:
# Rescale to 10,000 counts per spot and log1p-transform, the input format CellTypist expects

In [8]:
# expm1 is the inverse of log1p: presumably the stored X is already log1p-transformed
# and this restores linear-scale values before re-normalizing — TODO confirm against
# the preprocessing that produced the store.
linear_scale_X = np.expm1(adata.X)
adata.X = linear_scale_X

# Scale every observation to a total of 10,000, then log1p-transform (both modify adata).
sc.pp.normalize_total(adata, target_sum=1e4, inplace=True)
sc.pp.log1p(adata)

In [9]:
# Sanity check: after undoing log1p, each row of X should sum to the 1e4 target.
per_spot_totals = np.expm1(adata.X).sum(axis=1)
per_spot_totals

array([ 9999.999,  9999.999,  9999.999, ..., 10000.   , 10000.001,
       10000.   ], dtype=float32)

In [10]:
# Download the two CellTypist models that the annotation step uses.
immune_model_files = ['Immune_All_Low.pkl', 'Immune_All_High.pkl']
models.download_models(model=immune_model_files)

📜 Retrieving model list from server https://celltypist.cog.sanger.ac.uk/models/models.json
📚 Total models in list: 19
📂 Storing models in /Users/mkeller/.celltypist/data/models
💾 Total models to download: 2
💾 Downloading model [1/2]: Immune_All_Low.pkl
💾 Downloading model [2/2]: Immune_All_High.pkl


In [11]:
# Run CellTypist once per model with majority voting enabled. After each run, `adata`
# is rebound to the result of `to_adata(prefix=...)`, so the second run starts from the
# output of the first and the final object carries both the low_* and high_* columns.
annotation_runs = {}
for column_prefix, model_file in (
    ("low_", 'Immune_All_Low.pkl'),
    ("high_", 'Immune_All_High.pkl'),
):
    annotation_runs[column_prefix] = celltypist.annotate(
        adata, model=model_file, majority_voting=True
    )
    adata = annotation_runs[column_prefix].to_adata(prefix=column_prefix)

# Keep the per-model prediction objects available under their original names.
low_predictions = annotation_runs["low_"]
high_predictions = annotation_runs["high_"]

🔬 Input data has 3861 cells and 19685 genes
🔗 Matching reference genes in the model
🧬 5680 features used for prediction
⚖️ Scaling input data
🖋️ Predicting labels
✅ Prediction done!
👀 Can not detect a neighborhood graph, construct one before the over-clustering
  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
⛓️ Over-clustering input data with resolution set to 5
🗳️ Majority voting the predictions
✅ Majority voting done!
🔬 Input data has 3861 cells and 19685 genes
🔗 Matching reference genes in the model
🧬 5680 features used for prediction
⚖️ Scaling input data
🖋️ Predicting labels
✅ Prediction done!
👀 Detected a neighborhood graph in the input object, will run over-clustering on the basis of it
⛓️ Over-clustering input data with resolution set to 5
🗳️ Majority voting the predictions
✅ Majority voting done!


In [12]:
# Alias, not a copy: predicted_adata and adata refer to the same annotated AnnData object.
predicted_adata = adata

In [13]:
# Display the annotated object to confirm the low_* / high_* prediction columns are present in obs.
predicted_adata

AnnData object with n_obs × n_vars = 3861 × 19685
    obs: 'clusters', 'low_predicted_labels', 'low_over_clustering', 'low_majority_voting', 'low_conf_score', 'high_predicted_labels', 'high_over_clustering', 'high_majority_voting', 'high_conf_score'
    var: 'highly_variable'
    uns: 'log1p', 'neighbors', 'leiden'
    obsm: 'X_hvg', 'X_pca', 'X_umap', 'segmentations', 'spatial'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

In [14]:
# Write the annotated object to a new Zarr store in BASE_DIR.
# Chunks span all observations and VAR_CHUNK_SIZE variables. The row count is taken from
# the object being written (previously it came from `adata`, which only matches while
# both names refer to the same object).
annotated_store_path = join(BASE_DIR, "human_lymph_node_10x_visium_with_cell_types.h5ad.zarr")
predicted_adata.write_zarr(
    annotated_store_path,
    chunks=(predicted_adata.shape[0], VAR_CHUNK_SIZE),
)

In [19]:
from os.path import join
from vitessce import (
    VitessceConfig,
    ViewType as vt,
    CoordinationType as ct,
    FileType as ft,
    AnnDataWrapper,
    OmeZarrWrapper,
    hconcat,
    vconcat,
    BASE_URL_PLACEHOLDER,
)
import json

In [34]:
# Create the Vitessce configuration. base_dir is set to BASE_DIR; presumably the relative
# store names passed to the data wrappers are resolved against it — verify in the vitessce docs.
vc = VitessceConfig(
    schema_version="1.0.15",
    name="Visium data",
    description="",
    base_dir=BASE_DIR,
)

In [35]:
# Store names only, with no directory component (a single-argument join returns its argument unchanged).
img_zarr = "human_lymph_node_10x_visium.ome.zarr"
adata_zarr = "human_lymph_node_10x_visium_with_cell_types.h5ad.zarr"

In [36]:
# Wrapper for the annotated AnnData store: locations, segmentations, embeddings,
# observation sets and the expression matrix.
spot_data = AnnDataWrapper(
    adata_path=adata_zarr,
    obs_locations_path="obsm/spatial",
    obs_segmentations_path="obsm/segmentations",
    obs_embedding_paths=["obsm/X_umap", "obsm/X_pca"],
    obs_embedding_names=["UMAP", "PCA"],
    # Two set groups: the Leiden clusters, and the predicted cell types built from two
    # columns (high-model majority vote first, then low-model majority vote).
    obs_set_paths=["obs/clusters", ["obs/high_majority_voting", "obs/low_majority_voting"]],
    obs_set_names=["Leiden Cluster", "Predicted Cell Type"],
    obs_feature_matrix_path="X",
    initial_feature_filter_path="var/highly_variable",
    # Declare that the observations in this store are spots.
    coordination_values={
        "obsType": "spot"
    },
)

# Wrapper for the OME-Zarr image store.
tissue_image = OmeZarrWrapper(img_path=img_zarr)

# Register a single dataset holding both the spot data and the image.
dataset = (
    vc.add_dataset(name='Human lymph node')
    .add_object(spot_data)
    .add_object(tissue_image)
)

In [37]:
# --- Views --------------------------------------------------------------------------
# Every view occupies one 4-wide, 6-tall cell of a 3-column x 2-row grid.
GRID_W, GRID_H = 4, 6


def add_grid_view(view_type, col, row):
    """Add a view of `dataset` to `vc`, positioned at grid cell (col, row)."""
    return vc.add_view(
        view_type,
        dataset=dataset,
        x=col * GRID_W,
        y=row * GRID_H,
        w=GRID_W,
        h=GRID_H,
    )


# Top row: three spatial views (one colored by set membership, two by gene expression).
spatial_by_cellset = add_grid_view(vt.SPATIAL, col=0, row=0)
spatial_by_expression_a = add_grid_view(vt.SPATIAL, col=1, row=0)
spatial_by_expression_b = add_grid_view(vt.SPATIAL, col=2, row=0)
# Bottom row: layer controller, observation-set manager, feature list.
lc = add_grid_view(vt.LAYER_CONTROLLER, col=0, row=1).set_props(disableChannelsIfRgbDetected=True)
cell_sets = add_grid_view(vt.OBS_SETS, col=1, row=1)
feature_list = add_grid_view(vt.FEATURE_LIST, col=2, row=1)

spatial_views = [spatial_by_cellset, spatial_by_expression_a, spatial_by_expression_b]
all_views = spatial_views + [lc, cell_sets, feature_list]

# --- Layer definitions ---------------------------------------------------------------
segmentation_layer = {"radius": 65, "stroked": True, "visible": True, "opacity": 1}

# One raster layer with three visible channels (c = 0, 1, 2) colored red, green and blue,
# each with a [0, 255] slider range.
rgb_channel_colors = [[255, 0, 0], [0, 255, 0], [0, 0, 255]]
image_layer = [
    {
        "type": "raster",
        "index": 0,
        "colormap": None,
        "transparentColor": None,
        "opacity": 1,
        "domainType": "Min/Max",
        "channels": [
            {
                "selection": {"c": channel_index},
                "color": channel_color,
                "visible": True,
                "slider": [0, 255],
            }
            for channel_index, channel_color in enumerate(rgb_channel_colors)
        ],
    }
]

# --- Coordination (the order of the link_views calls is kept as in the original cell) ---
# All views use the "spot" observation type.
vc.link_views(all_views, [ct.OBS_TYPE], ["spot"])

# The spatial views and the layer controller share layers, zoom level and pan target.
initial_zoom, initial_target_x, initial_target_y = -2.598, 1008.88, 1004.69
vc.link_views(
    spatial_views + [lc],
    [
        ct.SPATIAL_SEGMENTATION_LAYER,
        ct.SPATIAL_IMAGE_LAYER,
        ct.SPATIAL_ZOOM,
        ct.SPATIAL_TARGET_X,
        ct.SPATIAL_TARGET_Y,
    ],
    [segmentation_layer, image_layer, initial_zoom, initial_target_x, initial_target_y],
)

# Expression view A is colored by CR2; expression view B shares its FCER2 selection
# with the feature list.
expression_coordination = [ct.OBS_COLOR_ENCODING, ct.FEATURE_SELECTION]
vc.link_views([spatial_by_expression_a], expression_coordination, ["geneSelection", ["CR2"]])
vc.link_views([spatial_by_expression_b, feature_list], expression_coordination, ["geneSelection", ["FCER2"]])
# Both expression views use the same colormap range.
vc.link_views(
    [spatial_by_expression_a, spatial_by_expression_b],
    [ct.FEATURE_VALUE_COLORMAP_RANGE],
    [[0.5, 0.75]],
)

# The set-colored spatial view and the set manager share one selected set path.
selected_set_path = ["Predicted Cell Type", "B cells", "Germinal center B cells"]
vc.link_views(
    [spatial_by_cellset, cell_sets],
    [ct.OBS_COLOR_ENCODING, ct.OBS_SET_SELECTION],
    ["cellSetSelection", [selected_set_path]],
)

# --- Layout: spatial views on top, controls underneath --------------------------------
top_row = hconcat(spatial_by_cellset, spatial_by_expression_a, spatial_by_expression_b)
bottom_row = hconcat(lc, cell_sets, feature_list)
vc.layout(top_row / bottom_row);

In [38]:
# Returns a vitessce.io URL with the configuration embedded in it (see the output below).
vc.web_app()

'http://vitessce.io/#?theme=light&url=data:,%7B%22version%22%3A+%221.0.15%22%2C+%22name%22%3A+%22Visium+data%22%2C+%22description%22%3A+%22%22%2C+%22datasets%22%3A+%5B%7B%22uid%22%3A+%22A%22%2C+%22name%22%3A+%22Human+lymph+node%22%2C+%22files%22%3A+%5B%7B%22fileType%22%3A+%22anndata.zarr%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8006%2Fhuman_lymph_node_10x_visium_with_cell_types.h5ad.zarr%22%2C+%22options%22%3A+%7B%22obsLocations%22%3A+%7B%22path%22%3A+%22obsm%2Fspatial%22%7D%2C+%22obsSegmentations%22%3A+%7B%22path%22%3A+%22obsm%2Fsegmentations%22%7D%2C+%22obsEmbedding%22%3A+%5B%7B%22path%22%3A+%22obsm%2FX_umap%22%2C+%22dims%22%3A+%5B0%2C+1%5D%2C+%22embeddingType%22%3A+%22UMAP%22%7D%2C+%7B%22path%22%3A+%22obsm%2FX_pca%22%2C+%22dims%22%3A+%5B0%2C+1%5D%2C+%22embeddingType%22%3A+%22PCA%22%7D%5D%2C+%22obsSets%22%3A+%5B%7B%22name%22%3A+%22Leiden+Cluster%22%2C+%22path%22%3A+%22obs%2Fclusters%22%7D%2C+%7B%22name%22%3A+%22Predicted+Cell+Type%22%2C+%22path%22%3A+%5B%22obs%2Fhigh_majority_v

In [None]:
# The notebook's import cells only bring in `os.path.join`, so `os` itself must be
# imported here; without it this cell raises NameError on a fresh kernel.
import os

# Ensure the vignette output directory exists before files are written into it.
os.makedirs(VIGNETTE_DIR, exist_ok=True)

In [None]:
# Export the configuration as a dict, passing BASE_URL_PLACEHOLDER as the base URL.
config_dict = vc.to_dict(base_url=BASE_URL_PLACEHOLDER)

# Write the configuration as JSON to <VIGNETTE_DIR>/visium.json.
config_json_path = join(VIGNETTE_DIR, "visium.json")
with open(config_json_path, "w") as config_file:
    json.dump(config_dict, config_file)


In [None]:
# Markdown for the vignette page: YAML front matter (which references visium.json as
# the figure file) followed by the descriptive text.
vignette_md = """---
name: Use Case 2
figures:
    - name: "Visualization"
      file: visium.json
---

## Spatial transcriptomics with H&E image from the human lymph node

This dataset is provided by 10x Genomics as a demo of the Visium technology and thus is not intended to answer a particular biological question. Nonetheless, it can be used to validate that the expected lymph node cell types are present. According to the v1 HuBMAP ASCT+B table for lymph node (Börner et al., Nature Cell Biology 2021), CCL19 is expressed by the T Cell Zone Reticular Cell Type in the Interfollicular Cortex and Paracortical Sinus. Using CellPhoneDB (Efremova et al., Nature Protocols 2020), we can query for known receptors of this ligand, which include ACKR4, CCRL2, and CCR7. Using the spatial view in Vitessce, we can observe that CCL19 and CCR7 exhibit coexpression patterns in clusters 2 and 8 (defined by the Leiden unsupervised clustering method).

"""

# The text contains non-ASCII characters ("Börner"), so the encoding is pinned to UTF-8
# rather than left to the platform default.
description_path = join(VIGNETTE_DIR, "description.md")
with open(description_path, "w", encoding="utf-8") as description_file:
    description_file.write(vignette_md)
