In [None]:
# Cell type annotation with celltypist
from anndata import read_zarr
import celltypist
from celltypist import models
import scanpy as sc
from os.path import join
import numpy as np
from vitessce.data_utils import (
    VAR_CHUNK_SIZE,
)

In [None]:
# Print the current working directory; every path in this notebook is relative to it.
!pwd

In [None]:
# Output locations, both under ../../hubmap-publication-page.
PUBLICATION_ROOT = join("..", "..", "hubmap-publication-page")
BASE_DIR = join(PUBLICATION_ROOT, "data")
VIGNETTE_DIR = join(PUBLICATION_ROOT, "vignettes", "vignette_02")

In [None]:
# Directory holding the processed Visium stores that the next cell copies from.
PROCESSED_DIR = join("..", "data", "processed")

In [None]:
import shutil

# Copy the processed AnnData and OME-Zarr stores into the publication data
# directory. The previous `cp -r` nested the source inside the destination
# whenever the destination already existed, so re-running the cell produced a
# broken layout. Replacing the destination wholesale keeps the cell idempotent.
for store_name in (
    "human_lymph_node_10x_visium.h5ad.zarr",
    "human_lymph_node_10x_visium.ome.zarr",
):
    destination = join(BASE_DIR, store_name)
    # Drop any copy left by a previous run (no-op when none exists yet).
    shutil.rmtree(destination, ignore_errors=True)
    # copytree also creates missing parent directories such as BASE_DIR.
    shutil.copytree(join(PROCESSED_DIR, store_name), destination)

In [None]:
# Load the AnnData store from BASE_DIR; the bare name on the last line displays its summary.
visium_store_path = join(BASE_DIR, "human_lymph_node_10x_visium.h5ad.zarr")
adata = read_zarr(visium_store_path)
adata

In [None]:
# Scale/log-normalize as required by CellTypist

In [None]:
# Apply expm1 (the inverse of log1p) to X before re-normalizing. This overwrites adata.X in place.
# NOTE(review): presumably X was stored log1p-transformed — confirm against the preprocessing step.
adata.X = np.expm1(adata.X)

# Rescale every observation to a total of 10,000, then log1p-transform the result.
sc.pp.normalize_total(adata, inplace=True, target_sum=1e4)
sc.pp.log1p(adata)

In [None]:
# Sanity check: undo log1p and sum each row; with target_sum=1e4 every total should be close to 10,000.
np.expm1(adata.X).sum(axis = 1)

In [None]:
# Download the two CellTypist models used for annotation:
# `Immune_All_Low.pkl` and `Immune_All_High.pkl`.
celltypist_model_names = ['Immune_All_Low.pkl', 'Immune_All_High.pkl']
models.download_models(model=celltypist_model_names)

In [None]:
# First pass: Immune_All_Low model. Its result columns are added with the `low_` prefix.
low_predictions = celltypist.annotate(
    adata,
    model="Immune_All_Low.pkl",
    majority_voting=True,
)
adata = low_predictions.to_adata(prefix="low_")

# Second pass on the already-annotated object: Immune_All_High model,
# with its result columns added under the `high_` prefix.
high_predictions = celltypist.annotate(
    adata,
    model="Immune_All_High.pkl",
    majority_voting=True,
)
adata = high_predictions.to_adata(prefix="high_")

In [None]:
# Second name for the annotated AnnData (same object, not a copy).
predicted_adata = adata

In [None]:
# Display the summary of the annotated AnnData.
predicted_adata

In [None]:
# Persist the annotated AnnData into BASE_DIR. The chunk shape passed to
# write_zarr is (number of rows, VAR_CHUNK_SIZE).
annotated_store_path = join(BASE_DIR, "human_lymph_node_10x_visium_with_cell_types.h5ad.zarr")
zarr_chunk_shape = (adata.shape[0], VAR_CHUNK_SIZE)
predicted_adata.write_zarr(annotated_store_path, chunks=zarr_chunk_shape)

In [None]:
from os.path import join
from vitessce import (
    VitessceConfig,
    ViewType as vt,
    CoordinationType as ct,
    FileType as ft,
    AnnDataWrapper,
    OmeZarrWrapper,
    hconcat,
    vconcat,
    BASE_URL_PLACEHOLDER,
)
import json

In [None]:
# Path to the processed data of the vitessce-python human-lymph-node demo.
# NOTE(review): `processed_dir` is not referenced by any later cell visible here — confirm whether it is still needed.
processed_dir = join("..", "..", "..", "vitessce-python", "demos", "human-lymph-node-10x-visium", "data", "processed")

In [None]:
# Keep BASE_DIR pointing at the publication data directory. The annotated
# store (`human_lymph_node_10x_visium_with_cell_types.h5ad.zarr`) is written
# there and the OME-Zarr image is copied there, so this is the directory the
# Vitessce config must use as its base. Re-pointing BASE_DIR at
# ../data/processed made the config look for the annotated store in a
# directory this notebook never writes it to, and it silently changed the
# target of the earlier copy/write cells whenever they were re-run.
BASE_DIR = join("..", "..", "hubmap-publication-page", "data")

In [None]:
# Create the Vitessce configuration, with BASE_DIR as the base directory for its data files.
vc = VitessceConfig(
    schema_version="1.0.15",
    name="Visium data",
    description="",
    base_dir=BASE_DIR,
)

In [None]:
# Store names relative to the configuration's base directory.
# (A single-argument join returns its argument unchanged, so plain strings are equivalent.)
img_zarr = "human_lymph_node_10x_visium.ome.zarr"
adata_zarr = "human_lymph_node_10x_visium_with_cell_types.h5ad.zarr"

In [None]:
# AnnData store: locations, segmentations, embeddings, observation sets and
# the expression matrix.
spot_wrapper = AnnDataWrapper(
    adata_path=adata_zarr,
    obs_locations_path="obsm/spatial",
    obs_segmentations_path="obsm/segmentations",
    obs_embedding_paths=["obsm/X_umap", "obsm/X_pca"],
    obs_embedding_names=["UMAP", "PCA"],
    # The second set is given two columns: the high- then the low-model
    # majority-voting labels.
    obs_set_paths=["obs/clusters", ["obs/high_majority_voting", "obs/low_majority_voting"]],
    obs_set_names=["Leiden Cluster", "Predicted Cell Type"],
    obs_feature_matrix_path="X",
    initial_feature_filter_path="var/highly_variable",
    # Declare the observation type of this data as "spot".
    coordination_values={"obsType": "spot"},
)

# OME-Zarr image belonging to the same dataset.
image_wrapper = OmeZarrWrapper(img_path=img_zarr)

dataset = (
    vc.add_dataset(name="Human lymph node")
    .add_object(spot_wrapper)
    .add_object(image_wrapper)
)

In [None]:
# Top row of the 12-column grid: three spatial views.
spatial_by_cellset = vc.add_view(vt.SPATIAL, dataset=dataset, x=0, y=0, w=4, h=6)
spatial_by_expression_a = vc.add_view(vt.SPATIAL, dataset=dataset, x=4, y=0, w=4, h=6)
spatial_by_expression_b = vc.add_view(vt.SPATIAL, dataset=dataset, x=8, y=0, w=4, h=6)

# Bottom row: layer controller, observation sets and feature list.
lc = vc.add_view(vt.LAYER_CONTROLLER, dataset=dataset, x=0, y=6, w=4, h=6).set_props(disableChannelsIfRgbDetected=True)
cell_sets = vc.add_view(vt.OBS_SETS, dataset=dataset, x=4, y=6, w=4, h=6)
feature_list = vc.add_view(vt.FEATURE_LIST, dataset=dataset, x=8, y=6, w=4, h=6)

# Every view that is actually created above. The list previously also named
# `heatmap`, which is never defined in this notebook and therefore raised
# NameError when the cell ran.
all_views = [
    spatial_by_cellset,
    spatial_by_expression_a,
    spatial_by_expression_b,
    lc,
    cell_sets,
    feature_list,
]

# Initial value for the shared segmentation-layer coordination.
segmentation_layer = {
    "radius": 65, "stroked": True, "visible": True, "opacity": 1
}

# Initial value for the shared image-layer coordination: one raster layer with
# three channels (c = 0, 1, 2) coloured red, green and blue, each with a
# 0-255 slider range.
rgb_channel_colors = [[255, 0, 0], [0, 255, 0], [0, 0, 255]]
image_layer = [
    {
        "type": "raster",
        "index": 0,
        "colormap": None,
        "transparentColor": None,
        "opacity": 1,
        "domainType": "Min/Max",
        "channels": [
            {
                "selection": {"c": channel_index},
                "color": channel_color,
                "visible": True,
                "slider": [0, 255],
            }
            for channel_index, channel_color in enumerate(rgb_channel_colors)
        ],
    }
]

# All views operate on observations of type "spot".
vc.link_views(all_views, [ct.OBS_TYPE], ["spot"])

# The spatial views and the layer controller share layers, zoom and target.
vc.link_views(
    [spatial_by_cellset, spatial_by_expression_a, spatial_by_expression_b, lc],
    [ct.SPATIAL_SEGMENTATION_LAYER, ct.SPATIAL_IMAGE_LAYER, ct.SPATIAL_ZOOM, ct.SPATIAL_TARGET_X, ct.SPATIAL_TARGET_Y],
    [segmentation_layer, image_layer, -2.598, 1008.88, 1004.69],
)

# Expression views: A is coloured by CR2; B (together with the feature list) by FCER2.
vc.link_views([spatial_by_expression_a], [ct.OBS_COLOR_ENCODING, ct.FEATURE_SELECTION], ["geneSelection", ["CR2"]])
vc.link_views([spatial_by_expression_b, feature_list], [ct.OBS_COLOR_ENCODING, ct.FEATURE_SELECTION], ["geneSelection", ["FCER2"]])
vc.link_views([spatial_by_expression_a, spatial_by_expression_b], [ct.FEATURE_VALUE_COLORMAP_RANGE], [[0.5, 0.75]])

# Cell-set view: coloured by set membership, with one predicted cell type pre-selected.
vc.link_views([spatial_by_cellset, cell_sets], [ct.OBS_COLOR_ENCODING, ct.OBS_SET_SELECTION], ["cellSetSelection", [["Predicted Cell Type", "B cells", "Germinal center B cells"]]])

In [None]:
# Open this configuration in the Vitessce web app.
vc.web_app()

In [None]:
# `os` itself was never imported in this notebook (only `os.path.join` was),
# so the call below used to raise NameError.
import os

# Create the vignette directory, including missing parents; no error if it already exists.
os.makedirs(VIGNETTE_DIR, exist_ok=True)

In [None]:
# Serialize the configuration, passing BASE_URL_PLACEHOLDER as the base URL.
config_dict = vc.to_dict(base_url=BASE_URL_PLACEHOLDER)

# Write the configuration as JSON to visium.json inside the vignette directory.
visium_config_path = join(VIGNETTE_DIR, "visium.json")
with open(visium_config_path, "w") as config_file:
    json.dump(config_dict, config_file)


In [None]:
# Markdown (with YAML front matter) describing the vignette; the front matter
# points at the visium.json figure file.
vignette_md = """---
name: Use Case 2
figures:
    - name: "Visualization"
      file: visium.json
---

## Spatial transcriptomics with H&E image from the human lymph node

This dataset is provided by 10x Genomics as a demo of the Visium technology and thus is not intended to answer a particular biological question. Nonetheless, it can be used to validate that the expected lymph node cell types are present. According to the v1 HuBMAP ASCT+B table for lymph node (Börner et al., Nature Cell Biology 2021), CCL19 is expressed by the T Cell Zone Reticular Cell Type in the Interfollicular Cortex and Paracortical Sinus. Using CellPhoneDB (Efremova et al., Nature Protocols 2020), we can query for known receptors of this ligand, which include ACKR4, CCRL2, and CCR7. Using the spatial view in Vitessce, we can observe that CCL19 and CCR7 exhibit coexpression patterns in clusters 2 and 8 (defined by the Leiden unsupervised clustering method).

"""

# The text contains non-ASCII characters ("Börner"), so the encoding is fixed
# to UTF-8 instead of relying on the platform default.
with open(join(VIGNETTE_DIR, "description.md"), "w", encoding="utf-8") as f:
    f.write(vignette_md)
