# Template: visualize single-cell transcriptomics data with Vitessce

## Code to change

Places where you will need to edit the code are marked by `# TODO(template)` comments.

In [1]:
import os
import json
from os.path import join
from vitessce import (
    VitessceConfig,
    ViewType as vt,
    CoordinationType as ct,
    FileType as ft,
    AnnDataWrapper,
    BASE_URL_PLACEHOLDER,
)
from vitessce.data_utils import (
    optimize_adata,
    VAR_CHUNK_SIZE,
)
from anndata import read_h5ad
import pandas as pd
import numpy as np

## Variables to fill in

In [2]:
# TODO(template): path of the input AnnData .h5ad file (read in step 1).
PATH_TO_INPUT_ANNDATA_H5AD = join(
    '..', 'tutorials', 'transcriptomics', 'raw_data',
    'habib17.processed.h5ad',
)

# TODO(template): path where the converted AnnData-Zarr store is written (end of step 1).
PATH_TO_OUTPUT_ANNDATA_ZARR = join(
    '.', 'processed_data', 'transcriptomics',
    'output.anndata.zarr',
)

# TODO(template): specify a local folder to store the publication dataset.
# The per-vignette `data` and `vignettes` sub-folders are created inside it.
PATH_TO_PUBLICATION_DATASET_DIRECTORY = join(
    '.', 'my_publication_dataset',
)

# TODO(template): Provide a name for the publication dataset and this particular vignette.
# VIGNETTE_ID is used as a folder name and as part of the exported base URL.
PUBLICATION_DATASET_NAME, PUBLICATION_DATASET_ID = 'My nature paper', 'my-nature-paper'
VIGNETTE_NAME, VIGNETTE_ID = 'Non-spatial transcriptomics', 'vignette_scrnaseq'

# TODO(template): provide names and descriptions
# (passed to the Vitessce configuration and its dataset in step 2).
CONFIG_NAME, CONFIG_DESCRIPTION = 'My config', 'This dataset reveals...'
DATASET_NAME = 'My dataset'

## Validation

In [3]:
# Output locations for this vignette inside the publication dataset folder:
#   <publication dataset>/data/<VIGNETTE_ID>       (used as the export `out_dir` in step 3)
#   <publication dataset>/vignettes/<VIGNETTE_ID>  (where vitessce.json is written in step 3)
DATA_DIR = join(PATH_TO_PUBLICATION_DATASET_DIRECTORY, 'data', VIGNETTE_ID)
VIGNETTES_DIR = join(PATH_TO_PUBLICATION_DATASET_DIRECTORY, 'vignettes', VIGNETTE_ID)

# Create both folders (and any missing parents). `exist_ok=True` makes the cell safe to
# re-run and avoids a separate check-then-create step; os.makedirs still raises
# FileExistsError if something that is not a directory already occupies the path.
for output_dir in (DATA_DIR, VIGNETTES_DIR):
    os.makedirs(output_dir, exist_ok=True)

## 1. Convert H5AD to AnnData-Zarr

In [4]:
# This template assumes your data is already saved as a .h5ad file.
# Read it from the location configured in PATH_TO_INPUT_ANNDATA_H5AD.
adata = read_h5ad(PATH_TO_INPUT_ANNDATA_H5AD)


This is where adjacency matrices should go now.
  warn(

This is where adjacency matrices should go now.
  warn(


In [5]:
# TODO(template): you may or may not want to add a smaller expression matrix
# that is filtered to include a subset of genes (in this case, only those flagged as "highly_variable").
hvg_matrix = adata[:, adata.var["highly_variable"]].X

# Store the subset as a plain dense ndarray under obsm["X_hvg"].
# Sparse matrices are converted with `.toarray()` (unlike `.todense()`, this does not
# produce the legacy `np.matrix` type); an `X` that is already dense has no such method,
# so it is passed through `np.asarray` instead of raising AttributeError.
adata.obsm["X_hvg"] = (
    hvg_matrix.toarray() if hasattr(hvg_matrix, "toarray") else np.asarray(hvg_matrix)
)

In [6]:
# Display the AnnData summary to see which obs/var columns and obsm/varm keys are available.
adata

AnnData object with n_obs × n_vars = 13067 × 25587
    obs: 'CellType', 'n_counts', 'log1p_n_counts', 'n_genes', 'log1p_n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50'
    var: 'gene_ids', 'mito', 'ribo', 'hb', 'n_counts', 'n_cells', 'n_genes', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'leiden', 'neighbors', 'pca'
    obsm: 'X_umap', 'X_hvg'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'

In [7]:
# TODO(template): You will want to modify the below lists of columns and keys
# to include all of those that will be used in the visualization
# (i.e., parameters of the AnnDataWrapper constructor below).
kept_fields = dict(
    obs_cols=["CellType"],
    var_cols=["gene_ids", "highly_variable"],
    obsm_keys=["X_umap", "X_hvg"],
    varm_keys=[],
)

# Replace `adata` with the optimized object; the summary displayed below lists
# which columns and keys it contains afterwards.
adata = optimize_adata(adata, **kept_fields)
adata

AnnData object with n_obs × n_vars = 13067 × 25587
    obs: 'CellType'
    var: 'gene_ids', 'highly_variable'
    obsm: 'X_umap', 'X_hvg'

In [8]:
# Write the AnnData object as a Zarr store at PATH_TO_OUTPUT_ANNDATA_ZARR.
# The requested chunk shape is (all observations, VAR_CHUNK_SIZE variables).
# NOTE(review): the "ignoring keyword argument" warning shown in this cell's output
# suggests `chunks` may not be applied to every array in the store - confirm.
adata.write_zarr(PATH_TO_OUTPUT_ANNDATA_ZARR, chunks=(adata.shape[0], VAR_CHUNK_SIZE))

  warn('ignoring keyword argument %r' % k)


## 2. Configure the visualization

In [9]:
# Create the configuration with the name and description chosen in "Variables to fill in".
vc = VitessceConfig(
    schema_version="1.0.15",
    name=CONFIG_NAME,
    description=CONFIG_DESCRIPTION,
)

# Add data: point the wrapper at the AnnData-Zarr store written in step 1 and tell it
# which paths inside the store hold the embedding, the cell sets and the expression matrix.
anndata_wrapper = AnnDataWrapper(
    adata_path=PATH_TO_OUTPUT_ANNDATA_ZARR,
    obs_embedding_paths=["obsm/X_umap"],
    obs_embedding_names=["UMAP"],
    obs_set_paths=["obs/CellType"],
    obs_set_names=["Cell Type"],
    obs_feature_matrix_path="obsm/X_hvg",
    feature_filter_path="var/highly_variable",
)
dataset = vc.add_dataset(name=DATASET_NAME).add_object(anndata_wrapper)

# Add views (each one is attached to the dataset defined above).
# The scatterplot's `mapping` matches the embedding name "UMAP" given to the wrapper.
scatterplot = vc.add_view(vt.SCATTERPLOT, dataset=dataset, mapping="UMAP")
cell_sets = vc.add_view(vt.OBS_SETS, dataset=dataset)
genes = vc.add_view(vt.FEATURE_LIST, dataset=dataset)
heatmap = vc.add_view(vt.HEATMAP, dataset=dataset)
cell_set_sizes = vc.add_view(vt.OBS_SET_SIZES, dataset=dataset)

# Arrange views: build the two rows first, then combine them into the layout.
# The trailing semicolon keeps the notebook from echoing the return value.
top_row = scatterplot | cell_sets
bottom_row = heatmap | (genes | cell_set_sizes)
vc.layout(top_row / bottom_row);

### Render the widget

In [10]:
# Render the configured visualization as a widget in the notebook output.
vc.widget()

VitessceWidget(config={'version': '1.0.15', 'name': 'My config', 'description': 'This dataset reveals...', 'da…

## 3. Export the configuration and data

In [11]:
# Export with `to="files"` using DATA_DIR as the output directory. The base URL is built
# from the placeholder plus this vignette's data folder, and the call returns the
# configuration as a JSON-serializable object.
export_base_url = f'{BASE_URL_PLACEHOLDER}/data/{VIGNETTE_ID}'
config_dict = vc.export(to="files", base_url=export_base_url, out_dir=DATA_DIR)

# Save the configuration as vitessce.json inside VIGNETTES_DIR
# (mode "w" creates the file, or overwrites it if it already exists).
config_path = join(VIGNETTES_DIR, "vitessce.json")
with open(config_path, "w") as f:
    json.dump(config_dict, f)