# Template: visualize single-cell transcriptomics data with Vitessce

## Code to change

Places where you will need to edit the code are marked by `# TODO(template)` comments.

In [None]:
import os
import json
from os.path import join
from vitessce import (
    VitessceConfig,
    ViewType as vt,
    CoordinationType as ct,
    FileType as ft,
    AnnDataWrapper,
    BASE_URL_PLACEHOLDER,
)
from vitessce.data_utils import (
    optimize_adata,
    VAR_CHUNK_SIZE,
)
from anndata import read_h5ad
import pandas as pd
import numpy as np

## Variables to fill in

In [None]:
# TODO(template): path to the input AnnData object, stored as a .h5ad file.
PATH_TO_INPUT_ANNDATA_H5AD = join(
    '..', 'tutorials', 'transcriptomics', 'raw_data', 'habib17.processed.h5ad',
)

# TODO(template): path at which the converted AnnData-Zarr store is written in step 1.
PATH_TO_OUTPUT_ANNDATA_ZARR = join(
    '.', 'processed_data', 'transcriptomics', 'output.anndata.zarr',
)

# TODO(template): this folder should not yet exist, but will be created in step 3.
PATH_TO_EXPORT_DIRECTORY = join('.', 'exported_transcriptomics_data')

# TODO(template): provide names and descriptions.
# These are passed to the Vitessce configuration and its dataset in step 2.
CONFIG_NAME = 'My config'
CONFIG_DESCRIPTION = 'This dataset reveals...'
DATASET_NAME = 'My dataset'

## 1. Convert H5AD to AnnData-Zarr

In [None]:
# Load the input AnnData object into memory.
# This template assumes your data is already saved as a .h5ad file.
adata = read_h5ad(filename=PATH_TO_INPUT_ANNDATA_H5AD)

In [None]:
# TODO(template): you may or may not want to add a smaller expression matrix
# that is filtered to include a subset of genes (in this case, only those flagged as "highly_variable").
hvg_matrix = adata[:, adata.var["highly_variable"]].X
# `X` may be stored either as a SciPy sparse matrix or as a dense array.
# Only the sparse case provides `.toarray()`; using it (rather than `.todense()`)
# yields a plain ndarray instead of the legacy `np.matrix` type, and the
# fallback keeps this cell working when `X` is already dense.
if hasattr(hvg_matrix, "toarray"):
    adata.obsm["X_hvg"] = hvg_matrix.toarray()
else:
    adata.obsm["X_hvg"] = np.asarray(hvg_matrix)

In [None]:
# Show a summary of the AnnData object, including the "X_hvg" entry added above.
# This must stay a bare expression on the last line of the cell for the notebook to render it.
adata

In [None]:
# TODO(template): You will want to modify the below lists of columns and keys
# to include all of those that will be used in the visualization
# (i.e., parameters of the AnnDataWrapper constructor below).
optimize_kwargs = {
    "obs_cols": ["CellType"],
    "var_cols": ["gene_ids", "highly_variable"],
    "obsm_keys": ["X_umap", "X_hvg"],
    "varm_keys": [],
}
adata = optimize_adata(adata, **optimize_kwargs)
# Show a summary of the optimized object.
adata

In [None]:
# Write the AnnData object to a Zarr store. Each chunk spans every observation (row)
# and VAR_CHUNK_SIZE variables (columns).
zarr_chunk_shape = (adata.n_obs, VAR_CHUNK_SIZE)
adata.write_zarr(PATH_TO_OUTPUT_ANNDATA_ZARR, chunks=zarr_chunk_shape)

## 2. Configure the visualization

In [None]:
# Create the configuration object that the views and datasets are attached to.
vc = VitessceConfig(
    schema_version="1.0.15",
    name=CONFIG_NAME,
    description=CONFIG_DESCRIPTION,
)

# Add data: point the wrapper at the AnnData-Zarr store written in step 1
# and name the arrays within it that the views will read.
anndata_wrapper = AnnDataWrapper(
    adata_path=PATH_TO_OUTPUT_ANNDATA_ZARR,
    obs_embedding_paths=["obsm/X_umap"],
    obs_embedding_names=["UMAP"],
    obs_set_paths=["obs/CellType"],
    obs_set_names=["Cell Type"],
    obs_feature_matrix_path="obsm/X_hvg",
    feature_filter_path="var/highly_variable",
)
dataset = vc.add_dataset(name=DATASET_NAME).add_object(anndata_wrapper)

# Add views (the "UMAP" mapping matches the embedding name declared above).
scatterplot = vc.add_view(vt.SCATTERPLOT, dataset=dataset, mapping="UMAP")
cell_sets = vc.add_view(vt.OBS_SETS, dataset=dataset)
genes = vc.add_view(vt.FEATURE_LIST, dataset=dataset)
heatmap = vc.add_view(vt.HEATMAP, dataset=dataset)
cell_set_sizes = vc.add_view(vt.OBS_SET_SIZES, dataset=dataset)

# Arrange views: `|` places views side by side, `/` stacks rows vertically.
top_row = scatterplot | cell_sets
bottom_row = heatmap | (genes | cell_set_sizes)
# The trailing semicolon suppresses the cell's output in the notebook.
vc.layout(top_row / bottom_row);

### Render the widget

In [None]:
# Render the configuration built in step 2 as a widget.
# This must stay a bare expression on the last line of the cell for the notebook to display it.
vc.widget()

## 3. Export the configuration and data

In [None]:
# TODO(template): The export function does not clear the contents of the `out_dir`.
# You may want to ensure that this folder does not yet exist:
# with `exist_ok=False`, this call raises an error if the folder is already present.
os.makedirs(PATH_TO_EXPORT_DIRECTORY, exist_ok=False)

# Export the data files into the export directory and get the configuration as a dict.
config_dict = vc.export(
    to="files",
    base_url=BASE_URL_PLACEHOLDER,
    out_dir=PATH_TO_EXPORT_DIRECTORY,
)

# Use `open` to create a new file named vitessce.json inside the export directory
# and write the configuration to it as JSON.
config_json_path = join(PATH_TO_EXPORT_DIRECTORY, "vitessce.json")
with open(config_json_path, "w") as config_file:
    json.dump(config_dict, config_file)