# Vitessce Widget Tutorial

# Visualization of 10x multiome data

## 1. Import dependencies

We need to import the classes and functions that we will be using from the corresponding packages.

In [None]:
import os
from os.path import join
from urllib.request import urlretrieve
import muon as mu
from muon import atac as ac
import scanpy as sc
import anndata as ad
import numpy as np

from vitessce import (
    VitessceConfig,
    Component as cm,
    CoordinationType as ct,
    MuDataWrapper,
    AnnDataWrapper,
)

## 2. Download the dataset

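Download the filtered feature-barcode matrix HDF5 file (`e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix.h5`) from https://www.10xgenomics.com/resources/datasets/fresh-embryonic-e-18-mouse-brain-5-k-1-standard-2-0-0 and save it under `data/multiome/`.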

In [None]:
# The 10x Genomics file is not fetched automatically by this notebook: download
# it manually and save it at the path below.
# To create the target folder first, uncomment:
# os.makedirs(join("data", "multiome"), exist_ok=True)
h5_filepath = join("data", "multiome", "e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix.h5")

## 3. Load the dataset

Load the dataset using muon's `read_10x_h5` function. The result holds the gene expression (`rna`) and chromatin accessibility (`atac`) modalities.

In [None]:
# Read the 10x HDF5 file with muon; the individual modalities are accessed
# through `mdata.mod` in the following cells.
mdata = mu.read_10x_h5(h5_filepath)
# Make variable names unique so that every feature has its own identifier.
mdata.var_names_make_unique()

In [None]:
# Pull the two modalities out of the multimodal container.
rna = mdata.mod['rna']
atac = mdata.mod['atac']

# Only the last bare expression of a notebook cell is echoed, so print both
# summaries explicitly (this also works when the code is run as a script).
print(rna)
print(atac)

## 4.1 Pre-process the gene expression dataset
The following code is based on the Muon tutorial notebook https://github.com/PMBio/muon-tutorials/blob/master/single-cell-rna-atac/pbmc10k/1-Gene-Expression-Processing.ipynb

In [None]:
# Quality control: flag mitochondrial genes (names starting with 'mt-') in a
# boolean `mt` column of `rna.var`, then compute QC metrics with that group
# passed as a QC variable.
rna.var['mt'] = rna.var_names.str.startswith('mt-')
sc.pp.calculate_qc_metrics(
    rna,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)

In [None]:
# Remove low-quality genes and cells from the RNA modality.

# Genes: keep those with n_cells_by_counts of at least 3.
mu.pp.filter_var(rna, 'n_cells_by_counts', lambda n_cells: n_cells >= 3)

# Cells: keep those with 200 <= n_genes_by_counts < 5000 ...
mu.pp.filter_obs(
    rna,
    'n_genes_by_counts',
    lambda n_genes: (n_genes >= 200) & (n_genes < 5000),
)
# ... with total_counts below 15000 ...
mu.pp.filter_obs(rna, 'total_counts', lambda total: total < 15000)
# ... and with pct_counts_mt below 20.
mu.pp.filter_obs(rna, 'pct_counts_mt', lambda pct_mt: pct_mt < 20)

In [None]:
# Normalize every cell to a total of 1e4 counts, then apply log1p.
sc.pp.normalize_total(rna, target_sum=1e4)
sc.pp.log1p(rna)

# Annotate highly variable genes; the resulting `highly_variable` column is
# later used to subset the matrix passed to Vitessce.
sc.pp.highly_variable_genes(
    rna,
    min_mean=0.02,
    max_mean=4,
    min_disp=0.5,
)

# Scale the expression values, with max_value set to 10.
sc.pp.scale(rna, max_value=10)

In [None]:
# Embed the RNA modality with PCA, build a neighbor graph from the first
# 20 principal components, and cluster the cells with Leiden.
sc.tl.pca(rna, svd_solver='arpack')
sc.pp.neighbors(
    rna,
    n_neighbors=10,
    n_pcs=20,
)
sc.tl.leiden(rna, resolution=0.5)

## 4.2 Pre-process the chromatin accessibility dataset
The following code is based on the Muon tutorial notebook https://github.com/PMBio/muon-tutorials/blob/master/single-cell-rna-atac/pbmc10k/2-Chromatin-Accessibility-Processing.ipynb

In [None]:
# Quality control: compute QC metrics for the ATAC modality in place
# (no extra QC variable groups are passed here).
sc.pp.calculate_qc_metrics(
    atac,
    percent_top=None,
    log1p=False,
    inplace=True,
)

In [None]:
# Remove low-quality peaks and cells from the ATAC modality.

# Peaks: keep those with n_cells_by_counts of at least 10.
mu.pp.filter_var(atac, 'n_cells_by_counts', lambda n_cells: n_cells >= 10)

# Cells: keep those with 2000 <= n_genes_by_counts <= 15000 ...
mu.pp.filter_obs(
    atac,
    'n_genes_by_counts',
    lambda n_peaks: (n_peaks >= 2000) & (n_peaks <= 15000),
)
# ... and with 4000 <= total_counts <= 40000.
mu.pp.filter_obs(
    atac,
    'total_counts',
    lambda total: (total >= 4000) & (total <= 40000),
)

In [None]:
# Normalization
# Store an independent copy of the original counts. Assigning `atac.X` directly
# would only store a reference, which in-place steps on X could then modify.
atac.layers["counts"] = atac.X.copy()
# `sc.pp.normalize_per_cell` is deprecated in scanpy; `normalize_total` is its
# documented replacement and matches the call used for the RNA modality.
# Note: unlike `normalize_per_cell`, this does not add an `n_counts` column
# to `atac.obs`.
sc.pp.normalize_total(atac, target_sum=1e4)
sc.pp.log1p(atac)

In [None]:
# Annotate highly variable peaks in `atac.var`.
sc.pp.highly_variable_genes(
    atac,
    min_mean=0.05,
    max_mean=1.5,
    min_disp=0.5,
)
# Count how many peaks were flagged (echoed as the cell output).
np.sum(atac.var['highly_variable'])

In [None]:
# Keep the current (normalized, log-transformed, not yet scaled) state of the
# ATAC data in `.raw` before the scaling step that follows.
atac.raw = atac

In [None]:
# Scale the ATAC matrix; unlike the RNA cell, no `max_value` is passed here.
sc.pp.scale(atac)

In [None]:
# Dimensionality reduction: run LSI on the ATAC modality.
ac.tl.lsi(atac)

# Drop the first LSI component from every place it is stored (cell embedding,
# peak loadings and per-component stdev).
# NOTE(review): presumably because the first component tracks sequencing
# depth, as in the referenced muon tutorial; confirm.
atac.obsm['X_lsi'] = atac.obsm['X_lsi'][:, 1:]
atac.varm["LSI"] = atac.varm["LSI"][:, 1:]
atac.uns["lsi"]["stdev"] = atac.uns["lsi"]["stdev"][1:]

In [None]:
# Build the neighbor graph for ATAC on the LSI representation instead of PCA.
sc.pp.neighbors(
    atac,
    use_rep="X_lsi",
    n_neighbors=10,
    n_pcs=30,
)

In [None]:
# Cluster the ATAC cells with Leiden, using the same resolution as for RNA.
sc.tl.leiden(atac, resolution=0.5)

In [None]:
# Echo the ATAC modality as stored in `mdata` to inspect the annotations added
# by the preprocessing above.
mdata.mod['atac']

## 5. Create a Vitessce view config

Define the data and views you would like to include in the widget.

In [None]:
vc = VitessceConfig(name='10x multiome example')

# Wrap each modality for Vitessce: Leiden clusters become cell sets, the
# embedding becomes a scatterplot mapping, and the matrix in X is restricted
# to the features flagged in the `highly_variable` column.
rna_wrapper = AnnDataWrapper(
    rna,
    cell_set_obs=["leiden"],
    cell_set_obs_names=["Leiden"],
    mappings_obsm=["X_pca"],
    mappings_obsm_names=["PCA"],
    expression_matrix="X",
    matrix_gene_var_filter="highly_variable",
)
rna_dataset = vc.add_dataset(name='RNA').add_object(rna_wrapper)

atac_wrapper = AnnDataWrapper(
    atac,
    cell_set_obs=["leiden"],
    cell_set_obs_names=["Leiden"],
    mappings_obsm=["X_lsi"],
    mappings_obsm_names=["LSI"],
    expression_matrix="X",
    matrix_gene_var_filter="highly_variable",
)
atac_dataset = vc.add_dataset(name='ATAC').add_object(atac_wrapper)

# Scatterplots of the per-modality embeddings.
pca = vc.add_view(rna_dataset, cm.SCATTERPLOT, mapping="PCA")
lsi = vc.add_view(atac_dataset, cm.SCATTERPLOT, mapping="LSI")

# Cell set managers, one per modality.
rna_cell_sets = vc.add_view(rna_dataset, cm.CELL_SETS).set_props(
    title="Cell Sets from RNA",
)
atac_cell_sets = vc.add_view(atac_dataset, cm.CELL_SETS).set_props(
    title="Cell Sets from ATAC",
)

# Feature lists: genes for RNA; the same component relabelled for ATAC peaks.
genes = vc.add_view(rna_dataset, cm.GENES).set_props(
    title="Genes",
)
peaks = vc.add_view(atac_dataset, cm.GENES).set_props(
    title="Peaks",
    variablesLabelOverride="peak",
)

# Heatmaps, one per modality.
rna_heatmap = vc.add_view(rna_dataset, cm.HEATMAP).set_props(
    title="Heatmap from RNA",
)
atac_heatmap = vc.add_view(atac_dataset, cm.HEATMAP).set_props(
    title="Heatmap from ATAC",
    variablesLabelOverride="peak",
)

# Link the views within each modality (RNA first, then ATAC): a shared
# colormap and range, a shared feature selection, and a shared cell set
# selection and color.
for scatter_view, heatmap_view, feature_view, cell_set_view in (
    (pca, rna_heatmap, genes, rna_cell_sets),
    (lsi, atac_heatmap, peaks, atac_cell_sets),
):
    vc.link_views(
        [scatter_view, heatmap_view],
        [ct.GENE_EXPRESSION_COLORMAP, "geneExpressionColormapRange"],
        ["plasma", [0.0, 1.0]],
    )
    vc.link_views([scatter_view, heatmap_view, feature_view], ["geneSelection"])
    vc.link_views(
        [scatter_view, heatmap_view, cell_set_view],
        [ct.CELL_SET_SELECTION, ct.CELL_SET_COLOR],
    )

# Layout: `|` places views side by side and `/` stacks them.
# Left half: scatterplots above cell sets; right half: heatmaps above feature lists.
left_half = (pca | lsi) / (rna_cell_sets | atac_cell_sets)
right_half = (rna_heatmap | atac_heatmap) / (genes | peaks)
vc.layout(left_half | right_half);

## 6. Create the Vitessce widget

A widget can be created with the `.widget()` method on the config instance.

In [None]:
# Build the widget from the config; leaving `vw` as the last expression of
# the cell renders it in the notebook output.
vw = vc.widget()
vw