Based on the tutorial from https://muon-tutorials.readthedocs.io/en/latest/single-cell-rna-atac/brain3k/1-Processing-and-Integration.html

In [1]:
import mudatasets as mds

In [2]:
import scipy.io
import pandas as pd
import numpy as np
from anndata import AnnData
from mudata import MuData
import scanpy as sc
import muon as mu
from muon import atac as ac
from os.path import join
from vitessce.data_utils import (
    VAR_CHUNK_SIZE,
    optimize_adata,
)

## Load the data

In [None]:
mdata = mds.load("brain3k_multiome", full=True)
mdata.var_names_make_unique()
mdata

## 1. RNA

## QC

In [None]:
# `rna` will point to `mdata['rna']`
# unless we copy it
rna = mdata['rna']

In [None]:
rna.var['mt'] = rna.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(rna, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

In [None]:
sc.pl.violin(rna, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'], jitter=0.4, multi_panel=True)

In [None]:
mu.pp.filter_obs(rna, 'n_genes_by_counts', lambda x: (x >= 200) & (x < 8000))
mu.pp.filter_obs(rna, 'total_counts', lambda x: x < 40000)
mu.pp.filter_obs(rna, 'pct_counts_mt', lambda x: x < 2)

In [None]:
sc.pl.violin(rna, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'], jitter=0.4, multi_panel=True)

## Scaling and normalization

In [None]:
rna.layers["counts"] = rna.X.copy()
sc.pp.normalize_total(rna, target_sum=1e4)
sc.pp.log1p(rna)
# rna.raw = rna
rna.layers["lognorm"] = rna.X.copy()

## Identify highly-variable genes

In [None]:
sc.pp.highly_variable_genes(rna, min_mean=0.02, max_mean=4, min_disp=0.5)

In [None]:
sc.pl.highly_variable_genes(rna)

In [None]:
sc.pp.scale(rna, max_value=10)

In [None]:
sc.tl.pca(rna, svd_solver='arpack')
sc.pl.pca(rna, color=['NRCAM', 'SLC1A2', 'SRGN', 'VCAN'])

In [None]:
sc.pl.pca_variance_ratio(rna, log=True)

In [None]:
sc.pp.neighbors(rna, n_neighbors=10, n_pcs=20)
sc.tl.leiden(rna, resolution=.5)

In [None]:
sc.tl.umap(rna, spread=1., min_dist=.5, random_state=11)
sc.pl.umap(rna, color="leiden", legend_loc="on data")

## Cell type annotation

In [None]:
sc.tl.rank_genes_groups(rna, 'leiden', method='t-test')

In [None]:
result = rna.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 50)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names', 'pvals']}).head(10)

In [None]:
sc.pl.rank_genes_groups(rna, n_genes=20, sharey=False)

In [None]:
sc.pl.umap(rna, color=["PLP1", "CNP", "CTNNA3"])

In [None]:
sc.pl.umap(rna, color=["SLC1A2", "SRGN", "VCAN"], title=["SLC1A2 (astrocytes)", "SRGN (microglia)", "VCAN (OPCs)"])

In [None]:
new_cluster_names = {
    "0": "oligodendrocyte",
    "1": "oligodendrocyte",
    "3": "oligodendrocyte",
    "5": "oligodendrocyte",
    "14": "oligodendrocyte",
    "4": "OPC",
    "8": "microglia",
    "2": "astrocyte",
    "10": "astrocyte",
    "11": "astrocyte",
    "12": "astrocyte",
    "6": "excitatory_LAMP5",
    "13": "excitatory_RORB",
    "7": "inhibitory_LHX6",
    "9": "inhibitory_ADARB2",
    "15": "inhibitory_ADARB2",
}

In [None]:
rna.obs['celltype'] = [new_cluster_names[cl] for cl in rna.obs.leiden.astype("str").values]
rna.obs.celltype = rna.obs.celltype.astype("category")

In [None]:
sc.pl.umap(rna, color="celltype")

## 2. ATAC

In [None]:
atac = mdata.mod['atac']

In [None]:
sc.pp.calculate_qc_metrics(atac, percent_top=None, log1p=False, inplace=True)

In [None]:
mu.pl.histogram(atac, ['n_genes_by_counts', 'total_counts'], linewidth=0)

In [None]:
mu.pp.filter_var(atac, 'n_cells_by_counts', lambda x: x >= 10)

In [None]:
mu.pp.filter_obs(atac, 'total_counts', lambda x: (x >= 1000) & (x <= 80000))
mu.pp.filter_obs(atac, 'n_genes_by_counts', lambda x: (x >= 100) & (x <= 30000))

In [None]:
mu.pl.histogram(atac, ['n_genes_by_counts', 'total_counts'], linewidth=0)

In [None]:
ac.pl.fragment_histogram(atac, region='chr1:1-2000000')

In [None]:
ac.tl.nucleosome_signal(atac, n=1e6)

In [None]:
mu.pl.histogram(atac, "nucleosome_signal", linewidth=0)

In [None]:
# Check TSS enrichment
ac.tl.get_gene_annotation_from_rna(mdata['rna']).head(3)  # accepts MuData with 'rna' modality or mdata['rna'] AnnData directly

In [None]:
tss = ac.tl.tss_enrichment(mdata, n_tss=1000)  # by default, features=ac.tl.get_gene_annotation_from_rna(mdata)

In [None]:
ac.pl.tss_enrichment(tss)

In [None]:
atac.layers["counts"] = atac.X.copy()
sc.pp.normalize_total(atac, target_sum=1e4)
sc.pp.log1p(atac)
atac.layers["lognorm"] = atac.X.copy()

In [None]:
sc.pp.highly_variable_genes(atac, min_mean=0.05, max_mean=1.5, min_disp=.5)
sc.pl.highly_variable_genes(atac)

In [None]:
np.sum(atac.var.highly_variable)

In [None]:
sc.pp.scale(atac, max_value=10)
sc.tl.pca(atac, svd_solver='arpack')
ac.pl.pca(atac, color=['NRCAM', 'SLC1A2', 'SRGN', 'VCAN'], layer='lognorm', func='mean')

In [None]:
sc.pl.pca_variance_ratio(atac, log=True)

In [None]:
sc.pp.neighbors(atac, n_neighbors=10, n_pcs=20)
sc.tl.leiden(atac, resolution=.5)

In [None]:
sc.tl.umap(atac, spread=1., min_dist=.5, random_state=11)
sc.pl.umap(atac, color="leiden", legend_loc="on data")

## Marker genes and cell types

In [None]:
ac.tl.rank_peaks_groups(atac, 'leiden', method='t-test')

In [None]:
# Tabulate, per Leiden cluster, the first 10 entries of the 'names', 'genes'
# and 'pvals' fields produced by the ranking step above.
result = atac.uns['rank_genes_groups']
groups = result['names'].dtype.names

# Use the fully-qualified option name. The short alias "max_columns" is not
# accepted by every pandas version, and guarding it with a bare `except:`
# would also hide unrelated errors (including KeyboardInterrupt).
# https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html
pd.set_option("display.max_columns", 50)

# Column labels are "<cluster>_<first letter of key>", e.g. "0_n", "0_g", "0_p".
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names', 'genes', 'pvals']}).head(10)

In [None]:
mu.pp.filter_obs(atac, "leiden", lambda x: ~x.isin(["9"]))

In [None]:
new_cluster_names = {
    "0": "oligodendrocyte",
    "1": "oligodendrocyte",
    "3": "OPC",
    "7": "microglia",
    "2": "astrocyte",
    "8": "astrocyte",
    "4": "excitatory",
    "5": "inhibitory1",
    "6": "inhibitory2",
    "10": "unk"
}

In [None]:
atac.obs['celltype'] = [new_cluster_names[cl] for cl in atac.obs.leiden.astype("str").values]
atac.obs.celltype = atac.obs.celltype.astype("category")

In [None]:
sc.pl.umap(atac, color="celltype")

## 3. Multi-omics integration
Discard cells that are not in both modalities.

In [None]:
mdata.update()

In [None]:
mu.pp.intersect_obs(mdata)

In [None]:
mu.tl.mofa(mdata, n_factors=20, outfile="brain3k_mofa_model.hdf5", gpu_mode=True)

In [None]:
sc.pp.neighbors(mdata, use_rep="X_mofa")
sc.tl.umap(mdata, random_state=1)

In [None]:
mdata.obsm["X_mofa_umap"] = mdata.obsm["X_umap"]

In [None]:
mu.pl.embedding(mdata, basis="X_mofa_umap", color=["rna:celltype", "atac:celltype"])

In [None]:
# Reference: https://github.com/scverse/muon/issues/65
mdata.mod["atac"].uns = {}
mdata.mod["rna"].uns = {}
mdata.uns = {}

In [None]:
rna.var.gene_ids = rna.var.gene_ids.astype("str")
atac.var.gene_ids = atac.var.gene_ids.astype("str")

In [None]:
atac.obsm["X_hvg"] = atac[:, atac.var["highly_variable"]].copy().X
rna.obsm["X_hvg"] = rna[:, rna.var["highly_variable"]].copy().X

In [None]:
mdata.mod["atac"] = optimize_adata(
    atac,
    obs_cols=["leiden", "celltype"],
    obsm_keys=["X_pca", "X_umap", "X_hvg"],
    var_cols=["gene_ids", "feature_types", "genome", "interval", "highly_variable"],
    layer_keys=["counts", "lognorm"]
)
mdata.mod["rna"] = optimize_adata(
    rna,
    obs_cols=["leiden", "celltype"],
    obsm_keys=["X_pca", "X_umap", "X_hvg"],
    var_cols=["gene_ids", "feature_types", "genome", "interval", "highly_variable"],
    layer_keys=["counts", "lognorm"]
)

In [None]:
# Fix issue during writing to zarr - this column contains bool and NaN
mdata.var["rna:mt"] = mdata.var["rna:mt"].astype(str)

In [None]:
# TODO: sort var axis by genome (ATAC) and hierarchical clustering (RNA)

In [None]:
mdata

In [None]:
# Persist the processed MuData object as a Zarr store.
# NOTE(review): this writes under join("data", ...), while the store is read
# back further down from join("..", "data", ...) - confirm the working
# directory in use, or align the two paths.
mdata.write_zarr(join("data", "brain3k_processed.mdata.zarr"))

## Configure visualization

In [None]:
from mudata import read_zarr

In [None]:
!pwd

In [None]:
mdata = read_zarr(join("..", "data", "brain3k_processed.mdata.zarr"))
mdata

In [None]:
mdata.mod["atac"].var

In [None]:
in_mtx = mdata.mod["atac"].layers["lognorm"]
in_clusters_df = mdata.mod["atac"].obs
in_barcodes_df = mdata.mod["atac"].obs
in_bins_df = mdata.mod["atac"].var

In [None]:
in_bins_df

In [None]:
from vitessce.entities import GenomicProfiles
import zarr
import math

In [None]:
# The bin dataframe has an "interval" column formatted like chrName:binStart-binEnd
def convert_bin_name_to_chr_name(bin_name):
    """Return the chromosome name from a ``chrName:binStart-binEnd`` string.

    Returns ``np.nan`` when ``bin_name`` has no ``:`` separator or is not a
    string at all (e.g. a missing value), so that malformed rows can be
    removed afterwards with ``dropna``.
    """
    # Missing intervals reach `.apply` as NaN/None; without this guard
    # `bin_name.index` raises AttributeError instead of yielding NaN.
    if not isinstance(bin_name, str):
        return np.nan
    try:
        return bin_name[:bin_name.index(':')]
    except ValueError:
        return np.nan

def convert_bin_name_to_chr_start(bin_name):
    """Return the integer start from a ``chrName:binStart-binEnd`` string.

    The start is the text between the first ``:`` and the first ``-``.
    Returns ``np.nan`` when a separator is missing, when that text is not an
    integer, or when ``bin_name`` is not a string (e.g. a missing value).
    """
    # Missing intervals reach `.apply` as NaN/None; without this guard
    # `bin_name.index` raises AttributeError instead of yielding NaN.
    if not isinstance(bin_name, str):
        return np.nan
    try:
        return int(bin_name[bin_name.index(':') + 1:bin_name.index('-')])
    except ValueError:
        return np.nan

def convert_bin_name_to_chr_end(bin_name):
    """Return the integer end from a ``chrName:binStart-binEnd`` string.

    The end is everything after the first ``-``. Returns ``np.nan`` when
    there is no ``-``, when that text is not an integer, or when
    ``bin_name`` is not a string (e.g. a missing value).
    """
    # Missing intervals reach `.apply` as NaN/None; without this guard
    # `bin_name.index` raises AttributeError instead of yielding NaN.
    if not isinstance(bin_name, str):
        return np.nan
    try:
        return int(bin_name[bin_name.index('-') + 1:])
    except ValueError:
        return np.nan


in_bins_df["chr_name"] = in_bins_df["interval"].apply(
    convert_bin_name_to_chr_name)
in_bins_df["chr_start"] = in_bins_df["interval"].apply(
    convert_bin_name_to_chr_start)
in_bins_df["chr_end"] = in_bins_df["interval"].apply(
    convert_bin_name_to_chr_end)

# Drop any rows that had incorrect bin strings (missing a chromosome name, bin start, or bin end value).
in_bins_df = in_bins_df.dropna(
    subset=["chr_name", "chr_start", "chr_end"]).copy()

In [None]:
# Ensure that the columns have the expected types.
in_bins_df["chr_name"] = in_bins_df["chr_name"].astype(str)
in_bins_df["chr_start"] = in_bins_df["chr_start"].astype(int)
in_bins_df["chr_end"] = in_bins_df["chr_end"].astype(int)

in_bins_df = in_bins_df.reset_index(drop=True)
in_bins_df = in_bins_df.drop(columns=["gene_ids", "feature_types", "genome", "highly_variable"])

In [None]:
in_bins_df

In [None]:
# Width of every interval (end minus start), computed as a single column
# subtraction instead of a Python-level call per row.
interval_sizes = in_bins_df["chr_end"] - in_bins_df["chr_start"]
interval_sizes.max()

In [None]:
# Round bins
# Snap every interval onto a fixed-width, 1-based grid: the start is rounded
# down to the nearest multiple of `starting_resolution` (plus 1) and the end
# is set so that each bin spans exactly `starting_resolution` positions.
#
# `starting_resolution` is assigned here because this cell runs before the
# later cell that also assigns it; without this a top-to-bottom run raises
# NameError. Keep the two values identical.
starting_resolution = 5000
in_bins_df["chr_start_round"] = (in_bins_df["chr_start"] // starting_resolution) * starting_resolution + 1
in_bins_df["chr_end_round"] = in_bins_df["chr_start_round"] + starting_resolution - 1
# TODO: should the values need to be scaled based on the ratio of the original size of the interval to the rounded size?

In [None]:
# Overwrite the original coordinates with the rounded ones, discard the
# temporary columns, and rebuild the "interval" label from the new values.
in_bins_df["chr_start"] = in_bins_df["chr_start_round"]
in_bins_df["chr_end"] = in_bins_df["chr_end_round"]
in_bins_df = in_bins_df.drop(columns=["chr_start_round", "chr_end_round"])
rounded_start_txt = in_bins_df["chr_start"].astype(str)
rounded_end_txt = in_bins_df["chr_end"].astype(str)
in_bins_df["interval"] = in_bins_df["chr_name"] + ":" + rounded_start_txt + "-" + rounded_end_txt

In [None]:
cluster_ids = in_clusters_df["celltype"].unique().tolist()
cluster_ids

In [None]:
cluster_paths = [["Cell Type", cluster_id] for cluster_id in cluster_ids]
starting_resolution=5000

In [None]:
# Create the Zarr store for the outputs.
out_f = zarr.open(join("..", "data", "brain3k.multivec.zarr"), mode='w')

In [None]:
genomic_profiles = GenomicProfiles(
    out_f, profile_paths=cluster_paths, assembly='hg38', starting_resolution=starting_resolution
)
chrom_name_to_length = genomic_profiles.chrom_name_to_length

In [None]:
# Create each chromosome dataset.
#
# Overview of one iteration (one chromosome):
#   1. Select the input bins of this chromosome and slice the matching columns out of `in_mtx`.
#   2. Build the complete list of fixed-width bins for the chromosome ("ground truth").
#   3. Right-join the input bins onto the complete list so that missing bins become explicit rows.
#   4. Join the matrix columns onto those rows; bins without data are filled with 0.
#   5. For every cluster, sum the selected cells' rows and pass the profile to `genomic_profiles.add_profile`.
for chr_name, chr_len in chrom_name_to_length.items():
    # The bins dataframe frustratingly does not contain every bin.
    # We need to figure out which bins are missing.

    # We want to check for missing bins in each chromosome separately,
    # otherwise too much memory is used during the join step.
    chr_bins_in_df = in_bins_df.loc[in_bins_df["chr_name"] == chr_name]
    if chr_bins_in_df.shape[0] == 0:
        # No processing or output is necessary if there is no data for this chromosome.
        # Continue on through all resolutions of this chromosome to the next chromosome.
        continue
    # Determine the indices of the matrix at which the bins for this chromosome start and end.
    # NOTE(review): the index labels of the first and last row are used as column positions of `in_mtx`.
    # This assumes that the index of `in_bins_df` still equals the column position in `in_mtx`
    # (i.e. no rows were dropped before the index was reset) and that all rows of a chromosome
    # are contiguous - confirm.
    chr_bin_i_start = int(chr_bins_in_df.head(1).iloc[0].name)
    chr_bin_i_end = int(chr_bins_in_df.tail(1).iloc[0].name) + 1

    # Extract the part of the matrix corresponding to the current chromosome.
    chr_mtx = in_mtx[:, chr_bin_i_start:chr_bin_i_end]

    # Create a list of the "ground truth" bins (all bins from position 0 to the end of the chromosome).
    # We will join the input bins onto this dataframe to determine which bins are missing.
    chr_bins_gt_df = pd.DataFrame()
    chr_bins_gt_df["chr_start"] = np.arange(0, math.ceil(
        chr_len / starting_resolution)) * starting_resolution
    chr_bins_gt_df["chr_end"] = chr_bins_gt_df["chr_start"] + \
        starting_resolution
    # Shift the start by one so that the bins are 1-based and inclusive: 1-5000, 5001-10000, ... (for a resolution of 5000).
    chr_bins_gt_df["chr_start"] = chr_bins_gt_df["chr_start"] + 1
    chr_bins_gt_df["chr_start"] = chr_bins_gt_df["chr_start"].astype(
        int)
    chr_bins_gt_df["chr_end"] = chr_bins_gt_df["chr_end"].astype(int)
    chr_bins_gt_df["chr_name"] = chr_name
    # Column 0 holds the full bin string, in the same chrName:start-end format as the "interval" column of the input bins.
    chr_bins_gt_df[0] = chr_bins_gt_df.apply(lambda r: f"{r['chr_name']}:{r['chr_start']}-{r['chr_end']}", axis='columns')
    
    # We will add a new column "i", which should match the _old_ index, so that we will be able join with the data matrix on the original indices.
    # For the new rows, we will add values for the "i" column that are greater than any of the original indices,
    # to prevent any joining with the incoming data matrix onto these bins for which the data is missing.
    # After the reset, "i" is the row position within this chromosome, which matches the column position within `chr_mtx`.
    chr_bins_in_df = chr_bins_in_df.reset_index(drop=True)
    chr_bins_in_df["i"] = chr_bins_in_df.index.values
    chr_bins_gt_df["i"] = chr_bins_gt_df.index.values + \
        (in_mtx.shape[1] + 1)

    # Set the full bin string column as the index of both data frames.
    chr_bins_gt_df = chr_bins_gt_df.set_index(0)
    chr_bins_in_df = chr_bins_in_df.set_index("interval")

    # Join the input bin subset dataframe right onto the full bin ground truth dataframe.
    # NOTE(review): if several input bins were rounded to the same interval string, this join
    # yields one row per input bin rather than one per ground-truth bin - confirm whether
    # such duplicates need to be aggregated first.
    chr_bins_in_join_df = chr_bins_in_df.join(
        chr_bins_gt_df, how='right', lsuffix="", rsuffix="_gt")
    # The bins which were not present in the input will have NaN values in the "i" column.
    # For these rows, we replace the NaN values with the much higher "i_gt" values which will not match to any index of the data matrix.
    chr_bins_in_join_df["i"] = chr_bins_in_join_df.apply(
        lambda r: r['i'] if pd.notna(r['i']) else r['i_gt'], axis='columns').astype(int)

    # Clean up the joined data frame by removing unnecessary columns.
    # The input-side coordinate columns are dropped and the "_gt" ones take over their names.
    chr_bins_in_join_df = chr_bins_in_join_df.drop(
        columns=['chr_name', 'chr_start', 'chr_end', 'i_gt'])
    chr_bins_in_join_df = chr_bins_in_join_df.rename(
        columns={'chr_name_gt': 'chr_name', 'chr_start_gt': 'chr_start', 'chr_end_gt': 'chr_end'})

    # Create a dataframe from the data matrix, so that we can join to the joined bins dataframe.
    # The matrix is transposed so that each dataframe row is one bin.
    # NOTE(review): presumably `chr_mtx` is a dense array here; confirm that the stored "lognorm" layer is not sparse.
    chr_mtx_df = pd.DataFrame(data=chr_mtx.T)

    chr_bins_i_df = chr_bins_in_join_df.drop(
        columns=['chr_name', 'chr_start', 'chr_end'])

    # Join the data matrix dataframe and the bins dataframe.
    # Bins that are missing from the data matrix will have "i" values higher than any of the data matrix dataframe row indices,
    # and therefore the data values for these bins in the resulting joined dataframe will all be NaN.
    chr_mtx_join_df = chr_bins_i_df.join(
        chr_mtx_df, how='left', on='i')
    # We fill in these NaN values with 0.
    chr_mtx_join_df = chr_mtx_join_df.fillna(value=0.0)

    # Drop the "i" column, since it is not necessary now that we have done the join.
    chr_mtx_join_df = chr_mtx_join_df.drop(columns=['i'])
    # Obtain the new full data matrix, which contains values for all bins of the chromosome.
    # Transposing back restores the original orientation (first axis as in `in_mtx`, second axis = bins).
    chr_mtx = chr_mtx_join_df.values.T
    
    # Fill in the Zarr store with data for each cluster.
    for cluster_index, cluster_id in enumerate(cluster_ids):
        # Get the list of cells in the current cluster.
        cluster_df = in_clusters_df.loc[in_clusters_df["celltype"]
                                        == cluster_id]
        cluster_cell_ids = cluster_df.index.values.tolist()
        # Boolean mask over the rows of `in_barcodes_df`, True for the cells of this cluster.
        cluster_cells_tf = (
            in_barcodes_df.index.to_series().isin(cluster_cell_ids)).values

        # Get the rows of the data matrix corresponding to the cells in this cluster.
        cluster_cell_by_bin_mtx = chr_mtx[cluster_cells_tf, :]
        # Take the sum of this cluster along the cells axis.
        cluster_profile = cluster_cell_by_bin_mtx.sum(axis=0)
        
        # For some reason the matrix can contain intervals past the end of the
        # chromosome according to the length from negspy,
        # so we only keep those bins that fit.
        profile_len = math.ceil(chr_len / starting_resolution)

        genomic_profiles.add_profile(
            cluster_profile[0:profile_len], chr_name, cluster_index)

In [3]:
from vitessce.wrappers import AbstractWrapper
from vitessce.repr import make_repr
from uuid import uuid4

In [4]:
class MultivecZarrWrapper(AbstractWrapper):
    """Wrapper that registers a Zarr store as a ``genomic-profiles.zarr`` file.

    Exactly one of ``zarr_path`` (a local store) or ``zarr_url`` (a remote
    store) must be supplied.
    """

    def __init__(self, zarr_path=None, zarr_url=None, **kwargs):
        """Validate the path/URL pair and record whether the store is remote.

        :param zarr_path: Path of a local Zarr store, or None.
        :param zarr_url: URL of a remote Zarr store, or None.
        :param kwargs: Forwarded unchanged to ``AbstractWrapper.__init__``.
        :raises ValueError: If both or neither of the two locations are given.
        """
        super().__init__(**kwargs)
        # Captured before any other local is created so that the recorded
        # arguments are exactly those passed to the constructor.
        self._repr = make_repr(locals())
        if zarr_path is None and zarr_url is None:
            raise ValueError(
                "Expected either zarr_url or zarr_path to be provided")
        if zarr_path is not None and zarr_url is not None:
            raise ValueError(
                "Did not expect zarr_path to be provided with zarr_url")
        self._zarr_path = zarr_path
        self._zarr_url = zarr_url
        # Without a local path the store can only be reached through its URL.
        self.is_remote = self._zarr_path is None
        self.local_dir_uid = str(uuid4())

    def convert_and_save(self, dataset_uid, obj_i):
        """Register the file definition creator and routes for this object."""
        # The parent implementation is only invoked for local stores
        # (only create the out-directory if needed).
        if not self.is_remote:
            super().convert_and_save(dataset_uid, obj_i)

        creator = self.make_genomic_profiles_file_def_creator(
            dataset_uid, obj_i)
        new_routes = self.make_genomic_profiles_routes(dataset_uid, obj_i)

        self.file_def_creators.append(creator)
        self.routes += new_routes

    def make_genomic_profiles_routes(self, dataset_uid, obj_i):
        """Return the local directory route(s), or an empty list when remote."""
        if not self.is_remote:
            return self.get_local_dir_route(
                dataset_uid, obj_i, self._zarr_path, self.local_dir_uid)
        return []

    def get_zarr_url(self, base_url="", dataset_uid="", obj_i=""):
        """Return the remote URL, or the local directory URL under ``base_url``."""
        if not self.is_remote:
            return self.get_local_dir_url(
                base_url, dataset_uid, obj_i, self.local_dir_uid)
        return self._zarr_url

    def make_genomic_profiles_file_def_creator(self, dataset_uid, obj_i):
        """Return a function mapping ``base_url`` to the file definition dict."""
        def genomic_profiles_file_def_creator(base_url):
            url = self.get_zarr_url(base_url, dataset_uid, obj_i)
            return {"fileType": "genomic-profiles.zarr", "url": url}
        return genomic_profiles_file_def_creator



In [5]:
from vitessce import (
    VitessceConfig,
    ViewType as vt,
    CoordinationType as ct,
    FileType as ft,
    AnnDataWrapper,
    OmeTiffWrapper,
)

In [6]:
vc = VitessceConfig(schema_version="1.0.15", name='Multiome data', description='RNA+ATAC')

In [7]:
# Locations of the stores consumed by the Vitessce configuration.
# The multivec store is written to join("..", "data", ...) when it is created
# above, so it has to be read from that same location; all four paths now
# share the "../data" prefix.
multivec_zarr = join("..", "data", "brain3k.multivec.zarr")
rna_zarr = join("..", "data", "brain3k_processed.mdata.zarr", "mod", "rna")
atac_zarr = join("..", "data", "brain3k_processed.mdata.zarr", "mod", "atac")
joint_zarr = join("..", "data", "brain3k_processed.mdata.zarr")

In [8]:
dataset = vc.add_dataset(name='RNA+ATAC').add_object(AnnDataWrapper(
    # We run add_object with adata_path=rna_zarr first to add the RNA modality (cell-by-gene matrix and associated metadata).
    adata_path=rna_zarr,
    obs_embedding_paths=["obsm/X_umap"],
    obs_embedding_names=["UMAP"],
    obs_set_paths=["obs/celltype"],
    obs_set_names=["Cell Type"],
    obs_feature_matrix_path="obsm/X_hvg",
    feature_filter_path="var/highly_variable",
    # To be explicit that the features represent genes and gene expression, we specify that here.
    coordination_values={
        "featureType": "gene",
        "featureValueType": "expression"
    }
)).add_object(AnnDataWrapper(
    # We next run add_object with adata_path=atac_zarr to add the ATAC modality (cell-by-peak matrix and associated metadata).
    adata_path=atac_zarr,
    obs_embedding_paths=["obsm/X_umap"],
    obs_embedding_names=["UMAP"],
    obs_set_paths=["obs/celltype"],
    obs_set_names=["Cell Type"],
    obs_feature_matrix_path="obsm/X_hvg",
    feature_filter_path="var/highly_variable",
    # These features are peaks rather than genes, so we specify alternate coordination values here.
    coordination_values={
        "featureType": "peak",
        "featureValueType": "count"
    }
)).add_object(MultivecZarrWrapper(
    # Finally, we run add_object with zarr_path=multivec_zarr to add the multivec Zarr store (genomic profiles).
    zarr_path=multivec_zarr,
))

In [9]:
genomic_profiles = vc.add_view(vt.GENOMIC_PROFILES, dataset=dataset)
scatter = vc.add_view(vt.SCATTERPLOT, dataset=dataset, mapping = "UMAP")
cell_sets = vc.add_view(vt.OBS_SETS, dataset=dataset)

vc.layout(genomic_profiles / (scatter | cell_sets));


In [10]:
vw = vc.widget(height=800)
vw

VitessceWidget(config={'version': '1.0.15', 'name': 'Multiome data', 'description': 'RNA+ATAC', 'datasets': [{…

In [None]:
umap_scatterplot_by_rna = vc.add_view(vt.SCATTERPLOT, dataset=dataset, mapping="UMAP")
umap_scatterplot_by_atac = vc.add_view(vt.SCATTERPLOT, dataset=dataset, mapping="UMAP")

gene_list = vc.add_view(vt.FEATURE_LIST, dataset=dataset)
peak_list = vc.add_view(vt.FEATURE_LIST, dataset=dataset)

rna_heatmap = vc.add_view(vt.HEATMAP, dataset=dataset).set_props(transpose=False)
atac_heatmap = vc.add_view(vt.HEATMAP, dataset=dataset).set_props(transpose=False)

In [None]:
# We need to specify which of the two feature types (i.e., genes or peaks) the different plots correspond to.
# We also need to make sure the selection of genes and peaks is scoped to only the corresponding plots,
# and we want to make sure the color mappings are independent for each modality.
coordination_types = [ct.FEATURE_TYPE, ct.FEATURE_VALUE_TYPE, ct.FEATURE_SELECTION, ct.OBS_COLOR_ENCODING, ct.FEATURE_VALUE_COLORMAP_RANGE]
vc.link_views([umap_scatterplot_by_rna, gene_list, rna_heatmap], coordination_types, ["gene", "expression", None, 'cellSetSelection', [0.0, 0.3]])
vc.link_views([umap_scatterplot_by_atac, peak_list, atac_heatmap], coordination_types, ["peak", "count", None, 'cellSetSelection', [0.0, 1.0]])

# We can link the two scatterplots on their zoom level and (X,Y) center point so that zooming/panning is coordinated.
vc.link_views([umap_scatterplot_by_rna, umap_scatterplot_by_atac], [ct.EMBEDDING_ZOOM, ct.EMBEDDING_TARGET_X, ct.EMBEDDING_TARGET_Y], [3, 0, 0])

In [None]:
# We define a layout for the plots using two rows.
# In the first row, we add the three gene-related (RNA) visualizations,
# and in the second row, we add the three peak-related (ATAC) visualizations.
vc.layout(
    (rna_heatmap | (umap_scatterplot_by_rna | gene_list))
    / (atac_heatmap | (umap_scatterplot_by_atac | peak_list))
);

In [None]:
vw = vc.widget()
vw