# Vitessce Widget Tutorial

# Visualization of a SpatialData object

## Import dependencies


In [1]:
import os
from os.path import join, isfile, isdir
from urllib.request import urlretrieve
import zipfile
import shutil

from vitessce import (
    VitessceConfig,
    ViewType as vt,
    CoordinationType as ct,
    CoordinationLevel as CL,
    SpatialDataWrapper,
    get_initial_coordination_scope_prefix
)

from vitessce.data_utils import (
    sdata_morton_sort_points,
    sdata_morton_query_rect,
)



In [2]:
import pandas as pd
import numpy as np

In [3]:
from spatialdata import read_zarr, get_element_annotators

In [4]:
# Local locations used by this notebook: the download directory, the extracted
# SpatialData Zarr store, and the zip archive it is unpacked from.
data_dir = "data"
spatialdata_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr")
zip_filepath = spatialdata_filepath + ".zip"

In [5]:
# Fetch and unpack the SpatialData store, skipping everything when the
# extracted store directory is already present.
needs_extract = not isdir(spatialdata_filepath)

if needs_extract and not isfile(zip_filepath):
    # The archive has not been downloaded yet: create the target directory and fetch it.
    os.makedirs(data_dir, exist_ok=True)
    urlretrieve('https://s3.embl.de/spatialdata/spatialdata-sandbox/xenium_rep1_io.zip', zip_filepath)

if needs_extract:
    with zipfile.ZipFile(zip_filepath, "r") as archive:
        archive.extractall(data_dir)

    # The extracted directory is named "data.zarr"; move it to the path used by the rest of the notebook.
    os.rename(join(data_dir, "data.zarr"), spatialdata_filepath)

    # This Xenium dataset has an AnnData "raw" element.
    # Reference: https://github.com/giovp/spatialdata-sandbox/issues/55
    raw_dir = join(spatialdata_filepath, "tables", "table", "raw")
    if isdir(raw_dir):
        shutil.rmtree(raw_dir)

In [6]:
# Open the extracted SpatialData Zarr store and display its summary
# (images, points, shapes, tables and coordinate systems).
sdata = read_zarr(spatialdata_filepath)
sdata

version mismatch: detected: RasterFormatV02, requested: FormatV04
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
version mismatch: detected: RasterFormatV02, requested: FormatV04


SpatialData object, with associated Zarr store: /Users/mkeller/research/dbmi/vitessce/vitessce-python/docs/notebooks/data/xenium_rep1_io.spatialdata.zarr
├── Images
│     ├── 'morphology_focus': DataTree[cyx] (1, 25778, 35416), (1, 12889, 17708), (1, 6444, 8854), (1, 3222, 4427), (1, 1611, 2213)
│     └── 'morphology_mip': DataTree[cyx] (1, 25778, 35416), (1, 12889, 17708), (1, 6444, 8854), (1, 3222, 4427), (1, 1611, 2213)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 8) (3D points)
├── Shapes
│     ├── 'cell_boundaries': GeoDataFrame shape: (167780, 1) (2D shapes)
│     └── 'cell_circles': GeoDataFrame shape: (167780, 2) (2D shapes)
└── Tables
      └── 'table': AnnData (167780, 313)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), morphology_mip (Images), transcripts (Points), cell_boundaries (Shapes), cell_circles (Shapes)

In [7]:
# Preview the first rows of the 'transcripts' points element as loaded from disk.
sdata.points['transcripts'].head()

Unnamed: 0,x,y,z,feature_name,cell_id,overlaps_nucleus,transcript_id,qv
0,4.395842,328.666473,12.019493,SEC11C,565,0,281474976710656,18.662479
1,5.074415,236.964844,7.60851,NegControlCodeword_0502,540,0,281474976710657,18.634956
2,4.702023,322.79715,12.289083,SEC11C,562,0,281474976710658,18.662479
3,4.906601,581.42865,11.222615,DAPK3,271,0,281474976710659,20.821745
4,5.660699,720.851746,9.265523,TCIM,291,0,281474976710660,18.017488


In [8]:
# Sort the 'transcripts' points by Morton code.
# The return value is not assigned here; later cells read the sorted points from
# `sdata` itself, so the call appears to update `sdata` in place — TODO confirm.
sdata_morton_sort_points(sdata, "transcripts")

  self._check_key(key, self.keys(), self._shared_keys)


SpatialData object, with associated Zarr store: /Users/mkeller/research/dbmi/vitessce/vitessce-python/docs/notebooks/data/xenium_rep1_io.spatialdata.zarr
├── Images
│     ├── 'morphology_focus': DataTree[cyx] (1, 25778, 35416), (1, 12889, 17708), (1, 6444, 8854), (1, 3222, 4427), (1, 1611, 2213)
│     └── 'morphology_mip': DataTree[cyx] (1, 25778, 35416), (1, 12889, 17708), (1, 6444, 8854), (1, 3222, 4427), (1, 1611, 2213)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 11) (3D points)
├── Shapes
│     ├── 'cell_boundaries': GeoDataFrame shape: (167780, 1) (2D shapes)
│     └── 'cell_circles': GeoDataFrame shape: (167780, 2) (2D shapes)
└── Tables
      └── 'table': AnnData (167780, 313)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), morphology_mip (Images), transcripts (Points), cell_boundaries (Shapes), cell_circles (Shapes)

In [9]:
# Preview the points again after sorting: the frame now also carries the
# x_uint, y_uint and morton_code_2d columns.
sdata.points["transcripts"].head()

Unnamed: 0,x,y,z,feature_name,cell_id,overlaps_nucleus,transcript_id,qv,x_uint,y_uint,morton_code_2d
677718,16.058575,17.155981,6.504758,ERBB2,-1,0,281474977398719,40.0,156,152,50128
542861,49.912006,13.121742,6.657761,TOMM7,-1,0,281474977261746,20.909292,451,104,96389
817519,50.067265,14.15489,6.614446,SERHL2,-1,0,281474977540499,40.0,452,116,96816
600252,40.367912,24.953489,6.542048,SCD,-1,0,281474977320069,40.0,367,246,114301
518878,56.408573,16.603086,6.755222,TPD52,-1,0,281474977237434,20.62504,507,146,120653


In [10]:
# Inspect the metadata attached to the points element
# (coordinate transform, spatialdata attributes, bounding box).
sdata.points['transcripts'].attrs

{'transform': {'global': Scale (x, y, z)
      [4.70588235 4.70588235 1.        ]},
 'spatialdata_attrs': {'feature_key': 'feature_name',
  'instance_key': 'cell_id'},
 'bounding_box': {'x_min': -1.8734135627746582,
  'x_max': 7522.74609375,
  'y_min': 4.415738582611084,
  'y_max': 5473.509765625}}

In [11]:
# Query rectangle, given as [[x0, y0], [x1, y1]].
# NOTE(review): presumably expressed in the same units as the points' x/y columns — confirm.
orig_rect = [[50, 50], [100, 150]] # x0, y0, x1, y1
# Ask for the row ranges of the sorted 'transcripts' points that correspond to the rectangle.
rect_row_ranges = sdata_morton_query_rect(sdata, "transcripts", orig_rect)

In [12]:
# Display the query result: a list of (start, end) row-index tuples.
rect_row_ranges

[(3545, 3546),
 (3727, 3731),
 (3850, 3866),
 (3878, 3919),
 (3963, 4094),
 (4097, 4134),
 (4136, 4145),
 (4224, 4274),
 (4277, 4279),
 (4281, 4312),
 (4314, 4317),
 (4866, 4905),
 (4933, 4960),
 (5193, 5226),
 (5296, 5301),
 (5309, 5318),
 (5326, 5327),
 (7035, 7146),
 (7158, 7159),
 (7196, 7197),
 (7201, 7202),
 (7210, 7253),
 (7255, 7270),
 (7328, 7348)]

In [17]:
# Materialize the lazily-evaluated points (shown as <Delayed> in the SpatialData summary)
# into an in-memory dataframe so rows can be selected by position.
df = sdata.points["transcripts"].compute()

In [30]:
# Expand every (start, end) tuple into the individual integer row positions it covers
# (start inclusive, end exclusive, following range()).
row_indices = [
    row_position
    for (range_start, range_end) in rect_row_ranges
    for row_position in range(range_start, range_end)
]

# Select the matching rows of the pandas df by position
df.iloc[row_indices, :]

Unnamed: 0,x,y,z,feature_name,cell_id,overlaps_nucleus,transcript_id,qv,x_uint,y_uint,morton_code_2d
658837,52.459900,93.118103,6.898128,CLDN4,-1,0,281474977379514,40.000000,473,1062,2185577
600591,50.805218,120.523071,10.561942,SVIL,18970,0,281474977320410,14.278772,458,1391,2324718
658809,50.356701,120.712105,9.161491,PDGFRA,18970,0,281474977379486,18.520607,454,1393,2325014
118808,50.859150,120.683838,9.550999,POSTN,18970,0,281474976831060,24.251556,459,1393,2325063
518751,51.994984,121.755920,8.629100,POSTN,18970,0,281474977237307,40.000000,469,1406,2325433
...,...,...,...,...,...,...,...,...,...,...,...
543890,97.529213,146.733734,19.274723,RAB30,19006,0,281474977262780,23.581987,865,1705,2989187
348074,99.394630,146.031845,20.747622,CDH1,19006,0,281474977064057,17.888988,881,1696,2989313
105517,99.517250,146.379028,20.244516,KRT7,19006,0,281474976817528,40.000000,883,1701,2989351
478892,100.051506,146.796509,22.307613,SMS,19006,0,281474977196826,40.000000,887,1706,2989469


In [None]:
import dask.dataframe as dd

# Use a seeded generator so the toy example produces the same points on every run.
rng = np.random.default_rng(0)

# Construct a toy dataframe of 20 points in a 100 x 200 region:
# x is drawn from [0, 100) and y from [0, 200).
# Building the frame directly from the arrays avoids assigning columns onto an empty-index frame.
toy_df = pd.DataFrame({
    "x": rng.uniform(low=0.0, high=100.0, size=20),
    "y": rng.uniform(low=0.0, high=200.0, size=20),
})

# Wrap the pandas frame as a dask dataframe split into two partitions.
toy_ddf = dd.from_pandas(toy_df, npartitions=2)

In [None]:
# Compute morton codes
# NOTE(review): `norm_ddf_to_uint` and `morton_interleave` are not imported or defined in any
# cell visible in this notebook, so unless they are defined elsewhere this cell raises
# NameError on a fresh kernel. They presumably live next to the `sdata_morton_*` helpers
# imported from `vitessce.data_utils` — TODO confirm the module path and import them.
toy_ddf = norm_ddf_to_uint(toy_ddf)
toy_ddf["morton_code_2d"] = morton_interleave(toy_ddf)
# Sort by Morton code and materialize the result with .compute();
# despite its name, `sorted_ddf` holds the computed (no longer lazy) result.
sorted_ddf = toy_ddf.sort_values(by="morton_code_2d", ascending=True).compute()

## Configure Vitessce

Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views.

In [None]:
# Create the Vitessce configuration for the Xenium SpatialData store loaded above.
vc = VitessceConfig(
    schema_version="1.0.18",
    name='Xenium SpatialData Demo',
)
# Add data to the configuration:
wrapper = SpatialDataWrapper(
    sdata_path=spatialdata_filepath,
    # The following paths are relative to the root of the SpatialData zarr store on-disk.
    # They must name elements that exist in this store (see the `sdata` summary earlier in
    # the notebook): images 'morphology_focus' / 'morphology_mip', shapes 'cell_boundaries' /
    # 'cell_circles', and table 'table'.
    image_path="images/morphology_focus",
    table_path="tables/table",
    obs_feature_matrix_path="tables/table/X",
    # NOTE(review): 'cell_circles' is used as the spots layer; this assumes the table
    # annotates that shapes element — confirm against the table's region metadata.
    obs_spots_path="shapes/cell_circles",
    coordinate_system="global",
    coordination_values={
        # The following tells Vitessce to label the observations (rendered as spots) with the "cell" type
        "obsType": "cell",
    }
)
dataset = vc.add_dataset(name='Xenium').add_object(wrapper)

# Add views (visualizations) to the configuration:
spatial = vc.add_view("spatialBeta", dataset=dataset)
feature_list = vc.add_view("featureList", dataset=dataset)
layer_controller = vc.add_view("layerControllerBeta", dataset=dataset)
obs_sets = vc.add_view("obsSets", dataset=dataset)

# Link the spot layer between the spatial view and its layer controller,
# so both refer to the same "cell" observation type.
vc.link_views_by_dict([spatial, layer_controller], {
    'spotLayer': CL([{
        'obsType': 'cell',
    }]),
}, scope_prefix=get_initial_coordination_scope_prefix("A", "obsSpots"))

# Coordinate the observation type across all views using the wrapper's label.
vc.link_views([spatial, layer_controller, feature_list, obs_sets], ['obsType'], [wrapper.obs_type_label])

# Layout the views: spatial on the left, the other three stacked on the right.
# The trailing semicolon suppresses the cell's output.
vc.layout(spatial | (feature_list / layer_controller / obs_sets));

### Render the widget

In [None]:
# Build the Jupyter widget from the configuration and display it as the cell output.
vw = vc.widget()
vw