# Vitessce Widget Tutorial

# Visualization of a SpatialData object

## Import dependencies


In [16]:
import os
from os.path import join, isfile, isdir
from urllib.request import urlretrieve
import zipfile
import shutil

from vitessce import (
    VitessceConfig,
    ViewType as vt,
    CoordinationType as ct,
    CoordinationLevel as CL,
    SpatialDataWrapper,
    get_initial_coordination_scope_prefix
)

In [17]:
# Local cache directory for the downloaded dataset.
data_dir = "data"
# Final location of the extracted SpatialData Zarr store.
spatialdata_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr")
# Where the downloaded zip archive is kept.
zip_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr.zip")

In [21]:
# One-time data setup: nothing happens if the extracted store directory already exists.
if not isdir(spatialdata_filepath):
    # Download the zip archive only when it is not already present locally.
    if not isfile(zip_filepath):
        os.makedirs(data_dir, exist_ok=True)
        urlretrieve('https://s3.embl.de/spatialdata/spatialdata-sandbox/xenium_rep1_io.zip', zip_filepath)
    with zipfile.ZipFile(zip_filepath,"r") as zip_ref:
        zip_ref.extractall(data_dir)
        # The extracted store is expected at <data_dir>/data.zarr; move it to the
        # path the rest of the notebook uses.
        os.rename(join(data_dir, "data.zarr"), spatialdata_filepath)
        
        # This Xenium dataset has an AnnData "raw" element.
        # Reference: https://github.com/giovp/spatialdata-sandbox/issues/55
        # The "raw" directory is deleted from the extracted store if it exists.
        raw_dir = join(spatialdata_filepath, "tables", "table", "raw")
        if isdir(raw_dir):
            shutil.rmtree(raw_dir)

In [22]:
from spatialdata import read_zarr

In [23]:
# Open the extracted Zarr store as a SpatialData object.
sdata = read_zarr(spatialdata_filepath)
# Bare expression: the notebook displays the object's summary.
sdata

version mismatch: detected: RasterFormatV02, requested: FormatV04
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
version mismatch: detected: RasterFormatV02, requested: FormatV04


SpatialData object, with associated Zarr store: /Users/mkeller/research/dbmi/vitessce/vitessce-python/docs/notebooks/data/xenium_rep1_io.spatialdata.zarr
├── Images
│     ├── 'morphology_focus': DataTree[cyx] (1, 25778, 35416), (1, 12889, 17708), (1, 6444, 8854), (1, 3222, 4427), (1, 1611, 2213)
│     └── 'morphology_mip': DataTree[cyx] (1, 25778, 35416), (1, 12889, 17708), (1, 6444, 8854), (1, 3222, 4427), (1, 1611, 2213)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 8) (3D points)
├── Shapes
│     ├── 'cell_boundaries': GeoDataFrame shape: (167780, 1) (2D shapes)
│     └── 'cell_circles': GeoDataFrame shape: (167780, 2) (2D shapes)
└── Tables
      └── 'table': AnnData (167780, 313)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), morphology_mip (Images), transcripts (Points), cell_boundaries (Shapes), cell_circles (Shapes)

In [24]:
# Number of transcript rows. The row count of this points element is lazy
# (shown as <Delayed> in the summary above), so it must be computed explicitly.
sdata.points['transcripts'].shape[0].compute()

42638083

In [None]:
# NOTE(review): scratch cell with no execution count. It cannot run top-to-bottom:
# `df` and `np` are only defined in later cells, the transcripts table uses
# lowercase "x"/"y" columns rather than "X"/"Y", and `morton_interleave` as
# defined later in this notebook takes a single dataframe argument (no `bits`).
# Consider deleting this cell.
xi = df["X"].astype(np.uint32)
yi = df["Y"].astype(np.uint32)
codes = morton_interleave(xi, yi, bits=32)   # 64-bit Morton code

In [113]:
import pandas as pd
import numpy as np
from spatialdata import get_element_annotators

In [41]:
# Keep a handle on the transcripts points element (a lazy, partitioned dataframe).
ddf = sdata.points['transcripts']

In [50]:
# Bounding box of the transcript coordinates. Each bound is obtained with its
# own .compute() call on the lazy dataframe.
x_min = ddf["x"].min().compute()
x_max = ddf["x"].max().compute()
y_min = ddf["y"].min().compute()
y_max = ddf["y"].max().compute()

In [33]:
# Peek at the first 10 transcript rows.
df = sdata.points['transcripts'].head(10)
df

Unnamed: 0,x,y,z,feature_name,cell_id,overlaps_nucleus,transcript_id,qv
0,4.395842,328.666473,12.019493,SEC11C,565,0,281474976710656,18.662479
1,5.074415,236.964844,7.60851,NegControlCodeword_0502,540,0,281474976710657,18.634956
2,4.702023,322.79715,12.289083,SEC11C,562,0,281474976710658,18.662479
3,4.906601,581.42865,11.222615,DAPK3,271,0,281474976710659,20.821745
4,5.660699,720.851746,9.265523,TCIM,291,0,281474976710660,18.017488
5,5.899098,748.592773,9.818688,TCIM,297,0,281474976710661,18.017488
6,6.249354,219.854141,10.27125,NKG7,536,0,281474976710662,40.0
7,7.776,878.157532,12.464459,RAPGEF3,1089,0,281474976710663,20.488186
8,6.397148,232.495712,7.837698,PPARG,540,0,281474976710664,35.338028
9,6.493312,211.362808,10.820307,RAPGEF3,532,0,281474976710665,40.0


In [45]:
MORTON_CODE_NUM_BITS = 32  # Resulting morton codes will be stored as uint32.
MORTON_CODE_VALUE_MIN = 0
# Each of the two interleaved axes gets half of the bits. Floor division keeps
# the per-axis maximum an int (65535) instead of a float (65535.0).
MORTON_CODE_VALUE_MAX = 2 ** (MORTON_CODE_NUM_BITS // 2) - 1

In [46]:
# Display the derived per-axis maximum grid value.
MORTON_CODE_VALUE_MAX

65535.0

In [80]:
def norm_series_to_uint(series, v_min, v_max, max_value=None):
    """
    Scale a numeric Series (int or float) onto the integer grid [0, max_value].

    Values are mapped linearly so that ``v_min`` lands on 0 and ``v_max`` lands
    on ``max_value``. Values outside ``[v_min, v_max]`` are clamped to the ends
    of the grid. NaNs (including those produced by a zero-width range, i.e.
    ``v_max == v_min``) are mapped to 0.

    Parameters
    ----------
    series : Series-like
        Numeric column supporting ``astype`` and ``fillna``.
    v_min, v_max : numeric
        Lower and upper bound of the input range.
    max_value : int, optional
        Largest value of the output grid. When omitted, the module-level
        ``MORTON_CODE_VALUE_MAX`` is used.

    Returns
    -------
    Series-like of dtype uint32.
    """
    if max_value is None:
        max_value = MORTON_CODE_VALUE_MAX

    # Cast to float64 so the normalization is done in floating point.
    series_f64 = series.astype("float64")
    # Normalize the values to be between 0.0 and 1.0.
    norm_series_f64 = (series_f64 - v_min) / (v_max - v_min)
    # Clip to ensure no values are outside the 0/1 range.
    clipped_norm_series_f64 = np.clip(norm_series_f64, 0.0, 1.0)
    # Scale from [0, 1] to [0, max_value].
    scaled_f64 = clipped_norm_series_f64 * max_value
    # NaNs must be replaced BEFORE the integer cast: an unsigned integer dtype
    # cannot represent NaN, so casting first either fails or yields garbage.
    return scaled_f64.fillna(0).astype(np.uint32)

In [81]:
def norm_ddf_to_uint(ddf):
    """
    Add integer-grid columns "x_uint" and "y_uint" to a points dataframe.

    The min and max of the "x" and "y" columns are computed first (one
    ``.compute()`` per bound). Each coordinate column is then rescaled with
    ``norm_series_to_uint`` using its own bounds. The new columns are assigned
    onto ``ddf`` itself, and that same object is returned.
    """
    x_lo = ddf["x"].min().compute()
    x_hi = ddf["x"].max().compute()
    y_lo = ddf["y"].min().compute()
    y_hi = ddf["y"].max().compute()

    ddf["x_uint"] = norm_series_to_uint(ddf["x"], x_lo, x_hi)
    ddf["y_uint"] = norm_series_to_uint(ddf["y"], y_lo, y_hi)
    return ddf

In [126]:
def _part1by1_16(x):
    """
    Spread each 16-bit value into 32 bits by inserting zeros between bits.
    Input:  uint32 array (values must fit in 16 bits)
    Output: uint32 array (bit-spread)
    """
        
    assert x.dtype.name == 'uint32'
    
    # Mask away any bits above 16 (just in case input wasn't clean).
    x = x & np.uint32(0x0000FFFF)
    
    # First spread: shift left by 8 bits, OR with original, then mask.
    # After this, groups of 8 bits are separated by 8 zeros.
    # x = (x | (x << 8)) & np.uint32(0x00FF00FF)
    x = (x | np.left_shift(x, 8)) & np.uint32(0x00FF00FF)
    
    # Spread further: now groups of 4 bits separated by 4 zeros.
    x = (x | np.left_shift(x, 4)) & np.uint32(0x0F0F0F0F)
    
    # Spread further: groups of 2 bits separated by 2 zeros.
    x = (x | np.left_shift(x, 2)) & np.uint32(0x33333333)
    
    # Final spread: single bits separated by a zero bit.
    # Now each original bit is in every other position (positions 0,2,4,...).
    x = (x | np.left_shift(x, 1)) & np.uint32(0x55555555)
    
    return x

"""
def _part1by1_32(u32):
    #Spread each 32-bit value into 64 bits by inserting zeros between bits.
    #Input:  uint64 array (values must fit in 32 bits)
    #Output: uint64 array (bit-spread)

    # Mask away any bits above 32 (safety).
    x = u32.astype(np.uint64) & np.uint64(0x00000000FFFFFFFF)
    
    # First spread: separate into 16-bit chunks spaced out.
    x = (x | (x << 16)) & np.uint64(0x0000FFFF0000FFFF)
    
    # Spread further: each 8-bit chunk separated.
    x = (x | (x << 8)) & np.uint64(0x00FF00FF00FF00FF)
    
    # Spread further: each 4-bit nibble separated.
    x = (x | (x << 4)) & np.uint64(0x0F0F0F0F0F0F0F0F)
    
    # Spread further: 2-bit groups separated.
    x = (x | (x << 2)) & np.uint64(0x3333333333333333)
    
    # Final spread: single bits separated by zeros.
    # Now each original bit occupies every other position (0,2,4,...).
    x = (x | (x << 1)) & np.uint64(0x5555555555555555)
    
    return x
"""

def morton_interleave(ddf):
    """
    Compute 2D Morton (Z-order) codes from the "x_uint" and "y_uint" columns.

    Both columns are passed through ``_part1by1_16``, which requires uint32
    input and uses only the low 16 bits of each value. The x bits end up in
    the even bit positions of the code and the y bits in the odd positions.

    Returns the codes as uint32.
    """
    # Bit-spread each coordinate so its bits occupy every other position.
    x_spread = _part1by1_16(ddf["x_uint"])
    y_spread = _part1by1_16(ddf["y_uint"])

    # Move the y bits up by one so they fall on the odd positions, then merge
    # them with the x bits on the even positions. The merge is done in uint64.
    y_bits = np.left_shift(y_spread.astype(np.uint64), 1)
    x_bits = x_spread.astype(np.uint64)
    merged = y_bits | x_bits

    # 16 + 16 interleaved bits fit in 32 bits, so narrow the result.
    return merged.astype(np.uint32)

def sdata_morton_sort_points(sdata, element):
    """
    Sort the points element ``element`` of ``sdata`` by 2D Morton code.

    The element gains "x_uint", "y_uint" and "morton_code_2d" columns. When a
    "z" column is present the rows are ordered by "z" first and Morton code
    second; otherwise by Morton code alone. The sorted dataframe replaces
    ``sdata.points[element]``, and the same ``sdata`` object is returned.
    """
    # Quantize the coordinates, then derive the Morton code column from them.
    points = norm_ddf_to_uint(sdata.points[element])
    points["morton_code_2d"] = morton_interleave(points)

    # TODO: include z and transcript_id as dimensions in the morton code?
    has_z = "z" in points.columns
    sort_keys = ["z", "morton_code_2d"] if has_z else "morton_code_2d"
    sdata.points[element] = points.sort_values(by=sort_keys, ascending=True)

    # Sort any annotating table(s) as well.
    # NOTE: the annotating tables are looked up here but not yet re-sorted.
    annotating_tables = get_element_annotators(sdata, element)

    return sdata
    

In [None]:
# Sort the transcripts by Morton code. The function replaces
# sdata.points["transcripts"] on this object and returns the same object.
sdata_morton_sort_points(sdata, "transcripts")

In [None]:
# Display the transcripts points element.
sdata.points["transcripts"]

In [122]:
# Look up the annotators of the transcripts element (an empty set in the saved output).
get_element_annotators(sdata, "transcripts")

set()

In [79]:
# NOTE(review): "y_uint" only exists on `ddf` after norm_ddf_to_uint has been
# applied to it (that function assigns the column onto its argument), so this
# cell depends on an earlier run of that function.
ddf["y_uint"].compute()

0          3885
1          2786
2          3815
3          6914
4          8584
          ...  
638078    63402
638079    57379
638080    64958
638081    57940
638082    56730
Name: y_uint, Length: 42638083, dtype: uint64

In [25]:
# Display the transcripts element (shown lazily: column dtypes and partition count only).
sdata.points['transcripts']

Unnamed: 0_level_0,x,y,z,feature_name,cell_id,overlaps_nucleus,transcript_id,qv
npartitions=8,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,float32,float32,float32,category[unknown],int32,uint8,uint64,float32
,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...


## Configure Vitessce

Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views.

In [None]:
vc = VitessceConfig(
    schema_version="1.0.18",
    name='Xenium SpatialData Demo',
)
# Add data to the configuration:
wrapper = SpatialDataWrapper(
    sdata_path=spatialdata_filepath,
    # The following paths are relative to the root of the SpatialData zarr store on-disk.
    # They must name elements that actually exist in this store: its images are
    # 'morphology_focus' / 'morphology_mip' and its shapes are
    # 'cell_boundaries' / 'cell_circles' (see the SpatialData summary above).
    image_path="images/morphology_focus",
    table_path="tables/table",
    obs_feature_matrix_path="tables/table/X",
    # NOTE(review): assumes the table annotates 'cell_circles' - confirm against
    # the table's region metadata.
    obs_spots_path="shapes/cell_circles",
    coordinate_system="global",
    coordination_values={
        # The following tells Vitessce to consider each observation as a "spot"
        "obsType": "cell",
    }
)
dataset = vc.add_dataset(name='Xenium').add_object(wrapper)

# Add views (visualizations) to the configuration:
spatial = vc.add_view("spatialBeta", dataset=dataset)
feature_list = vc.add_view("featureList", dataset=dataset)
layer_controller = vc.add_view("layerControllerBeta", dataset=dataset)
obs_sets = vc.add_view("obsSets", dataset=dataset)

# Link the spot layer between the spatial view and its layer controller.
vc.link_views_by_dict([spatial, layer_controller], {
    'spotLayer': CL([{
        'obsType': 'cell',
    }]),
}, scope_prefix=get_initial_coordination_scope_prefix("A", "obsSpots"))

# All views share the same observation type.
vc.link_views([spatial, layer_controller, feature_list, obs_sets], ['obsType'], [wrapper.obs_type_label])

# Layout the views (the trailing semicolon suppresses the cell output).
vc.layout(spatial | (feature_list / layer_controller / obs_sets));

### Render the widget

In [None]:
# Build the widget from the configuration; the bare expression displays it.
vw = vc.widget()
vw