# Vitessce Widget Tutorial

# Visualization of a SpatialData object

## Import dependencies


In [None]:
import os
from os.path import join, isfile, isdir
from urllib.request import urlretrieve
import zipfile
import shutil

from vitessce import (
    VitessceConfig,
    ViewType as vt,
    CoordinationType as ct,
    CoordinationLevel as CL,
    SpatialDataWrapper,
    get_initial_coordination_scope_prefix
)

from vitessce.data_utils import (
    sdata_morton_sort_points,
    sdata_points_process_columns,
    sdata_points_write_bounding_box_attrs,
    sdata_points_modify_row_group_size,
    sdata_morton_query_rect,
)

In [None]:
from spatialdata import read_zarr

In [None]:
# Local directory that holds both the downloaded archive and the extracted store.
data_dir = "data"
# Location of the extracted SpatialData Zarr store ...
spatialdata_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr")
# ... and of the zip archive that it is unpacked from.
zip_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr.zip")

In [None]:
# Download and unpack the dataset, unless the extracted store already exists.
if not isdir(spatialdata_filepath):
    if not isfile(zip_filepath):
        os.makedirs(data_dir, exist_ok=True)
        # Download under a temporary name and move it into place only once the
        # transfer has finished, so that an interrupted download is never
        # mistaken for a complete archive on the next run.
        partial_zip_filepath = zip_filepath + ".part"
        urlretrieve('https://s3.embl.de/spatialdata/spatialdata-sandbox/xenium_rep1_io.zip', partial_zip_filepath)
        os.replace(partial_zip_filepath, zip_filepath)
    with zipfile.ZipFile(zip_filepath, "r") as zip_ref:
        zip_ref.extractall(data_dir)
    # The archive is closed at this point. The extracted store is expected at
    # "data.zarr" inside data_dir; give it its dataset-specific name.
    os.rename(join(data_dir, "data.zarr"), spatialdata_filepath)

    # This Xenium dataset has an AnnData "raw" element, which is removed here.
    # Reference: https://github.com/giovp/spatialdata-sandbox/issues/55
    raw_dir = join(spatialdata_filepath, "tables", "table", "raw")
    if isdir(raw_dir):
        shutil.rmtree(raw_dir)

In [None]:
# Open the extracted Zarr store as a SpatialData object.
sdata = read_zarr(spatialdata_filepath)
# Display a summary of the object as the cell output.
sdata

In [None]:
# Number of rows in the "transcripts" element. The row count is not available
# directly here, so .compute() is called to obtain the actual value.
sdata["transcripts"].shape[0].compute()

In [None]:
# Convert the table's X matrix to a dense array (in place), then register the
# table under a second key and write that new element.
# NOTE(review): "table" and "dense_table" appear to reference the same
# in-memory object after this cell, so the in-memory "table" is dense as well.
table_adata = sdata.tables["table"]
table_adata.X = table_adata.X.toarray()
sdata.tables["dense_table"] = table_adata
sdata.write_element("dense_table")

In [None]:
# TODO: store the two separate images as a single image with two channels.
# Similar to https://github.com/EricMoerthVis/tissue-map-tools/pull/12

In [None]:
# Inspect the `obs` attribute of the "table" element.
sdata.tables['table'].obs

In [None]:
# Display the SpatialData object again; it now also contains "dense_table".
sdata

In [None]:
# Preview the first rows of the "transcripts" Points element.
sdata.points['transcripts'].head()

## Sorting Points and creating a new Points element in the SpatialData object

### Step 1. Sort rows with `sdata_morton_sort_points`

In [None]:
# Sort the rows of the "transcripts" Points element (presumably by Morton
# code, going by the helper's name). The returned object replaces `sdata`.
sdata = sdata_morton_sort_points(sdata, "transcripts")

### Step 2. Clean up columns with `sdata_points_process_columns`

In [None]:
# Add a feature_index column to the dataframe, and reorder the columns so that
# feature_name (a dict column) is the rightmost column.
# `var_name_col` names the points column that holds feature names, and
# `table_name` names the table element passed to the helper
# (presumably used to look up the feature index; verify against the helper).
ddf = sdata_points_process_columns(sdata, "transcripts", var_name_col="feature_name", table_name="table")

In [None]:
# Preview the first rows of the processed dataframe.
ddf.head()

### Step 3. Save sorted dataframe to new Points element

In [None]:
# Register the processed dataframe under a new element name, leaving the
# original "transcripts" element in place, then write the new element.
sdata["transcripts_with_morton_codes"] = ddf
sdata.write_element("transcripts_with_morton_codes")

### Step 4. Write bounding box metadata with `sdata_points_write_bounding_box_attrs`

In [None]:
# Write bounding box metadata for the new Points element.
# NOTE(review): presumably stored as attributes of the on-disk element, going
# by the helper's name; confirm against the helper's documentation.
sdata_points_write_bounding_box_attrs(sdata, "transcripts_with_morton_codes")

### Step 5. Modify the row group sizes of the Parquet files with `sdata_points_modify_row_group_size`

In [None]:
# Modify the row group size of the new element's Parquet files, requesting
# row_group_size=25_000.
sdata_points_modify_row_group_size(sdata, "transcripts_with_morton_codes", row_group_size=25_000)

In [None]:
# Done

In [None]:
# Optional sanity check: count the row groups in one Parquet part file of the
# new Points element.
import pyarrow.parquet as pq
from os.path import join

# Path of the part-0 file inside the store on disk.
part0_filepath = join(
    sdata.path,
    "points",
    "transcripts_with_morton_codes",
    "points.parquet",
    "part.0.parquet",
)
parquet_file = pq.ParquetFile(part0_filepath)

# Number of row groups in this part-0 file, displayed as the cell output.
num_groups = parquet_file.num_row_groups
num_groups

## Configure Vitessce

Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views.

In [None]:
# Create the Vitessce configuration. The dataset used in this notebook is the
# Xenium dataset downloaded above, so it is labeled accordingly.
vc = VitessceConfig(
    schema_version="1.0.18",
    name='Xenium SpatialData Demo',
)
# Add data to the configuration.
# NOTE(review): the element paths below ("images/rasterized", "shapes/cells")
# look like they were carried over from a MERFISH example; confirm that these
# elements exist in this Xenium store. The "dense_table" and
# "transcripts_with_morton_codes" elements written by this notebook are not
# referenced here; verify whether they should be.
wrapper = SpatialDataWrapper(
    sdata_path=spatialdata_filepath,
    # The following paths are relative to the root of the SpatialData zarr store on-disk.
    image_path="images/rasterized",
    table_path="tables/table",
    obs_feature_matrix_path="tables/table/X",
    obs_spots_path="shapes/cells",
    coordinate_system="global",
    coordination_values={
        # The following tells Vitessce to consider each observation as a "cell"
        "obsType": "cell",
    }
)
dataset = vc.add_dataset(name='Xenium').add_object(wrapper)

# Add views (visualizations) to the configuration:
spatial = vc.add_view("spatialBeta", dataset=dataset)
feature_list = vc.add_view("featureList", dataset=dataset)
layer_controller = vc.add_view("layerControllerBeta", dataset=dataset)
obs_sets = vc.add_view("obsSets", dataset=dataset)

# Link the spot layer settings of the spatial view and the layer controller,
# using the same obsType ("cell") as the wrapper's coordination values.
vc.link_views_by_dict([spatial, layer_controller], {
    'spotLayer': CL([{
        'obsType': 'cell',
    }]),
}, scope_prefix=get_initial_coordination_scope_prefix("A", "obsSpots"))

# Make all four views use the obsType label reported by the wrapper.
vc.link_views([spatial, layer_controller, feature_list, obs_sets], ['obsType'], [wrapper.obs_type_label])

# Lay out the views: `|` places views side by side, `/` stacks them vertically.
# The trailing semicolon suppresses the cell output in Jupyter.
vc.layout(spatial | (feature_list / layer_controller / obs_sets));

### Render the widget

In [None]:
# Create the widget from the configuration and display it as the cell output.
vw = vc.widget()
vw