# Template: visualize FISH-based data with Vitessce

## Code to change

Places where you will need to edit the code are marked by `# TODO(template)` comments.

In [1]:
import os
import json
from os.path import join
from vitessce import (
    VitessceConfig,
    Component as cm,
    CoordinationType as ct,
    FileType as ft,
    AnnDataWrapper,
    OmeTiffWrapper,
    MultiImageWrapper,
    BASE_URL_PLACEHOLDER,
)
from vitessce.data_utils import (
    rgb_img_to_ome_tiff,
    multiplex_img_to_ome_tiff,
    optimize_adata,
    VAR_CHUNK_SIZE,
)
from anndata import read_h5ad, AnnData
import pandas as pd
import numpy as np
from tifffile import imread
from skimage.draw import disk

## Variables to fill in

In [2]:
#=== INPUTS ===
# Folder that holds all raw input files referenced below.
INPUT_DATA_DIR = join('.', 'data', 'raw')

# TODO(template): specify the path to the .h5ad file containing cells and per-cell metadata
PATH_TO_INPUT_CELLS_H5AD = join(INPUT_DATA_DIR, 'codeluppi_2018_nature_methods.cells.h5ad')

# TODO(template): specify the path to the .tsv file containing molecule x-y coordinates
PATH_TO_INPUT_MOLECULES_TSV = join(INPUT_DATA_DIR, 'codeluppi_2018_nature_methods.molecules.tsv')
# Names of the TSV columns holding the x coordinate, y coordinate, and gene label of each molecule.
MOLECULES_TSV_X_COL = 'x'
MOLECULES_TSV_Y_COL = 'y'
MOLECULES_TSV_GENE_COL = 'gene'

# TODO(template): specify the file path for the input background TIFF image
PATH_TO_INPUT_BACKGROUND_TIFF = join(INPUT_DATA_DIR, 'codeluppi_2018_nature_methods.image.tiff')
# One name per channel of the background image.
BACKGROUND_CHANNEL_NAMES = ['nuclei', 'polyT']
# TODO(template): specify the file path for the input cell segmentation bitmask TIFF image (assumed to have the same XY dimensions as the background image)
PATH_TO_INPUT_BITMASK_TIFF = join(INPUT_DATA_DIR, 'codeluppi_2018_nature_methods.bitmask.tiff')
# One name per channel of the bitmask image.
BITMASK_CHANNEL_NAMES = ['cells']

#=== OUTPUTS ===
# Folder that receives all converted files written by this notebook.
OUTPUT_DATA_DIR = join('.', 'processed_data', 'fish_based')

# TODO(template): specify file paths for the converted AnnData objects
PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR = join(OUTPUT_DATA_DIR, 'output.cells.anndata.zarr')
PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR = join(OUTPUT_DATA_DIR, 'output.molecules.anndata.zarr')

# TODO(template): specify file paths for the converted OME-TIFF image and bitmask files
PATH_TO_OUTPUT_IMAGE_OME_TIFF = join(OUTPUT_DATA_DIR, 'image.ome.tif')
PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF = join(OUTPUT_DATA_DIR, 'image.pyramid.ome.tif')
PATH_TO_OUTPUT_BITMASK_OME_TIFF = join(OUTPUT_DATA_DIR, 'bitmask.ome.tif')
PATH_TO_OUTPUT_BITMASK_PYRAMIDAL_OME_TIFF = join(OUTPUT_DATA_DIR, 'bitmask.pyramid.ome.tif')


# TODO(template): this folder should not yet exist, but will be created in step 3.
PATH_TO_EXPORT_DIRECTORY = join('.', 'exported_fish_based_data')

# TODO(template): provide names and descriptions
CONFIG_NAME = 'My config'
CONFIG_DESCRIPTION = 'This dataset reveals...'
DATASET_NAME = 'My dataset'
IMG_NAME = 'My image'

## 1.1 Convert H5AD to AnnData-Zarr

In [3]:
# Load the AnnData object holding the cells and their per-cell metadata from the input .h5ad file.
cells_adata = read_h5ad(PATH_TO_INPUT_CELLS_H5AD)

In [4]:
# Label the cells 1..N with an integer index (0 is reserved for the background).
n_cells = cells_adata.shape[0]
cells_adata.obs.index = list(range(1, n_cells + 1))
# Show the re-indexed per-cell metadata table.
cells_adata.obs

Unnamed: 0,Cluster,Subcluster,Region
1,Inhibitory neurons,Inhibitory CP,Layer 6
2,Inhibitory neurons,Inhibitory CP,Internal Capsule Caudoputamen
3,Inhibitory neurons,Inhibitory CP,Layer 4
4,Inhibitory neurons,Inhibitory CP,Internal Capsule Caudoputamen
5,Inhibitory neurons,Inhibitory CP,Internal Capsule Caudoputamen
...,...,...,...
4835,Vasculature,Vascular Smooth Muscle,Layer 6
4836,Vasculature,Vascular Smooth Muscle,Layer 2-3 lateral
4837,Vasculature,Vascular Smooth Muscle,White matter
4838,Vasculature,Vascular Smooth Muscle,Layer 6


In [5]:
# Display a summary of the AnnData object to see which obs, var, obsm, varm, and layers keys it contains.
cells_adata

AnnData object with n_obs × n_vars = 4839 × 33
    obs: 'Cluster', 'Subcluster', 'Region'
    var: 'Fluorophore', 'Hybridization'
    obsm: 'X_centroid', 'X_pca', 'X_segmentations', 'X_spatial', 'X_tsne', 'X_umap'
    varm: 'PCs'
    layers: 'X_uint8'

In [6]:
# TODO(template): Specify the columns and keys that will be used in the visualization.
cells_obs_cols = ["Cluster", "Subcluster", "Region"]
cells_var_cols = ["Fluorophore", "Hybridization"]
cells_obsm_keys = ["X_pca", "X_tsne", "X_umap", "X_spatial"]

# Keep only the selected columns/keys and optimize the expression matrix X.
cells_adata = optimize_adata(
    cells_adata,
    obs_cols=cells_obs_cols,
    var_cols=cells_var_cols,
    obsm_keys=cells_obsm_keys,
    optimize_X=True,
    # Vitessce plays nicely with dense matrices saved with chunking
    # and this one is small enough that dense is not a huge overhead.
    to_dense_X=True,
)

  adata = AnnData(X=new_X, obs=new_obs, var=new_var, obsm=new_obsm, varm=new_varm, layers=new_layers)


In [7]:
# Chunk the stored matrix so each chunk spans every cell (row) and VAR_CHUNK_SIZE columns.
cells_chunk_shape = [cells_adata.shape[0], VAR_CHUNK_SIZE]
cells_adata.write_zarr(PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR, chunks=cells_chunk_shape)

## 1.2 Convert TSV to AnnData-Zarr

In [8]:
# Read the tab-separated molecules table; its first column becomes the row index.
molecules_df = pd.read_csv(PATH_TO_INPUT_MOLECULES_TSV, sep="\t", index_col=0)

# Per-molecule metadata: just the gene label, stored under the fixed column name "gene".
gene_col_rename = {MOLECULES_TSV_GENE_COL: "gene"}
obs_df = molecules_df.loc[:, [MOLECULES_TSV_GENE_COL]].rename(columns=gene_col_rename)

# Per-molecule coordinates as a two-column (x, y) array.
coord_cols = [MOLECULES_TSV_X_COL, MOLECULES_TSV_Y_COL]
xy_arr = molecules_df.loc[:, coord_cols].values

# The molecules AnnData carries no X matrix, only obs metadata and spatial coordinates.
molecules_adata = AnnData(obs=obs_df, obsm={"X_spatial": xy_arr})

# TODO(template): Specify the columns and keys that will be used in the visualization.
molecules_obs_cols = ["gene"]
molecules_obsm_keys = ["X_spatial"]
molecules_adata = optimize_adata(
    molecules_adata,
    obs_cols=molecules_obs_cols,
    obsm_keys=molecules_obsm_keys,
    remove_X=True,
)

molecules_adata.write_zarr(PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR)



## 1.2 Convert TIFFs to OME-TIFFs

### 1.2.1 Background image

In [9]:
# Read the background TIFF image into an array.
img_arr = imread(PATH_TO_INPUT_BACKGROUND_TIFF)
# Show the array shape to check the axis order before conversion (CYX order is expected below).
img_arr.shape

(2, 12917, 7932)

In [10]:
# If needed, update the array axes so they are in CYX order to enable conversion to OME-TIFF.
#img_arr = img_arr.transpose((2, 0, 1))
#img_arr.shape

In [11]:
# Write the background array to an OME-TIFF file, passing the channel names and declaring the axis order as CYX.
multiplex_img_to_ome_tiff(img_arr, BACKGROUND_CHANNEL_NAMES, PATH_TO_OUTPUT_IMAGE_OME_TIFF, axes="CYX")

In [12]:
# For larger images, you will want to comment out the above line and un-comment the line below,
# to increase the tile size (128 -> 512) and the number of pyramid resolutions (2 -> 6).
!BF_MAX_MEM=2048m ~/software/bftools/bfconvert -overwrite -tilex 512 -tiley 512 -pyramid-resolutions 6 -pyramid-scale 2 -compression LZW {PATH_TO_OUTPUT_IMAGE_OME_TIFF} {PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF}

./processed_data/fish_based/image.ome.tif
OMETiffReader initializing ./processed_data/fish_based/image.ome.tif
Reading IFDs
Populating metadata
[OME-TIFF] -> ./processed_data/fish_based/image.pyramid.ome.tif [OME-TIFF]
Tile size = 512 x 512
	Converted 1/2 planes (50%)
	Converted 2/2 planes (100%)
Tile size = 512 x 512
	Converted 1/2 planes (50%)
	Converted 2/2 planes (100%)
Tile size = 512 x 512
	Converted 1/2 planes (50%)
	Converted 2/2 planes (100%)
Tile size = 512 x 512
	Converted 1/2 planes (50%)
	Converted 2/2 planes (100%)
Tile size = 495 x 512
	Converted 2/2 planes (100%)
Tile size = 247 x 66
	Converted 2/2 planes (100%)
[done]
34.427s elapsed (171.08333+2623.3333ms per plane, 864ms overhead)


### 1.2.2 Segmentation bitmask / label image

In [13]:
# Read the cell segmentation bitmask TIFF image into an array.
bitmask_arr = imread(PATH_TO_INPUT_BITMASK_TIFF)
# Show the array shape to check the axis order before conversion (CYX order is expected below).
bitmask_arr.shape

(1, 12917, 7932)

In [14]:
# If needed, update the array axes so they are in CYX order to enable conversion to OME-TIFF.
#bitmask_arr = bitmask_arr.transpose((2, 0, 1))
#bitmask_arr.shape

In [15]:
# Write the bitmask array to an OME-TIFF file, passing the channel names and declaring the axis order as CYX.
multiplex_img_to_ome_tiff(bitmask_arr, BITMASK_CHANNEL_NAMES, PATH_TO_OUTPUT_BITMASK_OME_TIFF, axes="CYX")

In [16]:
# For larger images, you will want to comment out the above line and un-comment the line below,
# to increase the tile size (128 -> 512) and the number of pyramid resolutions (2 -> 6).
!BF_MAX_MEM=2048m ~/software/bftools/bfconvert -overwrite -tilex 512 -tiley 512 -pyramid-resolutions 6 -pyramid-scale 2 -compression LZW {PATH_TO_OUTPUT_BITMASK_OME_TIFF} {PATH_TO_OUTPUT_BITMASK_PYRAMIDAL_OME_TIFF}

./processed_data/fish_based/bitmask.ome.tif
OMETiffReader initializing ./processed_data/fish_based/bitmask.ome.tif
Reading IFDs
Populating metadata
[OME-TIFF] -> ./processed_data/fish_based/bitmask.pyramid.ome.tif [OME-TIFF]
Tile size = 512 x 512
Reading IFDs
Populating metadata
	Converted 1/1 planes (100%)
Tile size = 512 x 512
	Converted 1/1 planes (100%)
Tile size = 512 x 512
	Converted 1/1 planes (100%)
Tile size = 512 x 512
	Converted 1/1 planes (100%)
Tile size = 495 x 512
	Converted 1/1 planes (100%)
Tile size = 247 x 16
	Converted 1/1 planes (100%)
[done]
102.176s elapsed (768.3333+16125.833ms per plane, 776ms overhead)


## 2. Configure the visualization

In [17]:
vc = VitessceConfig(schema_version="1.0.15", name=CONFIG_NAME, description=CONFIG_DESCRIPTION)

# Cells: embedding, cell sets, and the expression matrix from the cells AnnData-Zarr store.
# TODO(template): update the arrays of interest and where they are located in the AnnData object.
cells_wrapper = AnnDataWrapper(
    adata_path=PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR,
    obs_embedding_paths=["obsm/X_umap"],
    obs_embedding_names=["UMAP"],
    obs_set_paths=["obs/Cluster"],
    obs_set_names=["Cluster"],
    obs_feature_matrix_path="X",
    coordination_values={
        "obsType": "cell",
        "featureType": "gene",
        "featureValueType": "expression",
    },
)

# Molecules: x-y locations and gene labels from the molecules AnnData-Zarr store.
# TODO(template): update the arrays of interest and where they are located in the AnnData object.
molecules_wrapper = AnnDataWrapper(
    adata_path=PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR,
    obs_locations_path="obsm/X_spatial",
    obs_labels_path="obs/gene",
    coordination_values={
        "obsType": "molecule",
    },
)

# Images: the segmentation bitmask first, followed by the background image.
segmentation_image = OmeTiffWrapper(
    img_path=PATH_TO_OUTPUT_BITMASK_PYRAMIDAL_OME_TIFF,
    name="Cell segmentations",
    is_bitmask=True,
)
background_image = OmeTiffWrapper(
    img_path=PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF,
    name="Background image",
    is_bitmask=False,
)
images_wrapper = MultiImageWrapper(
    [segmentation_image, background_image],
    use_physical_size_scaling=True,
)

# Register the three data objects on a single dataset, in the same order as listed above.
dataset = vc.add_dataset(name=DATASET_NAME)
for data_wrapper in (cells_wrapper, molecules_wrapper, images_wrapper):
    dataset = dataset.add_object(data_wrapper)

# TODO(template): Update the views of interest.
spatial_colored_by_cluster = vc.add_view(cm.SPATIAL, dataset=dataset)
# TODO(template): update the mapping to match one of the elements of `obs_embedding_names` above, if necessary.
scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping="UMAP")

layer_controller = vc.add_view(cm.LAYER_CONTROLLER, dataset=dataset).set_props(disableChannelsIfRgbDetected=True)
spot_set_manager = vc.add_view(cm.OBS_SETS, dataset=dataset)
gene_list = vc.add_view(cm.FEATURE_LIST, dataset=dataset)
heatmap = vc.add_view(cm.HEATMAP, dataset=dataset).set_props(transpose=True)

# TODO(template): Update the layout of views.
# `|` places views side by side; `/` stacks rows vertically.
top_row = spatial_colored_by_cluster | scatterplot
bottom_row = (layer_controller | spot_set_manager) | (gene_list | heatmap)
vc.layout(top_row / bottom_row);

In [18]:
# TODO(template): configure view coordinations and initial coordination values
# Views that display spatial layers and therefore share the layer coordination values below.
spatial_views = [
    spatial_colored_by_cluster,
    layer_controller,
]
# Every view in the layout, available for coordinations that should apply to all of them.
all_views = [
    *spatial_views,
    spot_set_manager,
    gene_list,
    heatmap,
    scatterplot,
]

# Initial settings for the point (molecule) layer.
spatial_point_layer_value = {
    "opacity": 1,
    "radius": 20,
    "visible": True
}

# Initial settings for the segmentation bitmask layer (a single channel).
spatial_segmentation_layer_value = [{
    "type": "bitmask",
    "visible": True,
    "index": 0,
    "colormap": None,
    "transparentColor": None,
    "opacity": 1,
    "domainType": "Min/Max",
    "channels": [
        {
          "selection": { "c": 0, "t": 0, "z": 0 },
          "color": [255, 0, 0],
          "visible": True,
          "slider": [0, 1]
        }
    ]
}]

# Colors assigned to the background image channels in order (red, green, blue);
# they are reused cyclically if the image has more channels than colors.
# TODO(template): update the channel colors and slider ranges to suit your image.
image_channel_colors = [
    [255, 0, 0],
    [0, 255, 0],
    [0, 0, 255],
]

# Initial settings for the background image layer. One channel entry is generated per
# name in BACKGROUND_CHANNEL_NAMES, so no entry selects a channel index that the image
# does not have.
spatial_image_layer_value = [{
    "type": "raster",
    "index": 0,
    "colormap": None,
    "transparentColor": None,
    "opacity": 1,
    "domainType": "Min/Max",
    "channels": [
        {
          "selection": { "c": channel_index, "t": 0, "z": 0 },
          "color": image_channel_colors[channel_index % len(image_channel_colors)],
          "visible": True,
          "slider": [0, 255]
        }
        for channel_index in range(len(BACKGROUND_CHANNEL_NAMES))
    ]
}]

# Link the spatial views so they share the same image, segmentation, and point layer settings.
vc.link_views(spatial_views, [ct.SPATIAL_IMAGE_LAYER, ct.SPATIAL_SEGMENTATION_LAYER, ct.SPATIAL_POINT_LAYER], [spatial_image_layer_value, spatial_segmentation_layer_value, spatial_point_layer_value])


<vitessce.config.VitessceConfig at 0x7fa02902cf10>

### Launch the web app

In [19]:
# Launch the configuration in the Vitessce web app; the value displayed below is a vitessce.io URL
# with the configuration embedded, in which the data files are referenced through localhost URLs.
vc.web_app()

'http://vitessce.io/#?theme=light&url=data:,%7B%22version%22%3A+%221.0.15%22%2C+%22name%22%3A+%22My+config%22%2C+%22description%22%3A+%22This+dataset+reveals...%22%2C+%22datasets%22%3A+%5B%7B%22uid%22%3A+%22A%22%2C+%22name%22%3A+%22My+dataset%22%2C+%22files%22%3A+%5B%7B%22fileType%22%3A+%22anndata.zarr%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F0%2F44170ceb-30d8-49bb-81a2-2a1fec250bea%22%2C+%22options%22%3A+%7B%22obsEmbedding%22%3A+%5B%7B%22path%22%3A+%22obsm%2FX_umap%22%2C+%22dims%22%3A+%5B0%2C+1%5D%2C+%22embeddingType%22%3A+%22UMAP%22%7D%5D%2C+%22obsSets%22%3A+%5B%7B%22name%22%3A+%22Cluster%22%2C+%22path%22%3A+%22obs%2FCluster%22%7D%5D%2C+%22obsFeatureMatrix%22%3A+%7B%22path%22%3A+%22X%22%7D%7D%2C+%22coordinationValues%22%3A+%7B%22obsType%22%3A+%22cell%22%2C+%22featureType%22%3A+%22gene%22%2C+%22featureValueType%22%3A+%22expression%22%7D%7D%2C+%7B%22fileType%22%3A+%22anndata.zarr%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F1%2Ff514c617-14fd-40a5-b43c-dc8f

## 3. Export the configuration and data

In [None]:
# TODO(template): The export function does not clear the contents of the `out_dir`.
# Creating the folder with exist_ok=False raises FileExistsError if it already exists,
# which ensures the export starts from an empty directory.
os.makedirs(PATH_TO_EXPORT_DIRECTORY, exist_ok=False)

# Export the data files into the export directory and get back the configuration as a dict,
# with BASE_URL_PLACEHOLDER passed as the base URL.
config_dict = vc.export(to="files", base_url=BASE_URL_PLACEHOLDER, out_dir=PATH_TO_EXPORT_DIRECTORY)

# Save the configuration as vitessce.json inside the export directory.
config_json_path = join(PATH_TO_EXPORT_DIRECTORY, "vitessce.json")
with open(config_json_path, "w") as config_file:
    json.dump(config_dict, config_file)