In [None]:
import sparrow as sp

In [None]:
import os
import pooch
from sparrow.datasets.registry import get_registry

# Dataset registry used to download (and cache) the example data.
registry = get_registry(path=None)  # on Windows, set path (e.g. to symlink of default cache of os)

# Download the Xenium output bundle and unpack it. With the `pooch.Unzip`
# processor, `fetch` returns the paths of the extracted files.
path_unzipped = registry.fetch(
    "transcriptomics/xenium/Xenium_V1_humanLung_Cancer_FFPE/Xenium_V1_humanLung_Cancer_FFPE_outs.zip",
    processor=pooch.Unzip(extract_dir="."),
)

# The H&E image and its alignment file only need to be present on disk
# (presumably picked up by the reader when `aligned_images=True` -- confirm),
# so the returned paths are discarded. A loop is used instead of `_ = ...`
# because assigning to `_` at notebook top level shadows IPython's
# last-output variable for the rest of the session.
for aux_file in (
    "transcriptomics/xenium/Xenium_V1_humanLung_Cancer_FFPE/Xenium_V1_humanLung_Cancer_FFPE_he_image.ome.tif",
    "transcriptomics/xenium/Xenium_V1_humanLung_Cancer_FFPE/Xenium_V1_humanLung_Cancer_FFPE_he_imagealignment.csv",
):
    registry.fetch(aux_file)

# Common parent directory of all extracted files = the Xenium output folder.
input_path = os.path.commonpath(path_unzipped)

In [None]:
import tempfile
import uuid

# The SpatialData store is written to the system temp directory; a random UUID
# in the folder name gives every run its own store.
OUTPUT_DIR = tempfile.gettempdir()
zarr_path = os.path.join(OUTPUT_DIR, f"sdata_{uuid.uuid4()}.zarr")

# Read the Xenium experiment into a SpatialData object backed by `zarr_path`.
sdata = sp.io.xenium(
    input_path,
    output=zarr_path,
    to_coordinate_system="global",
    aligned_images=True,
    nucleus_labels=True,
    cells_labels=True,
    cells_table=True,
    filter_gene_names=["Unassigned", "NegControl"],
)


In [None]:
from spatialdata.transformations import get_transformation

# Inspect the transformation(s) attached to the morphology image layer
# (get_all=True presumably returns one entry per coordinate system -- confirm).
get_transformation( sdata[ "morphology_focus_global" ], get_all=True )

In [None]:
# Same inspection for the Xenium cell labels layer.
get_transformation( sdata[ "cell_labels_global" ], get_all=True )

In [None]:
# Display the SpatialData object created by the Xenium reader.
sdata

Exercise:

- Use napari-spatialdata to visualize the image layer 'morphology_focus_global'.
- Next visualize the cell segmentation mask provided by Xenium, 'cell_labels_global'.
- Visualize the gene expression of 'ACE2'

In [None]:
#from napari_spatialdata import Interactive

#Interactive( sdata )

Vectorize the labels layer

In [None]:
import dask
from spatialdata.transformations import get_transformation
from sparrow.image._image import _get_spatial_element

# Turn the Xenium cell labels layer into a shapes layer with cell boundaries.
# The dask "processes" scheduler is active only for the duration of the
# `with` block.
with dask.config.set(scheduler="processes"):
    sdata = sp.sh.vectorize(
        sdata,
        labels_layer="cell_labels_global",
        output_layer="cell_labels_boundaries_global",
        overwrite=True,
    )

In [None]:
# Peek at the first rows of the shapes layer created by sp.sh.vectorize.
sdata[ "cell_labels_boundaries_global" ].head()

In [None]:
# Plot the four morphology channels for the crop [x_min, x_max, y_min, y_max]
# together with the vectorized Xenium cell boundaries. The leading `None` in
# `shapes_layer` presumably yields a panel without an overlay -- confirm.
sp.pl.plot_shapes(
    sdata,
    img_layer="morphology_focus_global",
    channel=["DAPI", "ATP1A1/CD45/E-Cadherin", "18S", "AlphaSMA/Vimentin"],
    shapes_layer=[None, "cell_labels_boundaries_global"],
    crd=[8000, 10000, 8000, 10000],
    linewidth=1.5,
    alpha=0.4,
)

Exercise.

- Which spatial element annotates the table `table_global`?
- Can you visualize gene expression of gene `ACE2` using the sparrow function `sp.pl.plot_shapes`?

In [None]:
# solution

from IPython.display import display

# Show, in order: the table itself, its `spatialdata_attrs` metadata (this is
# where to look for the element the table annotates) and the first rows of
# the per-gene annotations.
display(
    sdata["table_global"],
    sdata["table_global"].uns["spatialdata_attrs"],
    sdata["table_global"].var.head(),
)

# Colour each cell by its ACE2 value from the table; linewidth=0 draws the
# shapes without an outline.
sp.pl.plot_shapes(
    sdata,
    img_layer="morphology_focus_global",
    channel="DAPI",
    # boundaries of table layer "cell_labels_global" obtained via sp.sh.vectorize(...)
    shapes_layer="cell_labels_boundaries_global",
    table_layer="table_global",
    column="ACE2",
    crd=[8000, 10000, 8000, 10000],
    figsize=(5, 5),
    linewidth=0,
)

We want to obtain segmentation masks using `SPArrOW`. The first step is image preprocessing, which we perform on a crop.

In [None]:
# Step 1: min-max filtering on the crop [x_min, x_max, y_min, y_max];
# the result is stored as the image layer "min_max_filtered".
sdata = sp.im.min_max_filtering(
    sdata,
    img_layer="morphology_focus_global",
    output_layer="min_max_filtered",
    crd=[8000, 10000, 8000, 10000],
    size_min_max_filter=51,
    overwrite=True,
)

# Step 2: contrast enhancement of the filtered crop; the result is stored as
# the image layer "clahe".
sdata = sp.im.enhance_contrast(
    sdata,
    img_layer="min_max_filtered",
    output_layer="clahe",
    contrast_clip=3.5,
    overwrite=True,
)

In [None]:
# Compare the raw image with the preprocessed ("clahe") image on the same crop.
sp.pl.plot_image(
    sdata,
    img_layer=["morphology_focus_global", "clahe"],
    channel=["DAPI", "ATP1A1/CD45/E-Cadherin", "18S", "AlphaSMA/Vimentin"],
    crd=[8000, 10000, 8000, 10000],
)

Exercise.

Change the parameters of the `SPArrOW` functions `sp.im.min_max_filtering` and `sp.im.enhance_contrast`, and see what the effect is on the resulting preprocessed image.

In [None]:
# Reference view before running our own segmentation: the raw image with the
# Xenium cell boundaries on the crop [x_min, x_max, y_min, y_max].
sp.pl.plot_shapes(
    sdata,
    img_layer="morphology_focus_global",
    channel=["DAPI", "ATP1A1/CD45/E-Cadherin", "18S", "AlphaSMA/Vimentin"],
    shapes_layer=[None, "cell_labels_boundaries_global"],
    crd=[8000, 10000, 8000, 10000],
    linewidth=1.5,
    alpha=0.4,
)

In [None]:
import torch
from cellpose import models

from sparrow.image import cellpose_callable

# Run cellpose on the CPU.
device = "cpu"  # mps broken in cellpose (macOS), see https://github.com/MouseLand/cellpose/issues/1063
gpu = False
model = models.CellposeModel(
    pretrained_model="cyto3",
    gpu=gpu,
    device=torch.device(device),
)

# NOTE(review): segmentation reads the raw "morphology_focus_global" layer,
# not the "clahe" layer produced by the preprocessing cell -- confirm this is
# intended.
sdata = sp.im.segment(
    sdata,
    img_layer="morphology_focus_global",
    output_labels_layer="segmentation_mask",
    output_shapes_layer="segmentation_mask_boundaries",
    crd=[8000, 10000, 8000, 10000],  # region to segment [x_min, x_max, y_min, y_max]
    chunks=2048,
    depth=200,
    overwrite=True,
    model=cellpose_callable,
    # the remaining keyword arguments are passed on to `cellpose_callable`
    pretrained_model=model,
    diameter=50,
    flow_threshold=0.9,
    cellprob_threshold=-4,
    channels=[2, 1],
)

In [None]:
# Compare the Xenium cell boundaries with the boundaries of the new
# segmentation mask on the segmented crop.
sp.pl.plot_shapes(
    sdata,
    img_layer="morphology_focus_global",
    channel=["DAPI", "ATP1A1/CD45/E-Cadherin", "18S", "AlphaSMA/Vimentin"],
    shapes_layer=["cell_labels_boundaries_global", "segmentation_mask_boundaries"],
    crd=[8000, 10000, 8000, 10000],
    linewidth=1.5,
    alpha=0.4,
)

We create a new table. One could also skip this step and use the table provided by Xenium ("table_global"), but we recommend recalculating the table (see below).

In [None]:
# Recompute the cell-by-gene table by assigning the transcripts in
# "transcripts_global" to the Xenium cell labels; the result is stored as
# "table_global_redo".
sdata = sp.tb.allocate(
    sdata=sdata,
    labels_layer="cell_labels_global",  # we use the segmentation mask from Xenium
    points_layer="transcripts_global",
    output_layer="table_global_redo",
    update_shapes_layers=False,
    overwrite=True,
)

# If you would like to remove some genes from the anndata table, you can still do it, uncomment the following code:

#adata = sdata[ "table_global_redo" ]

#pattern = 'Unassigned|NegControl'

# `DataFrame.apply` runs column by column by default; `.any(axis=1)` then
# combines the columns into one flag per gene.
#mask = ~adata.var.reset_index().astype(str).apply(
#    lambda col: col.str.contains(pattern, case=False, na=False)
#).any(axis=1)

#sdata=sp.tb.add_table_layer( sdata, adata = adata[:, mask ].copy(), output_layer="table_global_redo", region=["cell_labels_global"], overwrite=True )

In [None]:
# First rows of the per-cell metadata (.obs) of the table provided by Xenium.
sdata[ "table_global" ].obs.head()

In [None]:
# First rows of the per-cell metadata (.obs) of the recomputed table.
sdata[ "table_global_redo" ].obs.head()

In [None]:
# Total count of one row in each table. Note the different positional indices:
# row 0 of the recomputed table vs. row 1 of the Xenium table -- presumably the
# same cell (cell_ID=2) in both; verify against the .obs tables.
print(sdata[ "table_global_redo" ].X[0].sum())
print(sdata[ "table_global" ].X[1].sum())

In [None]:
# Genes with a non-zero count in row 0 of the recomputed table, with their counts.
sdata[ "table_global_redo" ][ :, sdata[ "table_global_redo" ].X[0].toarray() !=0].to_df().iloc[0]

In [None]:
# Genes with a non-zero count in row 1 of the Xenium table, with their counts.
sdata[ "table_global" ][ :, sdata[ "table_global" ].X[1].toarray() !=0].to_df().iloc[1]

For the cell with `cell_ID=2`, the count for PTPRC is 3 (see the plot below). The `SPArrOW` function `sp.tb.allocate` calculates this correctly, while the table provided by Xenium reports a count of only 2 for PTPRC for this cell.

Conclusion: we advise recalculating the table layer.

In [None]:
# Zoom in on a small window around the cell of interest and plot the PTPRC
# transcripts together with the Xenium cell boundaries and cell numbers.
sp.pl.sanity_plot_transcripts_matrix(
    sdata,
    img_layer="morphology_focus_global",
    points_layer="transcripts_global",
    shapes_layer="cell_labels_boundaries_global",
    gene="PTPRC",
    plot_cell_number=True,
    crd=[947 - 50, 947.49 + 50, 8540 - 50, 8540 + 50],
    figsize=(5, 5),
)

Exercise:

Use the segmentation mask generated by `SPArrOW` to create the table layer.

In [None]:
# solution

# Assign the transcripts to the cells of our own segmentation mask; the result
# is stored as the table layer "table_transcriptomics".
sdata = sp.tb.allocate(
    sdata=sdata,
    points_layer="transcripts_global",
    labels_layer="segmentation_mask",
    output_layer="table_transcriptomics",
    overwrite=True,
    update_shapes_layers=False,
)

In [None]:
# Display the table built from the SPArrOW segmentation mask.
sdata[ "table_transcriptomics" ]

In [None]:
from sparrow.utils._keys import _REGION_KEY

# List the categories of the region column in .obs -- presumably the name(s) of
# the labels layer(s) this table annotates; confirm.
sdata[ "table_transcriptomics" ].obs[ _REGION_KEY ].cat.categories.to_list()

In [None]:
# First 10 rows of the table converted to a DataFrame.
sdata[ "table_transcriptomics" ].to_df().head( 10 )

In [None]:
# Display the table once more before the next steps.
sdata[ "table_transcriptomics" ]

In [None]:
# Build a transcript-density image from the transcript points and store it as
# the image layer "transcript_density".
sdata = sp.im.transcript_density(
    sdata,
    points_layer="transcripts_global",
    img_layer="morphology_focus_global",
    output_layer="transcript_density",
    overwrite=True,
)

In [None]:
# Plot the transcript-density image for the region [x_min, x_max, y_min, y_max].
sp.pl.plot_image(
    sdata,
    img_layer="transcript_density",
    crd=[35000, 52000, 3000, 20000],
)

In [None]:
# Plot the DAPI channel of the morphology image for the same region, for
# comparison with the transcript density.
sp.pl.plot_image(
    sdata,
    img_layer="morphology_focus_global",
    channel="DAPI",
    crd=[35000, 52000, 3000, 20000],
)

In [None]:
# Run the "genes left out" analysis for our segmentation; the return value is
# kept in `df`.
df = sp.pl.analyse_genes_left_out(
    sdata,
    points_layer="transcripts_global",
    labels_layer="segmentation_mask",
    table_layer="table_transcriptomics",
)

In [None]:
# Preprocess the table. The result is written to a new table layer; writing
# back to the input layer is also possible (pass overwrite=True).
sdata = sp.tb.preprocess_transcriptomics(
    sdata,
    labels_layer="segmentation_mask",
    table_layer="table_transcriptomics",
    output_layer="table_transcriptomics_preprocessed",
    min_counts=10,
    min_cells=5,
    size_norm=True,
    n_comps=50,
    update_shapes_layers=False,
    overwrite=True,
)

In [None]:
# Display the input table; the preprocessing output was written to the
# separate layer "table_transcriptomics_preprocessed".
sdata[ "table_transcriptomics" ]

In [None]:
# Plots for the preprocessed table.
sp.pl.preprocess_transcriptomics(sdata, table_layer="table_transcriptomics_preprocessed")

In [None]:
# Display the preprocessed table.
sdata[ "table_transcriptomics_preprocessed" ]

In [None]:
# Sum each row of the un-preprocessed table, then average over the rows.
sdata[ "table_transcriptomics" ].to_df().sum( axis=1 ).mean()  # mean total count per cell

In [None]:
# Same row sums as for the mean, summarised by their standard deviation.
sdata[ "table_transcriptomics" ].to_df().sum( axis=1 ).std() # std total count per cell

In [None]:
# Filter the preprocessed table on size, with the bounds min_size=500 and
# max_size=100000; the result is stored as "table_transcriptomics_filter".
sdata = sp.tb.filter_on_size(
    sdata,
    labels_layer="segmentation_mask",
    table_layer="table_transcriptomics_preprocessed",
    output_layer="table_transcriptomics_filter",
    max_size=100000,
    min_size=500,
    overwrite=True,
    update_shapes_layers=False,
)

Exercise.

Use the `SPArrOW` function `sp.tb.leiden` to perform Leiden clustering.

In [None]:
# solution

import scanpy as sc

# Cluster the size-filtered table; cluster labels are stored under the key
# "leiden" and the result is written to "table_transcriptomics_clustered".
sdata = sp.tb.leiden(
    sdata,
    labels_layer="segmentation_mask",
    table_layer="table_transcriptomics_filter",
    output_layer="table_transcriptomics_clustered",
    key_added="leiden",
    resolution=0.8,
    calculate_neighbors=True,
    n_neighbors=35,
    n_pcs=17,
    calculate_umap=True,
    rank_genes=True,
    overwrite=True,
)

# UMAP coloured by cluster.
sc.pl.umap(
    sdata.tables["table_transcriptomics_clustered"],
    color=["leiden"],
    show=True,
)
# Top 8 ranked genes per cluster.
sc.pl.rank_genes_groups(
    sdata.tables["table_transcriptomics_clustered"],
    n_genes=8,
    sharey=False,
    show=True,
)

In [None]:
# Colour each segmented cell by its "leiden" cluster on top of the DAPI channel
# of the preprocessed image; alpha=1 and linewidth=0 give opaque shapes
# without an outline.
sp.pl.plot_shapes(
    sdata,
    img_layer="clahe",
    channel="DAPI",
    shapes_layer="segmentation_mask_boundaries",
    table_layer="table_transcriptomics_clustered",
    column="leiden",
    linewidth=0,
    alpha=1,
)

In [None]:
# Look up the .obs row(s) of the clustered table whose cell_ID equals 321.
sdata[ "table_transcriptomics_clustered"].obs[ sdata[ "table_transcriptomics_clustered"].obs[ "cell_ID" ] == 321]

In [None]:
# Entry of .obsm["spatial"] for the same cell (cell_ID 321).
sdata[ "table_transcriptomics_clustered"].obsm["spatial" ][ sdata[ "table_transcriptomics_clustered"].obs[ "cell_ID" ] == 321] # x y

In [None]:
#from napari_spatialdata import Interactive

#Interactive( sdata )