In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
import harpy as hp

#### Download the data

In [3]:
from training_datasets import sdata_vectra

# Download location handed to sdata_vectra; left at None here.
# On Windows, set an explicit path (e.g. r"c:\tmp").
path = None
#path = "/staging/leuven/stg_00143/spatial_data_training"

# NOTE(review): output=None presumably means the loader does not write the object to disk itself — confirm.
sdata = sdata_vectra(path=path, output=None) # on Windows, set path (e.g. to r"c:\tmp")

#### Create SpatialData object

In [None]:
import os
import tempfile

# Directory that receives the Zarr store (and, further down, the downloaded model).
OUTPUT_DIR = tempfile.gettempdir()
# OUTPUT_DIR = "/staging/leuven/stg_00143/spatial_data_training/output_dir"

# Persist the SpatialData object; a store left over from a previous run is replaced.
zarr_path = os.path.join(OUTPUT_DIR, "sdata_vectra.zarr")
sdata.write(zarr_path, overwrite=True)

# Good practice: rechunk the image (chunk size 500 along every axis) and write
# it back under the same layer name.
image_rechunked = sdata["image"].data.rechunk(500)
sdata = hp.im.add_image_layer(
    sdata,
    arr=image_rechunked,
    output_layer="image",
    overwrite=True,
)

In [None]:
sdata.is_backed()

#### Visualize

In [None]:
# Overview of the image layer; channel=None leaves the channel choice to
# hp.pl.plot_shapes (NOTE(review): presumably one panel per channel — confirm).
hp.pl.plot_shapes(
    sdata,
    img_layer="image",
    figsize=(8 * 3, 8 * 3),
    channel=None,
)

Plot histogram

In [None]:
import matplotlib.ticker as ticker

# Intensity histogram of channel 0 (100 bins) on a small 3 x 3 inch figure.
ax = hp.pl.histogram(
    sdata,
    img_layer="image",
    channel=0,
    bins=100,
    fig_kwargs={"figsize": (3, 3)},
)

# Label the y axis in scientific notation; scilimits=(0, 0) applies it to all
# magnitudes, and useMathText renders the multiplier as 10^n.
ax.yaxis.set_major_formatter(ticker.ScalarFormatter(useMathText=True))
ax.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))


#### Segment using InstanSeg.

Download an `instanseg` model.

Make sure to install `instanseg`:

`pip install git+https://github.com/instanseg/instanseg.git@main`

In [None]:
import os
import requests
import zipfile

def download_and_unzip(url, extract_to):
    """Download a zip archive from ``url`` and unpack it into ``extract_to``.

    The archive is downloaded and extracted in a staging directory next to
    ``extract_to`` and only moved into place once every step succeeded.
    ``extract_to`` therefore never holds a partial download, and a failed or
    interrupted run is retried on the next call instead of being reported as
    "already downloaded".

    Parameters
    ----------
    url : str
        Address of the zip archive.
    extract_to : str
        Directory that will hold the extracted files. If it already exists and
        is non-empty, nothing is downloaded.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Whatever the download (``response.raise_for_status()``), the file
        writes or the zip extraction raise; the staging directory is removed
        before the exception propagates.
    """
    import shutil

    # Only a non-empty directory counts as a finished download; an empty one
    # can be the leftover of an earlier failed attempt.
    if os.path.isdir(extract_to) and os.listdir(extract_to):
        print("Model already downloaded.")
        return

    staging_dir = os.path.normpath(extract_to) + ".partial"
    shutil.rmtree(staging_dir, ignore_errors=True)  # stale staging data from an interrupted run
    os.makedirs(staging_dir)
    try:
        local_zip_path = os.path.join(staging_dir, 'downloaded.zip')
        print("Downloading...")
        # The timeout bounds the connection attempt and each read, not the
        # whole transfer; the context manager releases the connection.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(local_zip_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        print("Unzipping...")
        with zipfile.ZipFile(local_zip_path, 'r') as zip_ref:
            zip_ref.extractall(staging_dir)
        os.remove(local_zip_path)

        if os.path.isdir(extract_to):
            os.rmdir(extract_to)  # empty leftover directory (checked above)
        os.rename(staging_dir, extract_to)
    except BaseException:
        # Leave nothing behind that a later call could mistake for a model.
        shutil.rmtree(staging_dir, ignore_errors=True)
        raise
    print(f"Done! Files extracted to: {extract_to}")

# InstanSeg release archive (fluorescence model for nuclei and cells); it is
# unpacked into its own sub-directory of OUTPUT_DIR.
url = "https://github.com/instanseg/instanseg/releases/download/instanseg_models_v0.1.0/fluorescence_nuclei_and_cells.zip"
target_path = os.path.join(OUTPUT_DIR, "fluorescence_nuclei_and_cells")
download_and_unzip(url, extract_to=target_path)

In [None]:
import os

import torch
from instanseg import InstanSeg

path_model = os.path.join(target_path, "instanseg.pt")

# Load the downloaded model on the CPU and wrap it in InstanSeg.
# NOTE: weights_only=False lets torch.load unpickle arbitrary objects; only
# use it on model files from a trusted source.
instanseg_fluorescence = InstanSeg(
    model_type=torch.load(path_model, weights_only=False),
    device="cpu",
)

In [None]:
from dask.distributed import Client, LocalCluster
from instanseg import InstanSeg

# Size of the local Dask cluster; set either value to None to skip starting one.
workers = 4
threads = 1

if workers is not None and threads is not None:
    cluster = LocalCluster(
        n_workers=workers,
        threads_per_worker=threads,
        memory_limit="500GB",  # prevent spilling to disk
    )

    client = Client(cluster)

    # Report the dashboard only when a client was actually created; printing it
    # unconditionally raised NameError when no cluster was started.
    print(client.dashboard_link)
else:
    print("workers/threads not set: no local Dask cluster started.")

In [None]:
sdata[ "image" ].data

In [None]:
import harpy as hp

# Takes about 1 minute on a Mac M2.
# A single InstanSeg run with output="all_outputs" fills both labels layers
# named in output_labels_layer (nuclei and cells); no shapes layers are
# created (output_shapes_layer=None).
# NOTE(review): depth presumably is the overlap between neighbouring chunks, and
# iou/trim presumably control how labels are reconciled in that overlap — confirm.

sdata = hp.im.segment(
    sdata,
    img_layer="image",
    output_labels_layer=["labels_nuclei_instanseg", "labels_cells_instanseg"],
    output_shapes_layer=None,
    labels_layer_align="labels_cells_instanseg",
    depth=50,
    chunks=None,
    model=hp.im.instanseg_callable,
    # parameters passed to hp.im.instanseg_callable
    output="all_outputs",
    device="cpu",
    instanseg_model=path_model,  # load it in every worker, because torchscript model is not serializable
    iou=True,
    trim=False,
    overwrite=True,
)

In [None]:
# Crop to the window 200-500 in x and y (global coordinate system) for a closer look.
sdata_small = sdata.query.bounding_box(
    min_coordinate=[200, 200],
    max_coordinate=[500, 500],
    axes=("x", "y"),
    target_coordinate_system="global",
)

# Overlay each mask on channel 6. Although colors are different, labels are aligned.
for labels_layer, title in (
    ("labels_cells_instanseg", "cells"),
    ("labels_nuclei_instanseg", "nuclei"),
):
    sdata_small.pl.render_images("image", channel=6).pl.render_labels(
        labels_layer, fill_alpha=0.5
    ).pl.show(title=title)

The cell segmentation does not look great when the InstanSeg `output` mode is set to `all_outputs`.
A better option is to segment cells and nuclei separately, and then align the cell and nuclei masks.

In [15]:
#from napari_spatialdata import Interactive

#Interactive( sdata )

In [None]:
import harpy as hp

# Segment the cells only (output="cells"); labels and shapes are written to
# "labels_cells_instanseg" / "shapes_cells_instanseg". No alignment is
# requested in this run (labels_layer_align=None).
# NOTE(review): no chunks argument is given here, unlike the nuclei run that
# follows — confirm the default chunking is intended.
sdata = hp.im.segment(
    sdata,
    img_layer="image",
    output_labels_layer=["labels_cells_instanseg"],
    output_shapes_layer=["shapes_cells_instanseg"],
    labels_layer_align=None,
    depth=50,
    model=hp.im.instanseg_callable,
    # parameters passed to hp.im.instanseg_callable
    output="cells",
    device="cpu",
    instanseg_model=path_model,  # load it in every worker, because torchscript model is not serializable
    iou=True,
    trim=False,
    overwrite=True,
)

In [None]:
import harpy as hp

# Segment the nuclei only (output="nuclei"); labels and shapes are written to
# "labels_nuclei_instanseg" / "shapes_nuclei_instanseg". No alignment is
# requested in this run (labels_layer_align=None).
sdata = hp.im.segment(
    sdata,
    img_layer="image",
    output_labels_layer=["labels_nuclei_instanseg"],
    output_shapes_layer=["shapes_nuclei_instanseg"],
    labels_layer_align=None,
    depth=50,
    chunks=( 500,500 ),  # explicit 500 x 500 chunks for this run
    model=hp.im.instanseg_callable,
    # parameters passed to hp.im.instanseg_callable
    output="nuclei",
    device="cpu",
    instanseg_model=path_model,  # load it in every worker, because torchscript model is not serializable
    iou=True,
    trim=False,
    overwrite=True,
)

In [None]:
# Crop to the window 200-500 in x and y (global coordinate system) for a closer look.
sdata_small = sdata.query.bounding_box(
    min_coordinate=[200, 200],
    max_coordinate=[500, 500],
    axes=("x", "y"),
    target_coordinate_system="global",
)

# Overlay each mask on channel 6. The two masks were segmented independently
# (labels_layer_align=None), so their labels are not aligned yet.
for labels_layer, title in (
    ("labels_cells_instanseg", "cells"),
    ("labels_nuclei_instanseg", "nuclei"),
):
    sdata_small.pl.render_images("image", channel=6).pl.render_labels(
        labels_layer, fill_alpha=0.5
    ).pl.show(title=title)

Align the obtained cell and nuclei masks.

In [None]:
sdata

In [None]:
# Align the nuclei mask with the cell mask; the result is stored in new
# "..._aligned" labels and shapes layers, the input layers are kept.
sdata = hp.im.align_labels_layers(
    sdata,
    labels_layer_1="labels_nuclei_instanseg",
    labels_layer_2="labels_cells_instanseg",
    output_labels_layer="labels_nuclei_instanseg_aligned",
    output_shapes_layer="shapes_nuclei_instanseg_aligned",
    chunks=None,
    depth=50,
    overwrite=True,
)

Sanity check, to see if alignment went well.

In [None]:
# Same crop three times: cell outlines, aligned nuclei, and the original
# (unaligned) nuclei, so the effect of the alignment can be compared by eye.
for shapes_layer in (
    "shapes_cells_instanseg",
    "shapes_nuclei_instanseg_aligned",
    "shapes_nuclei_instanseg",
):
    hp.pl.sanity(
        sdata,
        img_layer="image",
        shapes_layer=shapes_layer,
        points_layer=None,
        crd=[250, 350, 250, 350],
        plot_cell_number=True,
        figsize=(8, 8),
    )

In [22]:
#from napari_spatialdata import Interactive

#Interactive( sdata )

In [None]:
# Crop to the window 200-500 in x and y (global coordinate system) for a closer look.
sdata_small = sdata.query.bounding_box(
    min_coordinate=[200, 200],
    max_coordinate=[500, 500],
    axes=("x", "y"),
    target_coordinate_system="global",
)

# Overlay the cell mask and the aligned nuclei mask on channel 6.
# Although colors are different, labels are aligned.
for labels_layer, title in (
    ("labels_cells_instanseg", "cells"),
    ("labels_nuclei_instanseg_aligned", "nuclei"),
):
    sdata_small.pl.render_images("image", channel=6).pl.render_labels(
        labels_layer, fill_alpha=0.5
    ).pl.show(title=title)

#### Create the AnnData table:

Allocate intensity for cells.

In [None]:
# Mean intensity per cell for every non-DAPI channel, plus the per-cell
# statistics requested via obs_stats; the result is stored as a table layer.
sdata = hp.tb.allocate_intensity(
    sdata,
    # Name the image layer explicitly: once the single-channel "image_c*" layers
    # exist (Cellpose section), a re-run must not depend on the default layer.
    img_layer="image",
    labels_layer="labels_cells_instanseg",
    output_layer="table_intensities_cells",
    channels=[0, 1, 2, 3, 4, 5, 7],  # do not include DAPI
    mode="mean",
    obs_stats=["count", "var", "kurtosis"],
    calculate_center_of_mass=True,
    overwrite=True,
)

In [None]:
sdata[ "table_intensities_cells" ]

In [None]:
sdata[ "table_intensities_cells" ].to_df().head()

In [None]:
sdata[ "table_intensities_cells" ].obs.head()

Allocate intensity for nuclei

In [None]:
# Mean intensity per (aligned) nucleus for every non-DAPI channel, plus the
# per-nucleus statistics requested via obs_stats; stored as a table layer.
sdata = hp.tb.allocate_intensity(
    sdata,
    # Name the image layer explicitly: once the single-channel "image_c*" layers
    # exist (Cellpose section), a re-run must not depend on the default layer.
    img_layer="image",
    labels_layer="labels_nuclei_instanseg_aligned",
    output_layer="table_intensities_nuclei",
    channels=[0, 1, 2, 3, 4, 5, 7],  # do not include DAPI
    mode="mean",
    obs_stats=["count", "var", "kurtosis"],
    calculate_center_of_mass=True,
    overwrite=True,
)

In [None]:
sdata[ "table_intensities_nuclei" ].to_df().head()

In [None]:
sdata[ "table_intensities_nuclei" ].obs.head()

In [None]:
# Color the InstanSeg cell shapes by the "var_5" column of the cells table,
# drawn semi-transparently on top of channel 5 within the crd window.
hp.pl.plot_shapes(
    sdata,
    img_layer="image",
    shapes_layer="shapes_cells_instanseg",
    table_layer="table_intensities_cells",
    column="var_5",
    channel=5,
    crd=[200, 500, 200, 500],
    figsize=(5, 5),
    alpha=0.4,
)

In [None]:
import numpy as np

# Cross-check: recompute the channel-5 variance of one cell directly from its pixels.
# Label IDs depend on the segmentation result, so this ID may not exist in every run.
CELL_ID = 8059  # cell with high variance

image = sdata["image"].data[5].compute()
mask = sdata["labels_cells_instanseg"].data.compute() == CELL_ID

if mask.any():
    cell_variance = np.var(image[mask])
else:
    # np.var of an empty selection would silently yield NaN (plus a warning).
    cell_variance = None
    print(f"Label {CELL_ID} is not present in 'labels_cells_instanseg'; pick another cell ID.")

cell_variance

In [None]:
import matplotlib.pyplot as plt

# Explicit 5 x 5 inch axes that is handed to .pl.show below.
fig, ax = plt.subplots(figsize=(5, 5))

color = "var_5"  # value used to color the cell labels

sdata_small = sdata.query.bounding_box(
    min_coordinate=[200, 200],
    max_coordinate=[500, 500],
    axes=("x", "y"),
    target_coordinate_system="global",
)

# Channel 5 as background, cell labels on top (rendered with datashader).
labels_on_image = sdata_small.pl.render_images("image", channel=5).pl.render_labels(
    "labels_cells_instanseg",
    color=color,
    method="datashader",
    fill_alpha=0.3,
    outline_alpha=0.1,
)
labels_on_image.pl.show(coordinate_systems="global", ax=ax)

### Segment using Cellpose:

In [None]:
import matplotlib.pyplot as plt

# Look at the four channels that are segmented with Cellpose below (no shapes overlaid).
hp.pl.plot_shapes(
    sdata,
    img_layer="image",
    shapes_layer=None,
    channel=[6, 5, 1, 4],
    figsize=(5 * 4, 5 * 4),
)

In [None]:
# takes about 1 min 30 s on a Mac M2

def segment_channel(sdata, channel, model_type, diameter):
    """Segment a single channel of the "image" layer with Cellpose.

    The channel is first stored as its own single-channel image layer
    ``image_c{channel}``; that layer is then segmented and the result is
    written to ``labels_cells_c{channel}`` and ``shapes_cells_c{channel}``.
    Existing layers with these names are overwritten.

    Parameters
    ----------
    sdata
        The SpatialData object holding the "image" layer.
    channel : int
        Index of the channel in the "image" layer.
    model_type : str
        Cellpose model passed on to ``hp.im.cellpose_callable``.
    diameter : int
        Cellpose ``diameter`` setting for this channel.

    Returns
    -------
    The SpatialData object returned by ``hp.im.segment``.
    """
    sdata = hp.im.add_image_layer(
        sdata,
        arr=sdata["image"].data[channel][None, ...],  # keep a leading channel axis
        output_layer=f"image_c{channel}",
        overwrite=True,
    )

    print(f"Segmenting channel {channel}.")

    return hp.im.segment(
        sdata,
        img_layer=f"image_c{channel}",
        model=hp.im.cellpose_callable,
        device="cpu",
        model_type=model_type,
        output_labels_layer=f"labels_cells_c{channel}",
        output_shapes_layer=f"shapes_cells_c{channel}",
        diameter=diameter,
        depth=50,
        channels=[0, 0],
        overwrite=True,
    )


# channel -> (Cellpose model_type, diameter); channel 6 uses the "nuclei"
# model with a smaller diameter, the other channels use "cyto3".
CELLPOSE_SETTINGS = {
    6: ("nuclei", 15),
    5: ("cyto3", 30),
    1: ("cyto3", 30),
    4: ("cyto3", 30),
}

for channel, (model_type, diameter) in CELLPOSE_SETTINGS.items():
    sdata = segment_channel(sdata, channel, model_type=model_type, diameter=diameter)

In [None]:
# One figure per segmented channel: the Cellpose shapes on top of their own channel.
for channel_index in (6, 5, 1, 4):
    hp.pl.plot_shapes(
        sdata,
        img_layer="image",
        shapes_layer=f"shapes_cells_c{channel_index}",
        channel=channel_index,
        figsize=(5, 5),
    )

#### Merge the segmentation masks:

In [None]:
# Merge the three Cellpose cell masks pairwise: c5 with c1 first, then that
# intermediate result with c4. The second step overwrites "labels_cells_merged".
for labels_layer_1, labels_layer_2 in (
    ("labels_cells_c5", "labels_cells_c1"),
    ("labels_cells_merged", "labels_cells_c4"),
):
    sdata = hp.im.merge_labels_layers(
        sdata,
        labels_layer_1=labels_layer_1,
        labels_layer_2=labels_layer_2,
        output_labels_layer="labels_cells_merged",
        output_shapes_layer="shapes_cells_merged",
        overwrite=True,
    )

# Merged shapes on top of channel 1.
hp.pl.plot_shapes(
    sdata,
    img_layer="image",
    channel=1,
    shapes_layer="shapes_cells_merged",
    figsize=(5, 5),
)

In [None]:
# What about nuclei? Show the channel-6 segmentation on top of channel 6.
hp.pl.plot_shapes(
    sdata,
    img_layer="image",
    shapes_layer="shapes_cells_c6",
    channel=6,
    figsize=(5, 5),
)

In [None]:
# Expand the channel-6 (nuclei) labels by distance=10 and store the result in
# separate "..._expanded" labels and shapes layers.
sdata = hp.im.expand_labels_layer(
    sdata,
    labels_layer="labels_cells_c6",
    distance=10,
    output_labels_layer="labels_cells_c6_expanded",
    output_shapes_layer="shapes_cells_c6_expanded",
    overwrite=True,
)

In [None]:
# Expanded nuclei shapes on top of channel 6.
hp.pl.plot_shapes(
    sdata,
    img_layer="image",
    shapes_layer="shapes_cells_c6_expanded",
    channel=6,
    figsize=(5, 5),
)

In [None]:
# Now merge them: combine the merged cell mask with the nuclei mask and its
# expanded version. The output overwrites "labels_cells_merged".
# NOTE(review): presumably nuclei without a matching cell are added via their
# expanded label — confirm against the hp.im.merge_labels_layers_nuclei docs.
sdata = hp.im.merge_labels_layers_nuclei(
    sdata,
    labels_layer="labels_cells_merged",
    labels_layer_nuclei="labels_cells_c6",  # nuclei
    labels_layer_nuclei_expanded="labels_cells_c6_expanded",  # nuclei expanded
    output_labels_layer="labels_cells_merged",
    output_shapes_layer="shapes_cells_merged",
    overwrite=True,
)

In [None]:
# Final merged Cellpose mask on top of channel 6.
hp.pl.plot_shapes(
    sdata,
    img_layer="image",
    shapes_layer="shapes_cells_merged",
    channel=6,
    figsize=(10, 10),
)

In [None]:
# For comparison: the InstanSeg cell mask on the same channel and at the same size.
hp.pl.plot_shapes(
    sdata,
    img_layer="image",
    shapes_layer="shapes_cells_instanseg",
    channel=6,
    figsize=(10, 10),
)

#### Create the AnnData table:

In [None]:
# Mean intensity per merged cell for every non-DAPI channel, plus the per-cell
# statistics requested via obs_stats; stored as a new table layer.
sdata = hp.tb.allocate_intensity(
    sdata,
    img_layer="image",
    channels=[0, 1, 2, 3, 4, 5, 7],  # do not include DAPI
    labels_layer="labels_cells_merged",
    output_layer="table_intensities_cells_merged",
    mode="mean",
    obs_stats=["count", "var", "kurtosis"],
    calculate_center_of_mass=True,
    overwrite=True,
)

In [None]:
sdata[ "table_intensities_cells_merged" ].obs.head()

Exercise:

Visualize the data. Use the `harpy` function `hp.pl.plot_shapes` to visualize `var_5`.


<details>
<summary>Click to reveal the solution</summary>

```python

hp.pl.plot_shapes(
    sdata,
    img_layer="image",
    shapes_layer="shapes_cells_merged",
    table_layer="table_intensities_cells_merged",
    column="var_5",
    channel=5,
    crd = [ 200, 500, 200, 500 ],
    figsize=(5,5),
    alpha=0.4,
      )
```

</details>

#### Leiden clustering

In [None]:
import scanpy as sc

# Leiden clustering on the merged-cell intensity table; the result goes to a
# separate "..._leiden" table layer, so the input table stays untouched.
# NOTE(review): the table holds only 7 channels, so n_pcs=17 exceeds the number
# of features — confirm how hp.tb.leiden / scanpy treat this.
sdata = hp.tb.leiden(
    sdata,
    labels_layer="labels_cells_merged",
    table_layer="table_intensities_cells_merged",
    output_layer="table_intensities_cells_merged_leiden",
    calculate_umap=True,
    calculate_neighbors=True,
    n_pcs=17,  # The number of principal components to use when calculating neighbors.
    n_neighbors=35,  # The number of neighbors to consider when calculating neighbors.
    resolution=0.4,
    rank_genes=True,
    key_added="leiden",
    overwrite=True,
)

# Plot UMAP, colored by Leiden cluster
leiden_table = sdata.tables["table_intensities_cells_merged_leiden"]
sc.pl.umap(leiden_table, color=["leiden"], show=True)

In [None]:
sdata.tables["table_intensities_cells_merged"]

In [None]:
# Merged cell shapes colored by their Leiden cluster, on top of channel 6.
hp.pl.plot_shapes(
    sdata,
    img_layer="image",
    table_layer="table_intensities_cells_merged_leiden",
    shapes_layer="shapes_cells_merged",
    column="leiden",
    channel=6,
    linewidth=0.2,
    alpha=0.7,
    figsize=(8, 8),
)

In [None]:
# Top 7 ranked features (here: channels) per Leiden cluster, one y axis per panel.
sc.pl.rank_genes_groups(
    sdata.tables["table_intensities_cells_merged_leiden"],
    n_genes=7,
    sharey=False,
    show=True,
)

In [None]:
sdata[ "table_intensities_cells_merged_leiden" ]

In [None]:
# channel 1 is found in the leiden clusters
# channel 7 also
# channel 5 clearly pops up

#### Spatial neighbors using Squidpy

In [None]:
import squidpy as sq
import numpy as np

adata = sdata["table_intensities_cells_merged_leiden"]
cluster_key = "leiden"

# Both squidpy calls write their results into adata in place (copy=False).
sq.gr.spatial_neighbors(adata, spatial_key="spatial", copy=False)
sq.gr.nhood_enrichment(adata, cluster_key=cluster_key, seed=0, copy=False)

# Replace NaN z-scores by 0 (and infinities by large finite values) before
# plotting — presumably so the "ward" clustering of the heatmap does not fail.
enrichment = adata.uns[f"{cluster_key}_nhood_enrichment"]
enrichment["zscore"] = np.nan_to_num(enrichment["zscore"])

sq.pl.nhood_enrichment(
    adata,
    cluster_key=cluster_key,
    method="ward",
    mode="zscore",
    figsize=(8, 8),
)