In [None]:
# Smoke test for the Jupyter Notebook renderer: execute this cell first.
# If it yields an error, click the Extensions icon in Visual Studio Code (4 squares in the left sidebar),
# select the "Jupyter" extension, press "Uninstall" and then "Reload Window". Afterwards install the extension again,
# for example via "Select Kernel" followed by "Install/Enable suggested extensions".
print("Hello Harpy")

In [2]:
import harpy as hp

In [None]:
import os
import uuid
import tempfile
from harpy.datasets.registry import get_registry

# Root folder of the dataset registry.
# The default is the shared location on the VSC cluster, as the datasets are already there.
# On any other machine (e.g. Windows), set the HARPY_DATA_DIR environment variable to a local
# folder (e.g. r"c:\tmp") instead of editing this cell.
DEFAULT_REGISTRY_PATH = "/staging/leuven/stg_00143/spatial_data_training/merscope"
registry = get_registry(path=os.environ.get("HARPY_DATA_DIR", DEFAULT_REGISTRY_PATH))

# All files of this experiment live under the same registry prefix.
SAMPLE_PREFIX = "transcriptomics/vizgen/mouse/Liver1Slice1"

# Fetch the image-side inputs; their returned paths are not needed further on.
for image_file in (
    "mosaic_DAPI_z3.tif",
    "mosaic_PolyT_z3.tif",
    "micron_to_mosaic_pixel_transform.csv",
):
    registry.fetch(f"{SAMPLE_PREFIX}/images/{image_file}")
path_transcripts = registry.fetch(f"{SAMPLE_PREFIX}/detected_transcripts.csv")

# The reader is pointed at the folder that contains detected_transcripts.csv.
input_path = os.path.dirname(path_transcripts)
OUTPUT_DIR = tempfile.gettempdir()

# Every run writes to a new, uniquely named .zarr store inside OUTPUT_DIR.
# Takes around 7 min on Mac M2; around 18 min on VSC.
sdata = hp.io.merscope(
    path=input_path,
    to_coordinate_system="global",
    z_layers=[
        3,
    ],
    backend=None,
    transcripts=True,
    mosaic_images=True,
    do_3D=False,
    z_projection=False,
    image_models_kwargs={"scale_factors": None},
    output=os.path.join(OUTPUT_DIR, f"sdata_merscope_{uuid.uuid4()}.zarr"),
    filter_gene_names=["blank"],
)

In [None]:
# The image has 2 channels: DAPI and PolyT.
sdata

In [5]:
# Alternatively: read already analyzed data from a shared .zarr store,
# and save it to our own store (so we can modify our own copies).

# from spatialdata import read_zarr
# sdata = read_zarr("/staging/leuven/stg_00143/spatial_data_training/merscope/sdata_merscope_full.zarr")  # takes 3 min 30 s on the VSC
# sdata.write("/staging/leuven/stg_00143/YOURNAME/merscope/sdata_merscope_full.zarr")  # takes about 3 min on the VSC

In [None]:
# Step 1: min-max filtering of the raw image layer, restricted to the crd region.
# NOTE(review): the two filter sizes are presumably one per channel — confirm.
sdata = hp.im.min_max_filtering(
    sdata,
    img_layer="mouse_Liver1Slice1_z3_global",
    output_layer="min_max_filtered",
    crd=[20000, 25000, 20000, 25000],
    size_min_max_filter=[85, 135],
    overwrite=True,
)

# Step 2: contrast enhancement of the filtered result on the same region; output goes to "clahe".
sdata = hp.im.enhance_contrast(
    sdata,
    img_layer="min_max_filtered",
    output_layer="clahe",
    crd=[20000, 25000, 20000, 25000],
    contrast_clip=[13.5, 18.5],
    overwrite=True,
)

In [None]:
# Plot the raw image layer and the "clahe" layer for the same region.
hp.pl.plot_shapes(
    sdata,
    img_layer=["mouse_Liver1Slice1_z3_global", "clahe"],
    figsize=(10, 10),
    crd=[20000, 25000, 20000, 25000],
)

In [None]:
# Channel coordinates of the "clahe" image layer.
sdata["clahe"].c.data

In [None]:
# Rechunk on disk: the "clahe" layer is written back under its own name with chunks of 2048.

from spatialdata.transformations import get_transformation

clahe_image = sdata["clahe"]

sdata = hp.im.add_image_layer(
    sdata,
    arr=clahe_image.data.rechunk(2048),
    output_layer="clahe",
    c_coords=clahe_image.c.data,
    # Carry over all transformations registered on the existing layer.
    transformations=get_transformation(clahe_image, get_all=True),
    overwrite=True,
)

In [None]:
from dask.distributed import Client, LocalCluster

# Start a local Dask cluster with 8 single-threaded worker processes and connect a client to it.
# NOTE(review): Dask applies memory_limit per worker, so this allows up to 8 x 32 GB in total —
# confirm the machine has that much memory.
cluster = LocalCluster(
    processes=True,
    n_workers=8,
    threads_per_worker=1,
    memory_limit="32GB",
)
client = Client(cluster)

# URL of the Dask dashboard of this client.
print(client.dashboard_link)

In [None]:
import torch
from cellpose import models

from harpy.image import cellpose_callable

from harpy.image._image import _get_spatial_element

# Channel names of the "clahe" image; used below to look up channel positions by name.
se = _get_spatial_element(sdata, layer="clahe")
channel_names = se.c.data.tolist()

try:
    sdata = hp.im.segment(
        sdata,
        img_layer="clahe",
        chunks=None,
        depth=200,
        model=cellpose_callable,
        device="cpu",
        # parameters that will be passed to the callable _cellpose
        pretrained_model="cyto3",
        diameter=100,
        flow_threshold=0.85,
        cellprob_threshold=-4,
        # 1-based channel positions: PolyT first, DAPI second.
        # NOTE(review): presumably cellpose's [cytoplasm, nucleus] convention — confirm.
        channels=[channel_names.index("PolyT") + 1, channel_names.index("DAPI") + 1],
        output_labels_layer="segmentation_mask_crop",
        output_shapes_layer="segmentation_mask_boundaries_crop",
        crd=[20000, 25000, 20000, 25000],  # region to segment [x_min, xmax, y_min, y_max],
        overwrite=True,
    )
finally:
    # Release the Dask resources even when segmentation fails. Closing the client alone does not
    # stop an explicitly created LocalCluster, so the cluster is shut down as well.
    client.close()
    cluster.close()

# Took 19 minutes on the VSC

In [None]:
# Plot the segmentation boundaries on top of the "clahe" image for the segmented region.
hp.pl.plot_shapes(
    sdata,
    img_layer=["clahe"],
    shapes_layer="segmentation_mask_boundaries_crop",
    crd=[20000, 25000, 20000, 25000],
    figsize=(10, 10),
)

In [None]:
# Allocate the transcripts of the points layer to the segmentation mask;
# the result is stored in the table "table_transcriptomics_crop".
sdata = hp.tb.allocate(
    sdata=sdata,
    points_layer="transcripts_global",
    labels_layer="segmentation_mask_crop",
    output_layer="table_transcriptomics_crop",
    overwrite=True,
    update_shapes_layers=False,
)

In [None]:
# Sanity plot for the gene Vwf on a small region, with cell numbers shown.
hp.pl.sanity(
    sdata,
    img_layer="clahe",
    points_layer="transcripts_global",
    shapes_layer="segmentation_mask_boundaries_crop",
    gene="Vwf",
    plot_cell_number=True,
    crd=[20500, 20500 + 500, 21000, 21500],
    figsize=(5, 5),
)

In [None]:
# Look up the number of transcripts of the Vwf gene in one of the cells shown above (using its cell ID);
# it should be the same as the number of transcripts plotted above.
table_crop = sdata["table_transcriptomics_crop"]
is_selected_cell = table_crop.obs["cell_ID"] == 1360
table_crop[is_selected_cell].to_df()["Vwf"]

In [None]:
# Plot the segmentation shapes using the "Vwf" column of the allocated table.
hp.pl.plot_shapes(
    sdata,
    img_layer="clahe",
    shapes_layer="segmentation_mask_boundaries_crop",
    table_layer="table_transcriptomics_crop",
    column="Vwf",
    crd=[20000, 22000, 20000, 22000],
    figsize=(10, 10),
)

In [None]:
# Run the "genes left out" analysis for the segmentation, its table and the transcripts; keep the result in df.
df = hp.pl.analyse_genes_left_out(
    sdata,
    points_layer="transcripts_global",
    labels_layer="segmentation_mask_crop",
    table_layer="table_transcriptomics_crop",
)

In [None]:
# Preprocess the allocated table.
# The results are written to a new slot; writing to the same slot is also possible (when passing overwrite=True).
sdata = hp.tb.preprocess_transcriptomics(
    sdata,
    labels_layer="segmentation_mask_crop",
    table_layer="table_transcriptomics_crop",
    output_layer="table_transcriptomics_preprocessed_crop",
    min_counts=10,
    min_cells=5,
    size_norm=True,
    n_comps=50,
    update_shapes_layers=False,
    overwrite=True,
)

In [None]:
# Plots for the preprocessed table.
hp.pl.preprocess_transcriptomics(sdata, table_layer="table_transcriptomics_preprocessed_crop")

In [None]:
# Size filter on the preprocessed table (min_size=500, max_size=100000); the result goes to a new table.
sdata = hp.tb.filter_on_size(
    sdata,
    labels_layer="segmentation_mask_crop",
    table_layer="table_transcriptomics_preprocessed_crop",
    output_layer="table_transcriptomics_filter_crop",
    max_size=100000,
    min_size=500,
    overwrite=True,
    update_shapes_layers=False,
)

In [None]:
import scanpy as sc

# Leiden clustering of the size-filtered table; cluster labels are stored under the key "leiden".
sdata = hp.tb.leiden(
    sdata,
    labels_layer="segmentation_mask_crop",
    table_layer="table_transcriptomics_filter_crop",
    output_layer="table_transcriptomics_clustered_crop",
    calculate_neighbors=True,
    calculate_umap=True,
    n_neighbors=35,
    n_pcs=17,
    resolution=1.0,
    key_added="leiden",
    rank_genes=True,
    overwrite=True,
)

# Both scanpy plots are drawn from the clustered table.
adata_clustered = sdata.tables["table_transcriptomics_clustered_crop"]
sc.pl.umap(adata_clustered, color=["leiden"], show=True)
sc.pl.rank_genes_groups(adata_clustered, n_genes=8, sharey=False, show=True)

In [None]:
# Plot the segmentation shapes using the "leiden" column, on the DAPI channel of the "clahe" image.
hp.pl.plot_shapes(
    sdata,
    img_layer="clahe",
    channel="DAPI",
    shapes_layer="segmentation_mask_boundaries_crop",
    table_layer="table_transcriptomics_clustered_crop",
    column="leiden",
    linewidth=0,
    alpha=1,
    crd=[20000, 25000, 20000, 25000],
)

In [29]:
# from napari_spatialdata import Interactive

# del sdata["table_transcriptomics_clustered_crop"].uns["leiden_colors"]

# Interactive( sdata )