#### Read the data

In [None]:
import os
import harpy as hp

# Input and output locations. The defaults are the original author's local
# paths; export HUBMAP_DATA_PATH / HUBMAP_OUTPUT_PATH before starting the
# kernel to point the notebook elsewhere without editing this cell.
data_path = os.environ.get("HUBMAP_DATA_PATH", "/Users/arnedf/VIB/DATA/hubmap")
# data_path = "/hive/hubmap/data/public/spatial-data-workshop/TMA2/phenocycler"

output_path = os.environ.get("HUBMAP_OUTPUT_PATH", "/Users/arnedf/VIB/DATA/hubmap")
# output_path = "/hive/user-workspaces/adefauw/1538/phenocycler_data"

# File name of the scan (QPTIFF) inside data_path.
file = "Segmented_Bronchi_TMA_Scan1.er.qptiff"

input_path = os.path.join(data_path, file)

In [4]:
from contextlib import contextmanager
from typing import Generator
from xml.etree import ElementTree

import dask.array as da
import ome_types
from ome_types.model import UnitsLength
import tifffile

# Class for basic Tiff and QPTiff file reading.

class Tiff:
    """ Class for reading TIFF and QPTIFF files. """
    def __init__(self, filepath):
        self.filepath = filepath
        self.tif = None
        self.pages = []
        self.channel_names = []
        self.pixel_size_um = None

    def open(self):
        """ Open the file for reading """
        self.tif = tifffile.TiffFile(self.filepath)
        self.pages = self.tif.series[0].pages
        self.channel_names = self._get_channel_names()
        self.pixel_size_um = self._get_pixel_size_um()

    def close(self):
        """ Close the file """
        if self.tif:
            self.tif.close()

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.close()

    def read_channel(self, name):
        """ Read the channel with the given name from the tiff/qptiff file and
            return the pixel data as a numpy array. """
        idx = self.channel_names.index(name)
        return self.pages[idx].asarray()
    
    @contextmanager
    def dask_array(self) -> Generator[da.Array, None, None]:
        """ Read the full tiff/qptiff file into a dask array of dimensions (c, y, x). """

        # Represent each channel of the TIFF file as a zarr store (a tifffile.ZarrTiffStore).
        # No temporary zarr files on disk are created. Instead, when the zarr store is accessed,
        # the tifffile library will read the pixel data from the underlying existing tiff file.
        # Dask arrays are then created from these zarr stores.
        # This proces is lazy and almost instantaneous.
        zarr_stores = []
        dask_arrays = []
        for channel in range(len(self.channel_names)):
            zarr_store = tifffile.imread(self.filepath, key=channel, aszarr=True)
            zarr_stores.append(zarr_store)
            dask_array = da.from_zarr(zarr_store)
            dask_arrays.append(dask_array)

        # Combine dask arrays for each channel into a single dask array.
        full_dask_image = da.stack(dask_arrays)
        yield full_dask_image

        # Close all ZarrTiffStore's
        for zarr_store in zarr_stores:
            zarr_store.close()

    def _get_channel_names(self):
        if self.pages.kind.lower() == 'qpi':
            # It's a PerkinElmer qptiff, for example from a VectraPolaris scanner.
            # These apparently do not have OME metadata, but store metadata in a 
            # description tag per channel image. 
            # The name of fluorophore is stored in the "Name" tag. More recent versions
            # of the file format have an additional "Biomarker" tag that stores the
            # detected molecule (e.g. Ki67, Podoplanin, ...). If present we will use the biomarker
            # as channel name, otherwise the fluorophore name.
            # See also the bioformats VectroPolaris qptiff reader:
            # https://github.com/ome/bioformats/blob/develop/components/formats-gpl/src/loci/formats/in/VectraReader.java
            # and the Akoya PhenoChart user guide (appendix B): https://www.akoyabio.com/wp-content/uploads/PhenochartUserManual_2_2_0_rev0.pdf
            channel_names = []
            for page in self.pages:
                description = ElementTree.fromstring(page.description)
                
                image_type = description.find('ImageType').text
                assert image_type == "FullResolution"

                biomarker_tag = description.find('Biomarker')
                if biomarker_tag is not None:
                    channel_name = biomarker_tag.text
                else:
                    name_tag = description.find('Name')
                    if name_tag is not None:
                        channel_name = name_tag.text
                    else:
                        channel_name = None
                channel_names.append(channel_name)
            return channel_names
        elif self.pages.kind.lower() == 'ome':
            # If the qptiff has OME metadata (like Lunaphore COMET)
            # read it from the OME XML tag.
            xml = tifffile.tiffcomment(self.filepath)
            assert xml
            ome = ome_types.from_xml(xml)
            assert len(ome.images) == 1
            channel_names = [channel.name for channel in ome.images[0].pixels.channels]
            return channel_names
        elif self.pages.kind.lower() == 'imagej':
            # ImageJ compatible TIFFs have yet another way to store metadata.
            # The channel names are stored under the "Labels" key in this dict.
            return self.tif.imagej_metadata['Labels']
        else:
            return []

    # def _get_pixel_size_um(self):
    #     page = self.pages[0]  # pick the first channel, the resolution of all channels is assumed to be identical
    #     scale = 1000000 if page.tags['ResolutionUnit'].value == tifffile.RESUNIT.NONE else None  # ImageJ compatible TIFFs (sometimes?) don't specify the ResolutionUnit tag, but assume micrometers
    #     resolution = page.get_resolution(tifffile.RESUNIT.MICROMETER, scale=scale)
    #     assert resolution[0] == resolution[1], "(qp)tiff x and y resolution are not the same"
    #     pixelsize = 1 / resolution[0]
    #     return pixelsize
    
    def _get_pixel_size_um(self) -> float | None:
        # If the TIFF file is an OME TIFF, and the OME XML tag specifies a physical size, then return this value
        if self.tif.is_ome:
            pixelsize = self._ome_get_physical_size_x_um()
            if pixelsize is not None:
                return pixelsize
            
        # If the file is not OME, or the OME XML file did not specify a pixelsize,
        # then attempt to get it from the regular TIFF metadata tags.
        page = self.pages[0]  # pick the first channel, the resolution of all channels is assumed to be identical
        unit = page.tags['ResolutionUnit'].value
        if unit == tifffile.RESUNIT.NONE:
            return None  # if it's an ImageJ TIFF file, we could check TiffTag 270 ImageDescription, which may contain a units specification, but for now we simply don't support this case. Neither does QuPath, it seems.
        resolution = page.get_resolution(tifffile.RESUNIT.MICROMETER)
        assert resolution[0] == resolution[1], "(qp)tiff x and y resolution are not the same"
        pixelsize = 1 / resolution[0]
        return pixelsize
        
        
    def _ome_get_physical_size_x_um(self) -> float | None:
        assert self.tif.is_ome
        ome_xml = self.tif.ome_metadata
        ome = ome_types.from_xml(ome_xml, validate=False)
        pixels = ome.images[0].pixels
        size = pixels.physical_size_x
        if size is None:
            return None
        match pixels.physical_size_x_unit:
            case UnitsLength.NANOMETER:
                return size / 1000.0
            case UnitsLength.MICROMETER:
                return size
            case UnitsLength.MILLIMETER:
                return size * 1000.0
            case UnitsLength.CENTIMETER:
                return size * 10000.0
            case _:
                assert False, f"OME unit {pixels.physical_size_x_unit} is not supported"

    # Note about tiff/qptiff pages:
    # - TiffFile.pages[] has *all* the images in the qptiff file,
    #   including the different lower-resolution levels in the image pyramid,
    #   the overview and thumbnail images etc.
    # - TiffFile.series[0].pages[] on the other hand only stores the full resolution
    #   images with the actual image data, which is exactly what we need.

#### Create spatialdata object with multiscale image:

In [None]:
# Takes about 8 min on mac OS M2

zarr_path = os.path.join(output_path, "sdata_phenocycler.zarr")

# Open the scan once: read its metadata and, while the file is still open,
# write the lazily loaded (c, y, x) image to a multiscale image layer.
with Tiff(input_path) as tif:
    channel_names = tif.channel_names
    pixel_size_um = tif.pixel_size_um

    # The dask array is only valid inside this block; the underlying stores
    # are closed when the block is left.
    with tif.dask_array() as dask_img:
        hp.io.create_sdata(
            dask_img,
            dims=["c", "y", "x"],
            output_path=zarr_path,
            img_layer="scan",
            chunks=2048,
            scale_factors=[2, 2, 2, 2, 2],
            c_coords=channel_names,
        )

In [None]:
import os
from spatialdata import read_zarr

# Load the SpatialData object from the zarr store under output_path.
sdata = read_zarr(os.path.join(output_path, "sdata_phenocycler.zarr"))

Exercise:

Crop out each region and save each one separately in its own coordinate system:

- Read in the polygons that define each region (`geopandas.read_file(...)`).
- Add the polygons to the SpatialData object (`harpy.sh.add_shapes_layer(...)`).
- Rasterize the polygons to create the corresponding mask (`harpy.im.rasterize(...)`).
- Extract the region (`dask.array.where(...)`).
- Add the image to the SpatialData object (`harpy.im.add_image_layer(...)`).

In [None]:
import geopandas as gpd
from spatialdata.transformations import Translation
from harpy.image._image import _get_spatial_element

# takes 3 min on macOS M2

# GeoJSON file (relative to data_path) with the outline of each TMA region,
# keyed by the name used for the region's layers and coordinate system.
region_files = {
    "region1": "D231-RLL-14C2-HM-TMA-2-1-2.geojson",
    "region2": "D239-RLL-19C2-HM-TMA-2-2-2.geojson",
    "region3": "D260-RLL-12C2-HM-TMA-2-3-2.geojson",
    "region4": "D264-LLL-11C1-HM-TMA-2-4-2.geojson",
    "region5": "D264-LLL-11C2-HM-TMA-2-5-2.geojson",
    "region6": "D265-RLL-10C2-HM-TMA-2-6-2.geojson",
    "region7": "D271-RLL-13D4-HM-TMA-2-7-2.geojson",
    "region8": "D292-RLL-14C3-HM-TMA-2-8-2.geojson",
    "region9": "D341-RLL-14C2-HM-TMA-2-9-2.geojson",
    "region10": "D346-RLL-17C3-HM-TMA-2-10-2.geojson",
}

tma_paths = list(region_files.values())
tma_name = list(region_files.keys())

# Full scan image; its (y, x) shape defines the raster grid of every mask.
se_image = _get_spatial_element(sdata, layer="scan")

for _tma_path, _tma_name in zip(tma_paths, tma_name, strict=True):
    labels_layer = f"{_tma_name}_labels"

    # Read the region outline and store it as a shapes layer.
    gdf = gpd.read_file(os.path.join(data_path, _tma_path))
    # Shift the index by one; presumably so the rasterized polygon gets
    # label 1 (see the `mask == 1` test below) -- TODO confirm.
    gdf.index = gdf.index + 1
    sdata = hp.sh.add_shapes_layer(sdata, input=gdf, output_layer=_tma_name, overwrite=True)

    # Rasterize the shapes layer onto the grid of the full scan.
    sdata = hp.im.rasterize(
        sdata,
        shapes_layer=_tma_name,
        output_layer=labels_layer,
        out_shape=se_image.shape[1:],
        chunks=2048,
        overwrite=True,
        scale_factors=[2, 2, 2, 2, 2],
    )

    se_mask = _get_spatial_element(sdata, layer=labels_layer)

    # Add a leading axis to the mask so it broadcasts over the channel axis.
    mask = se_mask.data[None, ...]
    image = se_image.data

    # Keep the pixels where the mask equals 1, zero everything else.
    masked_image = da.where(mask == 1, image, 0)

    # Bounding box of the region, truncated to integers.
    x_min, y_min, x_max, y_max = sdata[_tma_name].geometry.total_bounds.astype(int)
    masked_image = masked_image[:, y_min:y_max, x_min:x_max]

    # The crop starts at (x_min, y_min), which is recorded as a translation
    # into the coordinate system named after the region.
    translation = Translation(translation=[x_min, y_min], axes=("x", "y"))

    sdata = hp.im.add_image_layer(
        sdata,
        arr=masked_image.rechunk(2048),
        output_layer=f"scan_{_tma_name}",
        transformations={_tma_name: translation},
        c_coords=se_image.c.data,
        overwrite=True,
    )

In [None]:
import spatialdata_plot
from matplotlib.colors import Normalize

# Intensity window used for display; values outside [vmin, vmax] are clipped.
vmin = 100
vmax = 500

norm = Normalize(vmin=vmin, vmax=vmax, clip=True)

# Render the DAPI channel of the full scan at pyramid level "scale3".
sdata.pl.render_images(
    element="scan",
    channel="DAPI",
    scale="scale3",
    cmap="gray",
    norm=norm,
).pl.show()

Exercise:

Plot each region using either `harpy` or `spatialdata-plot`.

<details>
<summary>Click to reveal the solution</summary>

```python

for _tma_name in tma_name:
    print( _tma_name )
    hp.pl.plot_image(
        sdata,
        img_layer=f"scan_{_tma_name}",
        channel="DAPI",
        figsize=(5,5),
        to_coordinate_system=_tma_name,
          )
```

</details>

In [None]:
# Display the SpatialData object as the cell output.
sdata

#### Select 4 channels (DAPI, CD45, CD68, CD14), and segment using InstanSeg.

For optimal segmentation results a more careful selection of channels is probably necessary.

In [None]:
# Plot the CD45 channel of region 1 in the "region1" coordinate system.
hp.pl.plot_image(
    sdata,
    img_layer="scan_region1",
    channel="CD45",
    to_coordinate_system="region1",
    vmin_img=50,
    vmax_img=1000,
    figsize=(8, 8),
)

In [None]:
# Plot the CD68 channel of region 1 in the "region1" coordinate system.
hp.pl.plot_image(
    sdata,
    img_layer="scan_region1",
    channel="CD68",
    to_coordinate_system="region1",
    vmin_img=50,
    vmax_img=1000,
    figsize=(8, 8),
)

In [None]:
# Plot the CD14 channel of region 1 in the "region1" coordinate system.
hp.pl.plot_image(
    sdata,
    img_layer="scan_region1",
    channel="CD14",
    to_coordinate_system="region1",
    vmin_img=200,
    vmax_img=500,
    figsize=(8, 8),
)

In [None]:
import os
import requests
import zipfile
import tempfile

# Parent directory for the downloaded model: the system temporary directory.
OUTPUT_DIR =  tempfile.gettempdir()

def download_and_unzip(url, extract_to):
    """Download the zip archive at ``url`` and extract it into ``extract_to``.

    If ``extract_to`` already exists and is not empty, the archive is assumed
    to have been downloaded before and nothing is done. If the download or the
    extraction fails, ``extract_to`` is removed again so that a later call
    retries instead of reporting a half-finished directory as downloaded.

    Parameters
    ----------
    url
        Address of the zip archive.
    extract_to
        Directory that receives the extracted files; created if needed.

    Raises
    ------
    Whatever the download (``requests``), file I/O or ``zipfile`` raises.
    """
    import shutil  # only needed for the cleanup on failure

    if os.path.isdir(extract_to) and os.listdir(extract_to):
        print("Model already downloaded.")
        return

    os.makedirs(extract_to, exist_ok=True)
    local_zip_path = os.path.join(extract_to, 'downloaded.zip')
    try:
        print("Downloading...")
        # The timeout keeps a stalled connection from hanging the notebook;
        # the `with` releases the connection once the body is streamed.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(local_zip_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        print("Unzipping...")
        with zipfile.ZipFile(local_zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
        os.remove(local_zip_path)
    except BaseException:
        # Also covers KeyboardInterrupt: an interrupted download must not
        # leave a directory behind that looks like a finished one.
        shutil.rmtree(extract_to, ignore_errors=True)
        raise
    print(f"Done! Files extracted to: {extract_to}")

# Directory that receives the extracted model files.
target_path = os.path.join(OUTPUT_DIR, "fluorescence_nuclei_and_cells")

# InstanSeg "fluorescence_nuclei_and_cells" model archive (models release v0.1.0).
url = "https://github.com/instanseg/instanseg/releases/download/instanseg_models_v0.1.0/fluorescence_nuclei_and_cells.zip"

download_and_unzip(url=url, extract_to=target_path)

#### Make a new image layer with the selection of channels

Exercise:

- List all channels.
- Subset the image named `scan_region1` to the selected channels (`[ "DAPI", "CD45", "CD68", "CD14" ]`).
- Add the subsetted image to the SpatialData object.

<details>
<summary>Click to reveal the solution</summary>

```python

sdata[ "scan_region1" ].c.data

#sdata[ "scan_region1" ].c.data.tolist()
labels = [ "DAPI", "CD45", "CD68", "CD14" ]

array_channels = sdata[ "scan_region1" ].c.data
label_to_index = {label: idx for idx, label in enumerate(array_channels)}
indices = [label_to_index[label] for label in labels]

from spatialdata.transformations import get_transformation

transformations = get_transformation( sdata[ "scan_region1" ], get_all=True )

sdata = hp.im.add_image_layer(
    sdata,
    arr = sdata[ "scan_region1" ].data[ indices ],
    output_layer = "scan_region1_subset",
    transformations=transformations,
    c_coords = labels,
    overwrite=True,
)
```

</details>

In [None]:
# List the channel names of the region 1 image.
sdata[ "scan_region1" ].c.data

In [16]:
# Channels used for segmentation (see sdata["scan_region1"].c.data.tolist()
# for everything that is available).
labels = ["DAPI", "CD45", "CD68", "CD14"]

# Map every channel name of the image to its position along the c axis,
# then look up the positions of the selected channels in the order given.
array_channels = sdata["scan_region1"].c.data
label_to_index = {name: position for position, name in enumerate(array_channels)}
indices = list(map(label_to_index.__getitem__, labels))

In [None]:
from spatialdata.transformations import get_transformation

# Reuse all transformations of the source image, so the subset is placed in
# the same coordinate systems.
source_image = sdata["scan_region1"]
transformations = get_transformation(source_image, get_all=True)

# Store the selected channels as a new image layer.
sdata = hp.im.add_image_layer(
    sdata,
    arr=source_image.data[indices],
    output_layer="scan_region1_subset",
    transformations=transformations,
    c_coords=labels,
    overwrite=True,
)

In [None]:
from dask.distributed import Client, LocalCluster

# Create a local Dask cluster.
cluster = LocalCluster(
    n_workers=4,  # worker processes; can be increased depending on available memory/cores
    threads_per_worker=1,  # threads per worker
    memory_limit="32GB",  # memory limit per worker
)

# Connect a client to the cluster and print the Dask dashboard link.
client = Client(cluster)
print(client.dashboard_link)

In [None]:
import harpy as hp
import os

import torch
from instanseg import InstanSeg

# takes approx 6 minutes on macOS

path_model = os.path.join(target_path, "instanseg.pt")

# WARNING: weights_only=False unpickles arbitrary Python objects; only load
# model files obtained from a trusted source.
# NOTE(review): the InstanSeg object built here is not passed to
# hp.im.segment below, which receives the model *path* instead -- confirm
# whether this load is still needed.
instanseg_fluorescence = torch.load(path_model, weights_only=False)
instanseg_fluorescence = InstanSeg(model_type=instanseg_fluorescence, device="cpu")

# Optional region to segment; None segments the whole image.
crd_segment = None  # [ 12000, 14000, 44000, 46000 ]

try:
    sdata = hp.im.segment(
        sdata,
        img_layer="scan_region1_subset",
        output_labels_layer=["labels_cells_instanseg"],
        output_shapes_layer=["shapes_cells_instanseg"],
        labels_layer_align=None,
        depth=50,
        model=hp.im.instanseg_callable,
        # parameters passed to hp.im.instanseg_callable
        output="cells",
        device="cpu",
        instanseg_model=path_model,  # load it in every worker, because torchscript model is not serializable
        iou=True,
        trim=False,
        crd=crd_segment,
        to_coordinate_system="region1",
        overwrite=True,
    )
finally:
    # Shut down the client and the local cluster it is connected to, also
    # when segmentation fails, so no worker processes are left running.
    client.close()
    cluster.close()

In [21]:
# Channels that were used for segmentation.
labels = [ "DAPI", "CD45", "CD68", "CD14" ]

In [None]:
# Overlay the segmented cell outlines on the DAPI channel for a small window.
hp.pl.plot_shapes(
    sdata,
    img_layer="scan_region1",
    shapes_layer="shapes_cells_instanseg",
    channel="DAPI",
    to_coordinate_system="region1",
    vmin_img=50,
    vmax_img=500,
    alpha=0.5,
    crd=[12000, 13000, 44000, 45000],
    figsize=(8, 8),
)

In [None]:
# Overlay the segmented cell outlines on the CD45 channel for a small window.
hp.pl.plot_shapes(
    sdata,
    img_layer="scan_region1",
    shapes_layer="shapes_cells_instanseg",
    channel="CD45",
    to_coordinate_system="region1",
    vmin_img=50,
    vmax_img=500,
    alpha=0.2,
    crd=[12000, 13000, 44000, 45000],
    figsize=(8, 8),
)

In [None]:
# Overlay the segmented cell outlines on the CD68 channel for a small window.
hp.pl.plot_shapes(
    sdata,
    img_layer="scan_region1",
    shapes_layer="shapes_cells_instanseg",
    channel="CD68",
    to_coordinate_system="region1",
    vmin_img=200,
    vmax_img=500,
    alpha=0.4,
    crd=[12000, 13000, 44000, 45000],
    figsize=(8, 8),
)

In [None]:
# Overlay the segmented cell outlines on the CD14 channel for a small window.
hp.pl.plot_shapes(
    sdata,
    img_layer="scan_region1",
    shapes_layer="shapes_cells_instanseg",
    channel="CD14",
    to_coordinate_system="region1",
    vmin_img=200,
    vmax_img=500,
    alpha=0.2,
    crd=[12000, 13000, 44000, 45000],
    figsize=(8, 8),
)

In [146]:
#from spatialdata import read_zarr
#sdata = read_zarr( sdata.path )

In [26]:
# we want to extract all channels
from spatialdata.transformations import get_transformation
from spatialdata.transformations import Translation

if crd_segment is not None:
    # Only needed when a sub-region was segmented: crop all channels of the
    # region 1 image to that sub-region.
    source_image = sdata["scan_region1"]
    transformations = get_transformation(source_image, get_all=True)

    # Offset of the image in the "region1" coordinate system, read from the
    # last column of the (y, x) affine matrix.
    affine = transformations["region1"].to_affine_matrix(input_axes=("y", "x"), output_axes=("y", "x"))
    y_translation = affine[0, 2]
    x_translation = affine[1, 2]

    # crd_segment is indexed as [x_min, x_max, y_min, y_max]; subtracting the
    # offset converts it to positions in the image array.
    y_start = crd_segment[2] - y_translation
    y_stop = crd_segment[3] - y_translation
    x_start = crd_segment[0] - x_translation
    x_stop = crd_segment[1] - x_translation
    array = source_image.data[:, y_start:y_stop, x_start:x_stop]

    sdata = hp.im.add_image_layer(
        sdata,
        arr=array.rechunk(2048),
        output_layer="scan_region1_crop",
        transformations={"region1": Translation(axes=("y", "x"), translation=[crd_segment[2], crd_segment[0]])},
        c_coords=sdata["scan_region1"].c.data,
        overwrite=True,
    )

In [None]:
# All channels of the region 1 image except DAPI.
region1_channels = sdata["scan_region1"].c.data
channels = region1_channels[region1_channels != "DAPI"]
channels

Exercise: 

Use the segmentation mask (`labels_cells_instanseg`) to create the AnnData table with the intensities for every channel.

<details>
<summary>Click to reveal the solution</summary>

```python

sdata = hp.tb.allocate_intensity( 
    sdata,
    img_layer= "scan_region1" if crd_segment is None else "scan_region1_crop",
    labels_layer="labels_cells_instanseg",
    output_layer="table_intensities",
    channels=channels,
    mode= "sum",
    to_coordinate_system="region1",
    overwrite=True,
        )
```

</details>

In [30]:
# Use the cropped image when only a sub-region was segmented.
intensity_img_layer = "scan_region1" if crd_segment is None else "scan_region1_crop"

# Build the table of per-cell intensities (mode "sum") for the selected channels.
sdata = hp.tb.allocate_intensity(
    sdata,
    img_layer=intensity_img_layer,
    labels_layer="labels_cells_instanseg",
    output_layer="table_intensities",
    channels=channels,
    mode="sum",
    to_coordinate_system="region1",
    overwrite=True,
)

In [None]:
# Show the first rows of the intensity table as a DataFrame.
sdata[ "table_intensities" ].to_df().head()

In [None]:
# Preprocess the intensity table (with size normalization enabled) and store
# the result as a new table layer.
sdata = hp.tb.preprocess_proteomics(
    sdata,
    labels_layer="labels_cells_instanseg",
    table_layer="table_intensities",
    output_layer="table_intensities_prepocessed",
    size_norm=True,
    overwrite=True,
)

In [None]:
# Show the first rows of the preprocessed intensity table as a DataFrame.
sdata[ "table_intensities_prepocessed" ].to_df().head()

In [None]:
import scanpy as sc

# Leiden clustering of the preprocessed intensities.
leiden_table = "table_intensities_leiden"

sdata = hp.tb.leiden(
    sdata,
    labels_layer="labels_cells_instanseg",
    table_layer="table_intensities_prepocessed",
    output_layer=leiden_table,
    calculate_umap=True,
    calculate_neighbors=True,
    n_pcs=17,  # number of principal components to use when calculating neighbors
    n_neighbors=35,  # number of neighbors to consider when calculating neighbors
    resolution=0.4,
    rank_genes=True,
    key_added="leiden",
    overwrite=True,
)

# UMAP colored by Leiden cluster.
sc.pl.umap(sdata.tables[leiden_table], color=["leiden"], show=True)

# Top-ranked markers per cluster.
sc.pl.rank_genes_groups(
    sdata.tables[leiden_table],
    n_genes=8,
    sharey=False,
    show=True,
)

In [None]:
# Draw the cell outlines on the DAPI channel, colored by the "leiden" column
# of the clustering table.
hp.pl.plot_shapes(
    sdata,
    img_layer="scan_region1",
    table_layer="table_intensities_leiden",
    shapes_layer="shapes_cells_instanseg",
    column="leiden",
    channel="DAPI",
    linewidth=0.2,
    alpha=0.7,
    figsize=(8, 8),
    to_coordinate_system="region1",
)

In [None]:
# Optional interactive viewer (left disabled):
#from napari_spatialdata import Interactive

#Interactive( sdata )
# Display the region 1 image element as the cell output.
sdata[ "scan_region1" ]

#### FlowSOM clustering

First we create a crop.

In [39]:
from spatialdata import read_zarr

# Re-read the SpatialData object from the zarr store it is backed by.
sdata = read_zarr(sdata.path)

In [40]:
from spatialdata import bounding_box_query

# Name of the layer that holds the crop used for pixel clustering.
crop_layer = "scan_region1_flowsom_crop"

# Cut a 2000 x 2000 window (coordinates given in the "region1" system) out
# of the region 1 image.
se = bounding_box_query(
    sdata["scan_region1"],
    axes=("y", "x"),
    min_coordinate=[42000, 11000],
    max_coordinate=[44000, 13000],
    target_coordinate_system="region1",
)

# Register the crop in the SpatialData object and write it to disk.
sdata[crop_layer] = se
sdata.write_element(crop_layer, overwrite=True)


In [None]:
# Plot the DAPI channel of the crop.
hp.pl.plot_image(
    sdata,
    img_layer="scan_region1_flowsom_crop",
    to_coordinate_system="region1",
    channel="DAPI",
    figsize=(5, 5),
)

In [None]:
# All channels of the crop except DAPI.
crop_channels = sdata["scan_region1_flowsom_crop"].c.data
channels = crop_channels[crop_channels != "DAPI"]
channels

#### Preprocessing before flowsom clustering.

In [None]:
# Preprocess the selected channels of the crop for pixel clustering and
# store the result as a new image layer.
sdata = hp.im.pixel_clustering_preprocess(
    sdata,
    img_layer="scan_region1_flowsom_crop",
    output_layer="scan_region1_flowsom_preprocessed",
    channels=channels,
    overwrite=True,
)

In [None]:
# List the channel names of the preprocessed image.
sdata[ "scan_region1_flowsom_preprocessed" ].c.data

In [None]:
# CD68 intensity histogram of the crop before preprocessing.
hp.pl.histogram(
    sdata,
    img_layer="scan_region1_flowsom_crop",
    channel="CD68",
    bins=100,
    fig_kwargs={"figsize": (4, 4)},
)

# Same histogram after preprocessing (smoothing is applied).
hp.pl.histogram(
    sdata,
    img_layer="scan_region1_flowsom_preprocessed",
    channel="CD68",
    bins=100,
    fig_kwargs={"figsize": (4, 4)},
)

#### FlowSOM clustering:

In [None]:
import flowsom as fs
from dask.distributed import Client, LocalCluster

# Set to True to run the clustering on a local Dask cluster.
work_with_client = False

client = None
if work_with_client:
    # client example
    cluster = LocalCluster(n_workers=1, threads_per_worker=10)
    client = Client(cluster)

batch_model = fs.models.BatchFlowSOMEstimator

# Pixel-level FlowSOM clustering of the preprocessed crop. Both the clusters
# and the metaclusters are written as labels layers.
sdata, fsom, mapping = hp.im.flowsom(
    sdata,
    img_layer=["scan_region1_flowsom_preprocessed"],
    output_layer_clusters=["scan_region_1_flowsom_clusters"],
    output_layer_metaclusters=["scan_region_1_flowsom_metaclusters"],
    n_clusters=20,
    random_state=111,
    chunks=512,
    client=client,
    model=batch_model,
    num_batches=10,
    xdim=10,
    ydim=10,
    z_score=True,
    z_cap=3,
    persist_intermediate=True,
    overwrite=True,
)

In [None]:
# Build the "counts_clusters" table from the preprocessed image, the cluster
# labels and the cluster mapping returned by the FlowSOM step.
sdata = hp.tb.cluster_intensity(
    sdata,
    mapping=mapping,
    img_layer=["scan_region1_flowsom_preprocessed"],
    labels_layer=["scan_region_1_flowsom_clusters"],
    to_coordinate_system=["region1"],
    output_layer="counts_clusters",
    overwrite=True,
)

In [None]:
# Display the cluster labels element as the cell output.
sdata[ "scan_region_1_flowsom_clusters" ]

In [49]:
from spatialdata import read_zarr

# Re-read the SpatialData object from the zarr store it is backed by.
sdata = read_zarr(sdata.path)

In [None]:
from spatialdata import SpatialData
from spatialdata import read_zarr

# Due to weird bug in spatialdata plot, we need to use this temporary spatial data object
# that holds only the two labels layers to be plotted.
sdata_temp = SpatialData()
for _layer in ("scan_region_1_flowsom_clusters", "scan_region_1_flowsom_metaclusters"):
    sdata_temp[_layer] = sdata[_layer]

# FlowSOM clusters.
hp.pl.pixel_clusters(
    sdata_temp,
    labels_layer="scan_region_1_flowsom_clusters",
    figsize=(10, 10),
    to_coordinate_system="region1",
    render_labels_kwargs={"alpha": 1},
)

# FlowSOM metaclusters.
hp.pl.pixel_clusters(
    sdata_temp,
    labels_layer="scan_region_1_flowsom_metaclusters",
    figsize=(10, 10),
    to_coordinate_system="region1",
    render_labels_kwargs={"alpha": 1},
)

In [None]:
# Draw the z-scored heatmap twice: first with metaclusters=True, then with
# metaclusters=False.
for _metaclusters in (True, False):
    hp.pl.pixel_clusters_heatmap(
        sdata,
        table_layer="counts_clusters",
        figsize=(40, 16),
        fig_kwargs={"dpi": 300},
        linewidths=0.001,
        metaclusters=_metaclusters,
        z_score=True,
    )

Analysis of the clusters:

Cluster 9 -> SOX2 and SOX9 <br>
Cluster 10 -> Podoplanin and LYVE1 <br>
Cluster 2 -> MUC5AC, Pan-Cytokeratin, KRT8,... <br>
Cluster 3 -> CD31, Caveolin <br>

...

In [None]:
import numpy as np
import squidpy as sq

# Key under which the cluster id and the enrichment results are stored.
key_added = "cluster_id"

# Spatial neighborhood analysis of the metaclusters on a hexagonal grid.
adata = hp.tb.spatial_pixel_neighbors(
    sdata,
    labels_layer="scan_region_1_flowsom_metaclusters",
    key_added=key_added,
    mode="most_frequent",
    grid_type="hexagon",
    size=20,
    subset=None,
)

# Replace NaN z-scores (np.nan_to_num) before plotting.
enrichment = adata.uns[f"{key_added}_nhood_enrichment"]
enrichment["zscore"] = np.nan_to_num(enrichment["zscore"])

sq.pl.nhood_enrichment(adata, cluster_key=key_added, method="ward", mode="zscore", figsize=(8, 8))

In [None]:
#from napari_spatialdata import Interactive

#Interactive( sdata )