In [None]:
import numpy as np
import pandas as pd
import os
import sys
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
import scanpy as sc
import squidpy as sq
import spatialdata as sd
import spatialdata_io as sdio
import spatialdata_plot

from joblib import Parallel, delayed

from spatialdata.transformations import (
        Affine,
        Identity,
        MapAxis,
        Scale,
        Sequence,
        Translation,
        get_transformation,
        get_transformation_between_coordinate_systems,
        set_transformation,
    )

# Put the directory two levels above the current working directory on sys.path
# (needed for the `src.*` imports used further down).
prjdir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if prjdir not in sys.path:
    sys.path.append(prjdir)

# Worker count shared by scanpy and by the explicit n_jobs arguments in later cells.
n_jobs = 32
sc.settings.n_jobs = n_jobs
sc.set_figure_params(
    dpi=100,
    frameon=True,
    vector_friendly=True,
    fontsize=10,
)

from matplotlib.colors import LinearSegmentedColormap
# Two-stop colormap running from light grey to medium blue.
cmap = LinearSegmentedColormap.from_list('grey_to_blue', ['lightgrey', 'mediumblue'])

import warnings
from warnings import simplefilter
# Ignore pandas PerformanceWarning globally for this session.
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to the project modules
# below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Project-local helper modules (resolved through the sys.path entry added above).
import src.spatial_helpers.spatial as spp
import src.spatial_helpers.spatialplot as spl
import src.spatial_helpers.sc as scp
# presumably seeds the random number generators used downstream — confirm in src.spatial_helpers.sc
scp.set_all_seeds()

In [None]:
rawdir = '../../data/public/bioimagearchive' # raw data dir (with subdirs crca_xenium_slide1, crca_xenium_slide2, ...)
samplesdir = '../../data/xenium_samples' # dir to save single samples
datadir = '../../data/xenium_merged' # dir to save generated data
resultsdir = '../../data/xenium_results' # dir to save results

# Create every output directory up front: later cells write into samplesdir and
# datadir (and list samplesdir), not only into resultsdir.
for _outdir in (samplesdir, datadir, resultsdir):
    os.makedirs(_outdir, exist_ok=True)

## Import data and save individual samples

In [None]:
from spatialdata_io._constants._constants import XeniumKeys

# Channel labels by index: the four XeniumKeys morphology-focus channel names
# followed by a fifth placeholder entry called 'dummy'.
_channel_labels = (
    XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_0,
    XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_1,
    XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_2,
    XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_3,
    'dummy',
)
channel_names = dict(enumerate(_channel_labels))

# Keyword arguments handed to sdio.xenium_aligned_image in the import cells below.
image_models_kwargs = {'c_coords': list(channel_names.values())}

### Run 1 (Slides 1 & 2)

In [None]:
# Import slide 1 / region 1, attach the morphology-focus image and the sample-area
# polygons, label the cells, and save the per-sample subsets.
sample_out = 'slide1_region1'
slide_dir = os.path.join(rawdir, 'crca_xenium_slide1')
region_dir = os.path.join(slide_dir, sample_out)

sdata = sdio.xenium(region_dir, morphology_focus=False, cells_as_circles=True, n_jobs=n_jobs)

# The morphology-focus image is read separately (morphology_focus=False above) so the
# custom channel names in image_models_kwargs can be supplied.
morphology_focus = os.path.join(region_dir, XeniumKeys.MORPHOLOGY_FOCUS_DIR, XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_IMAGE.format(0))
sdata.images['morphology_focus'] = sdio.xenium_aligned_image(morphology_focus, image_models_kwargs=image_models_kwargs, alignment_file=None)

# Sample-area polygons come from '<region>_coordinates.csv' next to the region folder.
sdata.shapes['sample_area'] = spp.import_shapes(os.path.join(slide_dir, sample_out + '_coordinates.csv'))
# presumably writes the enclosing sample-area name into obs['sample_id'] — confirm in spp.shapes_to_cells
sdata = spp.shapes_to_cells(sdata, shapes='sample_area', key='sample_id', n_cores=n_jobs)
sdata['table'].obs['run'] = 1
sdata['table'].obs['slide'] = 1

# Show the cells-per-sample counts explicitly; a bare expression in the middle of a
# cell is evaluated but never displayed.
display(sdata['table'].obs['sample_id'].value_counts())

# NOTE(review): this cell uses n_jobs=4 while the other region cells use n_jobs=1 — confirm intended.
spp.save_subset_shapes(sdata, shapes='sample_area', samplesdir=samplesdir, coordinate_system='global', n_jobs=4)

In [None]:
# Import slide 1 / region 2, attach the morphology-focus image and the sample-area
# polygons, label the cells, and save the per-sample subsets.
sample_out = 'slide1_region2'
slide_dir = os.path.join(rawdir, 'crca_xenium_slide1')
region_dir = os.path.join(slide_dir, sample_out)

sdata = sdio.xenium(region_dir, morphology_focus=False, cells_as_circles=True, n_jobs=n_jobs)

# The morphology-focus image is read separately (morphology_focus=False above) so the
# custom channel names in image_models_kwargs can be supplied.
morphology_focus = os.path.join(region_dir, XeniumKeys.MORPHOLOGY_FOCUS_DIR, XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_IMAGE.format(0))
sdata.images['morphology_focus'] = sdio.xenium_aligned_image(morphology_focus, image_models_kwargs=image_models_kwargs, alignment_file=None)

# Sample-area polygons come from '<region>_coordinates.csv' next to the region folder.
sdata.shapes['sample_area'] = spp.import_shapes(os.path.join(slide_dir, sample_out + '_coordinates.csv'))
# presumably writes the enclosing sample-area name into obs['sample_id'] — confirm in spp.shapes_to_cells
sdata = spp.shapes_to_cells(sdata, shapes='sample_area', key='sample_id', n_cores=n_jobs)
sdata['table'].obs['run'] = 1
sdata['table'].obs['slide'] = 1

# Show the cells-per-sample counts explicitly; a bare expression in the middle of a
# cell is evaluated but never displayed.
display(sdata['table'].obs['sample_id'].value_counts())

spp.save_subset_shapes(sdata, shapes='sample_area', samplesdir=samplesdir, coordinate_system='global', n_jobs=1)

In [None]:
# Import slide 2 / region 1, attach the morphology-focus image and the sample-area
# polygons, label the cells, and save the per-sample subsets.
sample_out = 'slide2_region1'
slide_dir = os.path.join(rawdir, 'crca_xenium_slide2')
region_dir = os.path.join(slide_dir, sample_out)

sdata = sdio.xenium(region_dir, morphology_focus=False, cells_as_circles=True, n_jobs=n_jobs)

# The morphology-focus image is read separately (morphology_focus=False above) so the
# custom channel names in image_models_kwargs can be supplied.
morphology_focus = os.path.join(region_dir, XeniumKeys.MORPHOLOGY_FOCUS_DIR, XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_IMAGE.format(0))
sdata.images['morphology_focus'] = sdio.xenium_aligned_image(morphology_focus, image_models_kwargs=image_models_kwargs, alignment_file=None)

# Sample-area polygons come from '<region>_coordinates.csv' next to the region folder.
sdata.shapes['sample_area'] = spp.import_shapes(os.path.join(slide_dir, sample_out + '_coordinates.csv'))
# presumably writes the enclosing sample-area name into obs['sample_id'] — confirm in spp.shapes_to_cells
sdata = spp.shapes_to_cells(sdata, shapes='sample_area', key='sample_id', n_cores=n_jobs)
sdata['table'].obs['run'] = 1
sdata['table'].obs['slide'] = 2

# Show the cells-per-sample counts explicitly; a bare expression in the middle of a
# cell is evaluated but never displayed.
display(sdata['table'].obs['sample_id'].value_counts())

spp.save_subset_shapes(sdata, shapes='sample_area', samplesdir=samplesdir, coordinate_system='global', n_jobs=1)

### Run 2 (Slides 3 & 4)

In [None]:
# Run 2 samples read from the crca_xenium_slide3 folder, keyed '0'..'7' in this order.
# Each folder name ('ID') is the sample id prefixed with 'run2_'.
_slide3_dir = os.path.join(rawdir, 'crca_xenium_slide3')
_slide3_sample_ids = [
    'crc_d_c', 'crc_f_n', 'crc_d_n', 'crc_e_m',
    'crc_d_m', 'crc_e_c', 'crc_e_n', 'crc_f_c',
]
samples_dict_1 = {
    str(position): {
        'ID': 'run2_' + sample_id,
        'sample_id': sample_id,
        'channels': channel_names,
        'dir': _slide3_dir,
    }
    for position, sample_id in enumerate(_slide3_sample_ids)
}

In [None]:
def read_sample(sample_info, run=2, slide=3):
    """Read one Xenium output folder and tag its table with sample metadata.

    Parameters
    ----------
    sample_info : dict
        Must provide 'dir' (parent folder), 'ID' (sub-folder name),
        'sample_id' (value stored in obs['sample_id']) and 'channels'
        (mapping whose values become the image channel names).
    run : int, optional
        Value written to obs['run']. Defaults to 2, the previously hard-coded value.
    slide : int, optional
        Value written to obs['slide']. Defaults to 3, the previously hard-coded value.

    Returns
    -------
    tuple
        ``(sample_info['sample_id'], sdata)``.
    """
    # Kept as a function-local import, as in the original; presumably so the function
    # is self-contained when joblib runs it in a worker process — confirm.
    from spatialdata_io._constants._constants import XeniumKeys

    image_models_kwargs = {'c_coords': list(sample_info['channels'].values())}
    sample_path = os.path.join(sample_info['dir'], sample_info['ID'])

    sdata = sdio.xenium(sample_path, morphology_focus=False, cells_as_circles=True, n_jobs=sc.settings.n_jobs)

    # The morphology-focus image is read separately so the custom channel names apply.
    morphology_focus = os.path.join(sample_path, XeniumKeys.MORPHOLOGY_FOCUS_DIR, XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_IMAGE.format(0))
    sdata.images['morphology_focus'] = sdio.xenium_aligned_image(morphology_focus, image_models_kwargs=image_models_kwargs, alignment_file=None)

    sdata['table'].obs['sample_id'] = sample_info['sample_id']
    sdata['table'].obs['run'] = run
    sdata['table'].obs['slide'] = slide

    # One folder holds one sample here, so the sample area is derived from the data
    # via spp.get_enclosing_rect rather than imported from a coordinates CSV.
    sdata['sample_area'] = spp.get_enclosing_rect(sdata, name=sample_info['sample_id'])
    return sample_info['sample_id'], sdata

In [None]:
# Read all slide-3 samples in parallel; read_sample returns (sample_id, sdata) pairs.
results = Parallel(n_jobs=n_jobs)(
    delayed(read_sample)(sample_info) for sample_info in samples_dict_1.values()
)
samples = dict(results)

In [None]:
# Save each sample as '<samplesdir>/run2_<sample_id>.zarr', replacing any existing store.
for sample_id, sample_sdata in samples.items():
    zarr_path = os.path.join(samplesdir, f'run2_{sample_id}.zarr')
    sample_sdata.write(zarr_path, overwrite=True)

In [None]:
# Run 2 samples read from the crca_xenium_slide4 folder, keyed '8'..'13' in this order.
# Each folder name ('ID') is the sample id prefixed with 'run2_'.
_slide4_dir = os.path.join(rawdir, 'crca_xenium_slide4')
_slide4_sample_ids = ['crc_h_c', 'crc_g_c', 'crc_i_c', 'crc_g_n', 'crc_f_m', 'crc_h_n']
samples_dict_2 = {
    str(position): {
        'ID': 'run2_' + sample_id,
        'sample_id': sample_id,
        'channels': channel_names,
        'dir': _slide4_dir,
    }
    for position, sample_id in enumerate(_slide4_sample_ids, start=8)
}

In [None]:
def read_sample(sample_info, run=2, slide=4):
    """Read one Xenium output folder and tag its table with sample metadata.

    Parameters
    ----------
    sample_info : dict
        Must provide 'dir' (parent folder), 'ID' (sub-folder name),
        'sample_id' (value stored in obs['sample_id']) and 'channels'
        (mapping whose values become the image channel names).
    run : int, optional
        Value written to obs['run']. Defaults to 2, the previously hard-coded value.
    slide : int, optional
        Value written to obs['slide']. Defaults to 4, the previously hard-coded value.

    Returns
    -------
    tuple
        ``(sample_info['sample_id'], sdata)``.
    """
    # Kept as a function-local import, as in the original; presumably so the function
    # is self-contained when joblib runs it in a worker process — confirm.
    from spatialdata_io._constants._constants import XeniumKeys

    image_models_kwargs = {'c_coords': list(sample_info['channels'].values())}
    sample_path = os.path.join(sample_info['dir'], sample_info['ID'])

    sdata = sdio.xenium(sample_path, morphology_focus=False, cells_as_circles=True, n_jobs=sc.settings.n_jobs)

    # The morphology-focus image is read separately so the custom channel names apply.
    morphology_focus = os.path.join(sample_path, XeniumKeys.MORPHOLOGY_FOCUS_DIR, XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_IMAGE.format(0))
    sdata.images['morphology_focus'] = sdio.xenium_aligned_image(morphology_focus, image_models_kwargs=image_models_kwargs, alignment_file=None)

    sdata['table'].obs['sample_id'] = sample_info['sample_id']
    sdata['table'].obs['run'] = run
    sdata['table'].obs['slide'] = slide

    # One folder holds one sample here, so the sample area is derived from the data
    # via spp.get_enclosing_rect rather than imported from a coordinates CSV.
    sdata['sample_area'] = spp.get_enclosing_rect(sdata, name=sample_info['sample_id'])
    return sample_info['sample_id'], sdata

In [None]:
# Read all slide-4 samples in parallel; read_sample returns (sample_id, sdata) pairs.
results = Parallel(n_jobs=n_jobs)(
    delayed(read_sample)(sample_info) for sample_info in samples_dict_2.values()
)
samples = dict(results)

In [None]:
# Save each sample as '<samplesdir>/run2_<sample_id>.zarr', replacing any existing store.
for sample_id, sample_sdata in samples.items():
    zarr_path = os.path.join(samplesdir, f'run2_{sample_id}.zarr')
    sample_sdata.write(zarr_path, overwrite=True)

### Run 3 (Slides 5 & 6)

Slide 5

In [None]:
# Read the slide 5 Xenium output; the morphology-focus image is added in the next cell.
sample_out = 'slide5_region1'
sdata = sdio.xenium(
    os.path.join(rawdir, 'crca_xenium_slide5', sample_out),
    morphology_focus=False,
    cells_as_circles=True,
    n_jobs=n_jobs,
)

In [None]:
# Path of the channel-0 morphology-focus image inside the slide 5 region folder.
morphology_focus = os.path.join(
    rawdir,
    'crca_xenium_slide5',
    sample_out,
    XeniumKeys.MORPHOLOGY_FOCUS_DIR,
    XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_IMAGE.format(0),
)
# Load it with the custom channel names and without an alignment file.
sdata.images['morphology_focus'] = sdio.xenium_aligned_image(
    morphology_focus,
    image_models_kwargs=image_models_kwargs,
    alignment_file=None,
)

In [None]:
# Sample-area polygons for slide 5 come from its coordinates CSV.
_coords_csv = os.path.join(rawdir, 'crca_xenium_slide5', 'slide5_region1_coordinates.csv')
sdata.shapes['sample_area'] = spp.import_shapes(_coords_csv)
# presumably writes the enclosing sample-area name into obs['sample_id'] — confirm in spp.shapes_to_cells
sdata = spp.shapes_to_cells(
    sdata,
    shapes='sample_area',
    key='sample_id',
    n_cores=n_jobs,
)

In [None]:
# Tag every cell with its run and slide number, then display the cells per sample.
for _column, _value in (('run', 3), ('slide', 5)):
    sdata['table'].obs[_column] = _value
sdata['table'].obs['sample_id'].value_counts()

In [None]:
# Plot the slide and outline each sample area in red as a visual check of the polygons.
ax = spl.spatialplot(sdata, figsize=(8,8), coordinate_system='global')
area_table = sdata['sample_area']
sample_areas = area_table['name'].unique()
for area_name in sample_areas:
    # First geometry registered under this name.
    outline = area_table[area_table['name'] == area_name].geometry.iloc[0]
    ax.add_patch(
        patches.Polygon(
            list(outline.exterior.coords),
            linewidth=2,
            edgecolor='red',
            facecolor='none',
        )
    )
plt.show()

In [None]:
# presumably writes one dataset per 'sample_area' polygon into samplesdir — confirm in spp.save_subset_shapes
spp.save_subset_shapes(
    sdata,
    shapes='sample_area',
    samplesdir=samplesdir,
    coordinate_system='global',
    n_jobs=1,
)

Slide 6

In [None]:
# Read the slide 6 Xenium output; the morphology-focus image is added in the next cell.
sample_out = 'slide6_region1'
sdata = sdio.xenium(
    os.path.join(rawdir, 'crca_xenium_slide6', sample_out),
    morphology_focus=False,
    cells_as_circles=True,
    n_jobs=n_jobs,
)

In [None]:
# Path of the channel-0 morphology-focus image inside the slide 6 region folder.
morphology_focus = os.path.join(
    rawdir,
    'crca_xenium_slide6',
    sample_out,
    XeniumKeys.MORPHOLOGY_FOCUS_DIR,
    XeniumKeys.MORPHOLOGY_FOCUS_CHANNEL_IMAGE.format(0),
)
# Load it with the custom channel names and without an alignment file.
sdata.images['morphology_focus'] = sdio.xenium_aligned_image(
    morphology_focus,
    image_models_kwargs=image_models_kwargs,
    alignment_file=None,
)

In [None]:
# Sample-area polygons for slide 6 come from its coordinates CSV.
_coords_csv = os.path.join(rawdir, 'crca_xenium_slide6', 'slide6_region1_coordinates.csv')
sdata.shapes['sample_area'] = spp.import_shapes(_coords_csv)
# presumably writes the enclosing sample-area name into obs['sample_id'] — confirm in spp.shapes_to_cells
sdata = spp.shapes_to_cells(
    sdata,
    shapes='sample_area',
    key='sample_id',
    n_cores=n_jobs,
)

In [None]:
# Tag every cell with its run and slide number, then display the cells per sample.
for _column, _value in (('run', 3), ('slide', 6)):
    sdata['table'].obs[_column] = _value
sdata['table'].obs['sample_id'].value_counts()

In [None]:
# Plot the slide and outline each sample area in red as a visual check of the polygons.
ax = spl.spatialplot(sdata, figsize=(8,8), coordinate_system='global')
area_table = sdata['sample_area']
sample_areas = area_table['name'].unique()
for area_name in sample_areas:
    # First geometry registered under this name.
    outline = area_table[area_table['name'] == area_name].geometry.iloc[0]
    ax.add_patch(
        patches.Polygon(
            list(outline.exterior.coords),
            linewidth=2,
            edgecolor='red',
            facecolor='none',
        )
    )
plt.show()

In [None]:
# presumably writes one dataset per 'sample_area' polygon into samplesdir — confirm in spp.save_subset_shapes
spp.save_subset_shapes(
    sdata,
    shapes='sample_area',
    samplesdir=samplesdir,
    coordinate_system='global',
    n_jobs=1,
)

## Combine samples from different runs

### Load samples

In [None]:
# Sample ids grouped by the run prefix of the saved dataset to load for each of them.
# NOTE(review): crc_d_c, crc_d_m and crc_h_n are also written with a 'run2_' prefix by the
# Run 2 cells earlier in this notebook — confirm the run1_/run3_ copies are the intended ones.
_sample_ids_by_run = {
    'run1': [
        'crc_a_c', 'crc_a_m', 'crc_b_c', 'crc_b_m', 'crc_c_c',
        'crc_c_m', 'crc_d_c', 'crc_d_m', 'crc_o_c',
    ],
    'run2': [
        'crc_e_n', 'crc_e_m', 'crc_e_c', 'crc_d_n', 'crc_f_n', 'crc_f_c',
        'crc_f_m', 'crc_g_n', 'crc_g_c', 'crc_h_c', 'crc_i_c',
    ],
    'run3': [
        'crc_h_n', 'crc_i_n', 'crc_j_n', 'crc_j_m', 'crc_j_c', 'crc_k_n',
        'crc_k_m', 'crc_k_c', 'crc_l_n', 'crc_l_m', 'crc_l_c', 'crc_m_n',
        'crc_m_m', 'crc_m_c', 'crc_n_n', 'crc_n_m', 'crc_n_c',
    ],
}

# Maps sample id -> '<run>_<sample id>', in the order listed above.
sample_dict = {
    sample_id: f'{run_prefix}_{sample_id}'
    for run_prefix, sample_ids in _sample_ids_by_run.items()
    for sample_id in sample_ids
}

# Load the saved per-sample datasets named in sample_dict from samplesdir.
samples = spp.load_samples(
    sample_dict,
    samplesdir,
    n_jobs=n_jobs,
)

In [None]:
# Expected files that are absent from samplesdir (an empty list means nothing is missing).
sample_files = [f'{stem}.zarr' for stem in sample_dict.values()]
# List the directory once instead of once per expected file.
_present_entries = set(os.listdir(samplesdir))
[fname for fname in sample_files if fname not in _present_entries]

In [None]:
# Entries in samplesdir that are not referenced by sample_dict.
sample_files = [f'{stem}.zarr' for stem in sample_dict.values()]
_expected_entries = set(sample_files)
[entry for entry in os.listdir(samplesdir) if entry not in _expected_entries]

### Coordinate transformations

In [None]:
### Centering
# Elements passed to the transformation helper for every sample.
elements = [
    'morphology_focus',
    'cell_labels',
    'nucleus_labels',
    'cell_circles',
    'transcripts',
    'cell_boundaries',
    'nucleus_boundaries',
]
with warnings.catch_warnings():
    # UserWarnings raised during the transformation are suppressed.
    warnings.simplefilter("ignore", UserWarning)
    samples = spp.par_apply_coord_transformation(
        samples,
        elements,
        rotations=None,
        translations=None,
        target_coordinate_system='global',
        center=True,
        n_jobs=n_jobs,
    )

In [None]:
# Element names in each sample carry the sample id as a prefix ('<sample>_<element>').
_px_element_suffixes = (
    'morphology_focus',
    'cell_labels',
    'nucleus_labels',
    'cell_circles',
    'transcripts',
    'cell_boundaries',
    'nucleus_boundaries',
    'sample_area',
)
for s in samples:
    elems = [s + '_' + suffix for suffix in _px_element_suffixes]
    # All listed elements are passed as pixel-based; none as already in µm.
    samples[s] = spp.transform_xenium_coords(samples[s], coordinate_system_µm='global_µm', elements_px=elems, elements_μm=[])

In [None]:
### Rotations
rotations = {
    
    # Run 1
    'crc_a_c': 0,
    'crc_a_m': 0,
    'crc_b_c': 10,
    'crc_b_m': 0,
    'crc_c_c': 20,
    'crc_c_m': 0,
    'crc_d_c': 0,
    'crc_d_m': 60,
    'crc_o_c': 60,
    
    # Run 2
    'crc_e_n': 0,
    'crc_e_m': 0,
    'crc_e_c': 0,
    'crc_d_n': 0,
    'crc_f_n': 10,
    'crc_f_c': 355,
    'crc_f_m': 0,
    'crc_g_n': 85,
    'crc_g_c': 0,
    'crc_h_c': 0,
    'crc_i_c': 45,

    # Run 3
    'crc_h_n': 90,
    'crc_i_n': 275,
    'crc_j_n': 5,
    'crc_j_m': 0,
    'crc_j_c': 0,
    'crc_k_n': 15,
    'crc_k_m': 30,
    'crc_k_c': 20,
    'crc_l_n': 0,
    'crc_l_m': 10,
    'crc_l_c': 10,
    'crc_m_n': 350,
    'crc_m_m': 15,
    'crc_m_c': 325,
    'crc_n_n': 0,
    'crc_n_m': 0,
    'crc_n_c': 355,

}

In [None]:
# True when every loaded sample has an entry in `rotations`.
set(samples).issubset(rotations)

In [None]:
# Apply the per-sample rotations, going from 'global_µm' to the new 'rot_µm' system.
elements = ['sample_area', 'cell_boundaries', 'cell_circles', 'transcripts', 'morphology_focus']
with warnings.catch_warnings():
    # UserWarnings raised during the transformation are suppressed.
    warnings.simplefilter("ignore", UserWarning)
    samples = spp.par_apply_coord_transformation(
        samples,
        elements,
        rotations=rotations,
        translations=None,
        target_coordinate_system='rot_µm',
        prior_coordinate_system='global_µm',
        center=True,
        n_jobs=n_jobs,
    )

In [None]:
### Global layout
sample_dims = {}
for s in list(samples):
    sample_dims[s] = spp.get_sample_dim(samples[s], s+'_morphology_focus')

In [None]:
# Grid used to arrange the samples: one column per patient letter (a..o) and one row
# per sample-id suffix ('c', 'm', 'n'); None marks a patient/suffix pair with no sample.
_patient_letters = 'abcdefghijklmno'
_absent_patients = {
    'c': set(),
    'm': set('ghio'),
    'n': set('abco'),
}
layout = [
    [
        None if letter in _absent_patients[suffix] else f'crc_{letter}_{suffix}'
        for letter in _patient_letters
    ]
    for suffix in ('c', 'm', 'n')
]

In [None]:
# Spacing argument for the layout helper; units/meaning are defined by
# spp.compute_transformations — TODO confirm.
_layout_spacing = (0.1, 0.1)
translations = spp.compute_transformations(layout, sample_dims, spacing=_layout_spacing)

In [None]:
# Apply the layout translations, going from 'rot_µm' to the shared 'merged_µm' system.
elements = ['cell_boundaries', 'cell_circles', 'transcripts', 'morphology_focus']
samples = spp.par_apply_coord_transformation(
    samples,
    elements,
    rotations=None,
    translations=translations,
    target_coordinate_system='merged_µm',
    prior_coordinate_system='rot_µm',
    center=False,
    n_jobs=n_jobs,
)

In [None]:
# Spot-check the table annotation of one sample. The sample is picked explicitly (the
# last key, as before) instead of relying on a loop variable left over from an earlier
# cell, and both values are displayed rather than only the last expression.
_example_id = list(samples)[-1]
_example_table = samples[_example_id]['table']
display(
    _example_table.uns['spatialdata_attrs']['region_key'],
    _example_table.obs['region'],
)

In [None]:
# Point every sample's table at that sample's own '<sample>_cell_boundaries' element.
for s in list(samples):
    boundaries_name = s + '_cell_boundaries'
    sample_sdata = samples[s]
    sample_sdata['table'].obs['region'] = boundaries_name
    sample_sdata.set_table_annotates_spatialelement(
        table_name='table',
        region=boundaries_name,
        region_key='region',
        instance_key='cell_id',
    )

In [None]:
# Merge all samples into one SpatialData object with a single concatenated table.
sdata = sd.concatenate(
    list(samples.values()),
    region_key='region',
    concatenate_tables=True,
    index_unique=None,
)
# Mask of rows whose index value already occurred earlier (first occurrence is False).
# NOTE(review): with index_unique=None, cells from different samples that share an index
# value are dropped here — confirm this is intended.
dup = sdata.tables['table'].obs.index.duplicated()
merged_table = sdata.tables['table']
sdata.tables['table'] = merged_table[~dup]

In [None]:
# Fraction of table rows that were flagged as duplicate indices.
_n_flagged = dup.sum()
print(_n_flagged / len(dup))

In [None]:
# shapes
# Combine the per-sample cell boundaries into one 'cell_boundaries' element.
sdata['cell_boundaries'] = spp.concat_shapes(sdata, elem='cell_boundaries', samples=samples.keys())
# Drop the outermost index level in place.
# presumably that level is the sample key added by concat_shapes — TODO confirm
sdata['cell_boundaries'].reset_index(level=0, inplace=True, drop=True)
# Keep only the first row for every repeated index value.
sdata['cell_boundaries'] = sdata['cell_boundaries'][~sdata['cell_boundaries'].index.duplicated(keep='first')]
# presumably aligns the table rows with the element's ids — confirm in spp.match_ids
sdata = spp.match_ids(sdata, ['cell_boundaries'], table_key='table')
# The table now annotates the merged element instead of the per-sample ones.
sdata.tables['table'].obs['region'] = 'cell_boundaries'
sdata.set_table_annotates_spatialelement('table', region_key='region', region='cell_boundaries')

# Same merge / index flattening / de-duplication for the cell circles.
sdata['cell_circles'] = spp.concat_shapes(sdata, elem='cell_circles', samples=samples.keys())
sdata['cell_circles'].reset_index(level=0, inplace=True, drop=True)
sdata['cell_circles'] = sdata['cell_circles'][~sdata['cell_circles'].index.duplicated(keep='first')]

In [None]:
# transcripts
# Combine the per-sample transcript points, then parse the result as a points element.
_merged_transcripts = spp.concat_points(sdata, elem='transcripts', samples=samples.keys())
sdata['transcripts'] = sd.models.PointsModel.parse(_merged_transcripts)

In [None]:
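# images
# The morphology images are intentionally not merged: the combined image is too large
# to save to zarr. The call is kept here, disabled, for reference.
# sdata['morphology_focus'] = spp.concat_images(sdata, elem='morphology_focus', samples=samples.keys())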

In [None]:
# Register the merged elements in 'merged_µm' with an identity transformation.
for _element_name in ('transcripts', 'cell_boundaries'):
    set_transformation(sdata[_element_name], Identity(), 'merged_µm')

In [None]:
# update coordinates in tables
# presumably aligns the table rows with the 'cell_circles' ids — confirm in spp.match_ids
sdata = spp.match_ids(sdata, ['cell_circles'], table_key='table')
# obsm['spatial'] becomes a two-column (x, y) array taken from the cell-circle geometries.
_circle_geometry = sdata['cell_circles']['geometry']
sdata.tables['table'].obsm['spatial'] = np.array([_circle_geometry.x, _circle_geometry.y]).T

In [None]:
# Letter code at the end of the name -> tissue region label.
sample_cat = {'m': 'margin', 'c': 'core', 'n': 'normal'}

# Take the last '_'-separated token of the `name` column and remove any digits from it;
# codes without an entry in sample_cat become 'Unknown'.
_region_code = (
    sdata['table'].obs.name
    .str.split('_')
    .str[-1]
    .str.replace(r'\d+', '', regex=True)
)
sdata['table'].obs['tissue_region'] = _region_code.map(sample_cat).fillna('Unknown')

In [None]:
# Number of cells per tissue region.
sdata['table'].obs['tissue_region'].value_counts()

In [None]:
# Patient id = the single character found between two underscores in obs['name']
# (first match; assumes names follow the crc_<patient>_<suffix> pattern of the sample
# ids in this notebook — confirm).
# expand=False makes str.extract return a Series, so a Series (not a one-column
# DataFrame) is assigned to the new column.
sdata['table'].obs['patient_id'] = sdata['table'].obs['name'].str.extract(r'_(.)_', expand=False)
sdata['table'].obs['patient_id'].value_counts()

In [None]:
# Short labels for the segmentation_method descriptions found in the table.
segmentation_method_redef = dict([
    ('Segmented by interior stain (18S)', 'interior'),
    ('Segmented by boundary stain (ATP1A1+CD45+E-Cadherin)', 'boundary'),
    ('Segmented by nucleus expansion of 5.0µm', 'nucleus'),
])
# Descriptions missing from the mapping become NaN in 'segm_meth'.
_short_method = sdata['table'].obs.segmentation_method.map(segmentation_method_redef)
sdata['table'].obs['segm_meth'] = _short_method

# batch_factor = '<name>_<segm_meth>', both parts cast to str first.
_name_text = sdata['table'].obs['name'].astype(str)
_method_text = sdata['table'].obs['segm_meth'].astype(str)
sdata['table'].obs['batch_factor'] = _name_text + '_' + _method_text

In [None]:
# Remove a leading 'run<x>_' prefix from sample_id. The prefix is replaced with an
# empty string (a space would leave a leading blank in every id) and the pattern is
# anchored so only the start of the id can match.
sdata['table'].obs['sample_id'] = sdata['table'].obs['sample_id'].str.replace(r'^run._', '', regex=True)
sdata['table'].obs['sample_id'].value_counts()

In [None]:
# Re-align the table with the merged 'cell_boundaries' element and register it as the
# element the table annotates.
sdata = spp.match_ids(
    sdata,
    ['cell_boundaries'],
    table_key='table',
)
sdata.tables['table'].obs['region'] = 'cell_boundaries'
sdata.set_table_annotates_spatialelement(
    'table',
    region_key='region',
    region='cell_boundaries',
)

In [None]:
# Elements to save: the three merged elements plus every per-sample sample area.
elems = ['transcripts', 'cell_boundaries', 'cell_circles']
elems += [f'{sample_id}_sample_area' for sample_id in samples]

# Subset without filtering the tables and write to '<datadir>/crca_xenium.zarr',
# replacing any existing store.
_merged_subset = sdata.subset(elems, filter_tables=False)
_merged_subset.write(os.path.join(datadir, 'crca_xenium.zarr'), overwrite=True)