#### Demo 1: CAST Mark captures common spatial features across multiple samples

In [5]:
import os
import numpy as np
import anndata as ad
import scanpy as sc
from CAST import *
import warnings
work_dir = '$demo_path'  # placeholder: replace with the path to the demo directory
warnings.simplefilter("ignore")  # ignore every warning category from here on

#### Load Data
- The CAST Mark method only requires the following data modalities:
    1. gene expression raw counts
    2. spatial coordinates of the cells
- We organize spatial omics data in the AnnData format (we recommend that readers pre-organize their data in this format):
    - `adata.X` stores the STARmap gene expression raw counts
    - `adata.obs` contains important cell-level annotations, including spatial coordinates (column names: `'x'`, `'y'`)
    - Data for different experimental samples are combined in a single AnnData object (column name: `'sample'`)

#### Settings

In [6]:
# Directory that receives the demo output; created on first run.
output_path = f'{work_dir}/demo1_CAST_Mark/demo_output'
os.makedirs(output_path, exist_ok=True)

# The input AnnData file is read from the sibling `data` directory.
adata = ad.read_h5ad(f'{output_path}/../data/demo1.h5ad')

# We use normalized counts for each cell as input gene expression.
norm_counts = sc.pp.normalize_total(adata, target_sum=1e4, inplace=False)['X']
# Densify only when the result exposes `.toarray()` (sparse matrix); a dense
# `adata.X` is passed through as a plain ndarray instead of raising.
if hasattr(norm_counts, 'toarray'):
    norm_counts = norm_counts.toarray()
adata.layers['norm_1e4'] = np.asarray(norm_counts)

samples = np.unique(adata.obs['sample'])  # used samples in adata

# Build both dictionaries from the same `samples` entries so their keys always
# agree. Iterating `groupby(by=['sample'])` (a length-1 list) yields 1-tuple
# keys such as `('name',)` on pandas >= 2.0, which would not match `samples`.
coords_raw = {}  # sample name -> (n_cells, 2) array of x/y coordinates
exp_dict = {}    # sample name -> normalized expression of that sample's cells
for sample_t in samples:
    in_sample = adata.obs['sample'] == sample_t
    coords_raw[sample_t] = adata.obs.loc[in_sample, ['x', 'y']].to_numpy()
    exp_dict[sample_t] = adata[in_sample].layers['norm_1e4']

#### Run

In [None]:
### Run the CAST Mark model on the per-sample coordinates and expression
### to generate the graph embedding (`output_path` is handed to the model as well)
embed_dict = CAST_MARK(coords_raw, exp_dict, output_path)

In [None]:
### Visualize the embedding with Kmeans clustering (k=20)
kmeans_plot_multiple(
    embed_dict,
    samples,
    coords_raw,
    'demo1',
    output_path,
    k=20,
    dot_size=10,
    minibatch=False,
)