## This code performs spatial transcriptomics (ST) data deconvolution based on Spotiphy

In [None]:
import spotiphy
import numpy as np
import pandas as pd
import scanpy as sc
import os
import torch

In [None]:
# Load the single-cell reference and the spatial transcriptomics (ST) dataset.
adata_sc = sc.read_h5ad("./2023_Nat_Can_adata.h5ad")
adata_st = sc.read_h5ad("./HGSOC_macro.h5ad")
# Use the stored "counts" layer as the working ST expression matrix.
adata_st.X = adata_st.layers["counts"].copy()

# Macrophage subtypes retained in the single-cell reference.
macro_subtype = [
    'M07_Macro-EREG', 'M08_Macro-FN1', 'M09_Macro-FABP5', 'M10_Macro-C1QA',
    'M11_Macro-VCAN', 'M12_Macro-C3', 'M14_Macro-FOLR2',
]

# obs column that holds the cell-type annotation; reused by all later cells.
key_type = "Annotation"

# Subset first, then snapshot X into the "counts" layer: the resulting object
# is the same as snapshotting before subsetting, but avoids copying the full
# (unfiltered) matrix only to throw most of it away.
adata_sc = adata_sc[adata_sc.obs[key_type].isin(macro_subtype)].copy()
adata_sc.layers["counts"] = adata_sc.X.copy()

# Sorted list of the annotation labels actually present after filtering.
type_list = sorted(adata_sc.obs[key_type].unique().astype(str))

# Output directory for every artifact written by this notebook.
results_folder = './spotiphy/macro/'
# exist_ok=True removes the check-then-create race and makes re-runs safe.
os.makedirs(results_folder, exist_ok=True)


In [None]:
# Keep only the first occurrence of each gene name in the ST data, then drop
# spots with no detected genes and genes detected in no spot.
unique_gene_mask = ~adata_st.var_names.duplicated()
adata_st = adata_st[:, unique_gene_mask].copy()
sc.pp.filter_cells(adata_st, min_genes=1)
sc.pp.filter_genes(adata_st, min_cells=1)

# Store the spatial coordinates as floats.
adata_st.obsm['spatial'] = adata_st.obsm['spatial'].astype(float)

# Snapshot of the cleaned ST object taken before Spotiphy's initialization;
# the final labels are attached to this copy later on.
adata_st_raw = adata_st.copy()

adata_sc_norm, adata_st_norm = spotiphy.initialization(
    adata_sc,
    adata_st,
    verbose=1,
)

# Select marker genes per annotated type from the initialized reference.
marker_gene_dict = spotiphy.sc_reference.marker_selection(
    adata_sc_norm,
    key_type=key_type,
    return_dict=True,
    n_select=50,
    threshold_p=0.1,
    threshold_fold=1.0,
    q=0.15,
)

# Flatten the per-type marker dictionary into parallel gene/label lists,
# walking the types in `type_list` order.
marker_pairs = [
    (gene, cell_type)
    for cell_type in type_list
    for gene in marker_gene_dict[cell_type]
]
marker_gene = [gene for gene, _ in marker_pairs]
marker_gene_label = [label for _, label in marker_pairs]

marker_gene_df = pd.DataFrame({'gene': marker_gene, 'label': marker_gene_label})
marker_gene_df.to_csv(results_folder+'marker_gene.csv')

# Restrict both datasets to the marker genes and build the reference from the
# single-cell side.
adata_sc_norm_marker = adata_sc_norm[:, marker_gene]
adata_st_norm_marker = adata_st_norm[:, marker_gene]
sc_ref = spotiphy.construct_sc_ref(adata_sc_norm_marker, key_type=key_type)


In [None]:
# Estimate cell-type proportions for the ST observations in fixed-size batches
# and persist the stacked result.
total_cells = adata_st_norm_marker.n_obs
if total_cells == 0:
    # Without this guard np.concatenate fails later with an opaque
    # "need at least one array to concatenate" error.
    raise ValueError("adata_st_norm_marker has no observations to deconvolve")

cell_proportion_list = []
batch_size = 100000
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for i in range(0, total_cells, batch_size):
    stop = min(i + batch_size, total_cells)
    print(f"Processing {i//batch_size + 1} batch: from {i} to {stop} cells")
    adata_batch = adata_st_norm_marker[i:stop].copy()

    # np.array() on a SciPy sparse matrix yields a 0-d object array rather
    # than a dense 2-D matrix, so densify explicitly when X is sparse.
    X_batch = adata_batch.X
    if hasattr(X_batch, "toarray"):
        X_batch = X_batch.toarray()
    else:
        X_batch = np.array(X_batch)

    batch_proportion = spotiphy.deconvolution.estimation_proportion(
        X_batch, adata_sc_norm_marker, sc_ref, type_list, key_type,
        n_epoch=4000, plot=False, batch_prior=1, device=device
    )

    cell_proportion_list.append(batch_proportion)

# Stack per-batch results back into one array, rows in the original order.
cell_proportion = np.concatenate(cell_proportion_list, axis=0)
np.save(results_folder + 'proportion.npy', cell_proportion)
# The CSV carries no header row; columns follow the order of `type_list`.
np.savetxt(results_folder + 'proportion.csv', cell_proportion, delimiter=',')


In [None]:

# Label each ST observation with its dominant macrophage subtype and save the
# annotated object.

# Observations whose largest estimated proportion is below this value are
# labelled "Undefined" instead of being assigned a subtype.
min_proportion = 0.3

cell_proportion = pd.DataFrame(cell_proportion, columns=type_list)
row_max = cell_proportion.max(axis=1)
cell_type_assignment = cell_proportion.idxmax(axis=1)
cell_type_assignment[row_max < min_proportion] = "Undefined"

# Labels are attached by position, so the proportion table must have exactly
# one row per observation of the pre-initialization snapshot.
if len(cell_type_assignment) != adata_st_raw.n_obs:
    raise ValueError(
        f"Got {len(cell_type_assignment)} proportion rows but adata_st_raw has "
        f"{adata_st_raw.n_obs} observations; cannot assign labels by position"
    )
adata_st_raw.obs["macro_subtype"] = cell_type_assignment.tolist()

# results_folder already ends with a separator; os.path.join avoids the
# doubled slash the previous f-string produced.
adata_st_raw.write_h5ad(os.path.join(results_folder, "TAM_deconvo_adata.h5ad"))
