# 0. Import libraries

In [1]:
import os
import scanpy as sc
import pandas as pd
import numpy as np
import json

import warnings
# Silence every warning category for the rest of the session so the tutorial
# output stays compact. NOTE(review): this also hides deprecation and numerical
# warnings raised by the libraries used below — remove when debugging.
warnings.filterwarnings("ignore")

In [2]:
from spamint import sprout_plus
from spamint import preprocess as pp

# 1. Load files

In [3]:
# Locations of the demo inputs and of the directory that receives all results.
inputDir = './tutorial/demo/'
outDir = f'{inputDir}/demo_results/'
# Make sure the results directory exists before any later cell writes into it
# (to_csv / open(..., 'w') do not create missing parent directories).
os.makedirs(outDir, exist_ok=True)

# Tab-separated tables; the first row is the header and the first column the index.
sc_exp = pd.read_csv(f'{inputDir}/SC_exp.tsv',sep = '\t',header=0,index_col=0)
sc_meta = pd.read_csv(f'{inputDir}/SC_meta.tsv',sep = '\t',header=0,index_col=0)
st_exp = pd.read_csv(f'{inputDir}/ST_exp.tsv',sep = '\t',header=0,index_col=0)
st_coord = pd.read_csv(f'{inputDir}/ST_coord.tsv',sep = '\t',header=0,index_col=0)
st_decon = pd.read_csv(f'{inputDir}/ST_decon.tsv',sep = '\t',header=0,index_col=0)

# TODO run smurf for demo
# Placeholder for the SMURF output: the raw expression plus uniform noise in [0, 10).
# A locally seeded generator keeps the demo reproducible under Restart & Run All
# without touching NumPy's global random state.
rng = np.random.default_rng(0)
sc_smurf = sc_exp + rng.random(sc_exp.shape)*10

# Ligand-receptor pair table; the file has no header row.
lr_df = pd.read_csv('./LR/mouse_LR_pairs.txt',sep='\t',header=None)

# 2. Run sprout

In [4]:
# Run configuration: `species` is forwarded to preprocessing as SP, and
# `st_tp` is the spatial-technology tag ('slide-seq' is the other value
# tested in the parameter cell).
species, st_tp = 'Mouse', 'st'

In [5]:
def _with_prefix(labels, prefix):
    """Return `labels` as a list of strings that all start with `prefix`.

    The prefix is only added when it is not already present, so re-running
    this cell does not produce labels such as 'spot_spot_0'.
    """
    prefixed = []
    for label in labels:
        text = str(label)
        prefixed.append(text if text.startswith(prefix) else prefix + text)
    return prefixed


# Spot-level tables share the 'spot_' namespace ...
for _frame in (st_coord, st_exp, st_decon):
    _frame.index = _with_prefix(_frame.index, 'spot_')

# ... and cell-level tables share the 'cell_' namespace.
for _frame in (sc_exp, sc_meta, sc_smurf):
    _frame.index = _with_prefix(_frame.index, 'cell_')

# Parameters

In [6]:
# NOTE(review): SUM is not used by any cell visible here — presumably a
# normalisation target; confirm before removing.
SUM = 1e4

# Loss-term weights passed positionally to gradient_descent.
alpha = 1
beta = 0.001
gamma = 0.001
delta = 0.1
eta = 0.0005

# Cells per spot and the penalty for re-using a cell depend on the technology.
if st_tp != 'slide-seq':
    num_per_spot = 10
    n_spots = st_exp.shape[0]
    n_cells = sc_exp.shape[0]
    # Ratio of requested cells (spots x cells-per-spot) to available cells, scaled by 10.
    repeat_penalty = int((n_spots * num_per_spot/n_cells) * 10)
else:
    num_per_spot = 1
    repeat_penalty = 2

# Used as the iteration count of gradient_descent.
max_rep = 20

In [7]:
# Development aid: re-import spamint.sprout_plus so that edits made to the module
# on disk are picked up without restarting the kernel.
from importlib import reload
# The returned module object is the cell's displayed output.
reload(sprout_plus)

<module 'spamint.sprout_plus' from '/home/notify/Documents/2406-work/SpaMint/spamint/sprout_plus.py'>

# Preprocess

In [8]:
# Inputs shared by both preprocessing entry points.
prep_inputs = dict(sc_exp = sc_exp, st_exp = st_exp, sc_distribution = sc_smurf,
                   sc_meta = sc_meta, st_coord = st_coord, lr_df = lr_df, SP = species)

# Spatial panels with fewer than 1e4 genes are handled by the MERFISH variant
# (the original note cites MERFISH data as having only 200~500 genes).
n_st_genes = st_exp.shape[1]
if n_st_genes < 1e4:
    prep_fn = pp.prep_all_adata_merfish
else:
    prep_fn = pp.prep_all_adata

# lr_df is rebound to the table returned by preprocessing.
sc_adata, st_adata, sc_ref, lr_df = prep_fn(**prep_inputs)

Data clean and scale are done! Single-cell data has 882 genes, spatial data has 882 genes.


In [9]:
# Collect the constructor arguments first, then build the SpaMint object
# that the following cells operate on.
spamint_kwargs = dict(
    save_path = outDir,
    st_adata = st_adata,
    weight = st_decon,          # deconvolution table loaded from ST_decon.tsv
    sc_ref = sc_ref,
    sc_adata = sc_adata,
    cell_type_key = 'celltype',
    lr_df = lr_df,
    st_tp = st_tp,
)
obj_spex = sprout_plus.SpaMint(**spamint_kwargs)

sc_ref and sc_adata has different genes, both data are subset to 882 genes.
Parameters checked!
Getting svg genes
By setting k as 4, each spot has average 3.9788359788359786 neighbors.


# Cell selection

In [10]:
# Select cells for every spot with p = 0, then persist the resulting metadata table.
selection_options = dict(
    use_sc_orig = True,
    p = 0,
    mean_num_per_spot = num_per_spot,
    mode = 'strict',
    max_rep = 1,   # literal 1 here, not the notebook-level max_rep variable
    repeat_penalty = repeat_penalty,
)
sc_agg_meta = obj_spex.select_cells(**selection_options)

selected_meta_path = f'{outDir}/spexmod_sc_meta.tsv'
sc_agg_meta.to_csv(selected_meta_path, sep = '\t', header=True, index=True)

	 Estimating the cell number in each spot by the deconvolution result.
	 SpexMod selects 882 feature genes.
	 Init solution: max - 1.0000,     mean - 0.9632,     min - 0.7753
	 Swap selection start...
	 Swapped solution: max - 1.00,     mean - 0.99,     min - 0.94


Note: running `select_cells` with `p != 0` is extremely time-consuming.

In [11]:
# Optional alternative (disabled): repeat the cell selection with p = 0.1.
# Per the original author's note, a non-zero p uses different code to select cells.
# If enabled, it overwrites the spexmod_sc_meta.tsv written by the previous cell.
#sc_agg_meta = obj_spex.select_cells(use_sc_orig = True, p = 0.1, mean_num_per_spot = num_per_spot, mode = 'strict', max_rep = 1, 
#                                    repeat_penalty = repeat_penalty)
#sc_agg_meta.to_csv(f'{outDir}/spexmod_sc_meta.tsv',sep = '\t',header=True,index=True)

	 Estimating the cell number in each spot by the deconvolution result.
	 SpexMod selects 882 feature genes.
	 Init solution: max - 1.0000,     mean - 0.9635,     min - 0.7753
	 Swap selection start...
	 Swapped solution: max - 1.00,     mean - 0.97,     min - 0.81


In [12]:
# Keyword options for the optimisation; the five loss weights are passed positionally.
gd_options = dict(
    init_sc_embed = False,
    iteration = max_rep,
    k = 2,
    W_HVG = 2,
    left_range = 1,
    right_range = 2,
    steps = 1,
    dim = 2,
)
alter_sc_exp, sc_agg_meta = obj_spex.gradient_descent(
    alpha, beta, gamma, delta, eta, **gd_options)

# Persist the updated metadata (same file as written after cell selection) ...
updated_meta_path = f'{outDir}/spexmod_sc_meta.tsv'
sc_agg_meta.to_csv(updated_meta_path, sep = '\t', header=True, index=True)

# ... and the sc_knn attribute of the SpaMint object as JSON.
with open(f'{outDir}/sc_knn.json', 'w') as knn_file:
    json.dump(obj_spex.sc_knn, knn_file)
# utils.save_object(obj_spex, f'{outDir}/obj_spex.pkl')


Running v12 now...
Init sc_coord by affinity embedding...
Average shape correlation is: 0.9781484686379595
First-term calculation done!
Second-term calculation done!
Third term calculation done!
Fourth term calculation done!
Hyperparameters adjusted.
-----Start iteration 0 -----
Average shape correlation is: 0.9781484686379595
First-term calculation done!
Second-term calculation done!
Third term calculation done!
Fourth term calculation done!
---0 self.loss4 506.23408458969135 self.GAMMA 0.003257917226911025 self.GAMMA*self.loss4 1.6492687450342884
---In iteration 0, the loss is:loss1:1649.26875,loss2:2654.15712,loss3:1303.35056,loss4:506.23408,loss5:3381.05054.
---In iteration 0, the loss is:loss1:1649.26875,loss2:1649.26875,loss3:1.64927,loss4:1.64927,loss5:164.92687.
The total loss after iteration 0 is 3466.76290.
-----Start iteration 1 -----
Average shape correlation is: 0.9771336656771149
First-term calculation done!
Second-term calculation done!
Third term calculation done!
Fourt