In [4]:
import os, sys
import scanpy as sc
import pandas as pd
import numpy as np
import json
import warnings
warnings.filterwarnings("ignore")

In [2]:
from spamint import sprout_plus
from spamint import preprocess as pp
#import logging
#logger = logging.getLogger(__name__)
# Use the logger object exposed by spamint.sprout_plus for the notebook's own
# messages instead of creating a separate notebook-level logger (alternative
# kept commented out above).
logger = sprout_plus.logger

# 1. Load files

In [5]:
logger.info("Loading files...")
inputDir = './scc_data/'
outDir = f'{inputDir}/results/'
# Later cells write TSV files under outDir with DataFrame.to_csv, which does
# not create missing directories. Create the folder up front so those writes
# cannot fail after the long-running steps have already finished.
os.makedirs(outDir, exist_ok=True)

# Tables under inputDir: first row is the header, first column is the index.
# NOTE(review): file names suggest SC_* = single-cell reference and ST_* =
# spatial data (expression / coordinates / deconvolution) - confirm against
# the dataset description.
sc_exp = pd.read_csv(f'{inputDir}/SC_exp.tsv', sep='\t', header=0, index_col=0)
sc_meta = pd.read_csv(f'{inputDir}/SC_meta.tsv', sep='\t', header=0, index_col=0)
st_exp = pd.read_csv(f'{inputDir}/ST_exp.tsv', sep='\t', header=0, index_col=0)
st_coord = pd.read_csv(f'{inputDir}/ST_coord.csv', sep=',', header=0, index_col=0)
st_decon = pd.read_csv(f'{inputDir}/ST_decon.tsv', sep='\t', header=0, index_col=0)
sc_smurf = pd.read_csv(f'{inputDir}/smurf_ref.csv', sep=',', header=0, index_col=0)

# The ligand-receptor pair list has no header row and is read without an index column.
lr_df = pd.read_csv('./LR/human_LR_pairs.txt', sep='\t', header=None)
logger.info("File loading complete")

[2024-07-16 14:03:38,176/INFO] Loading files...
[2024-07-16 14:04:36,437/INFO] File loading complete


# Parameters

In [11]:
logger.info("Setting parameters...")

# Keep only the text after the last 'sf_' in each deconvolution column name;
# names without 'sf_' are left unchanged, so re-running this cell is harmless.
st_decon.columns = [col_name.split('sf_')[-1] for col_name in st_decon.columns]

species = 'Human'
# NOTE(review): 'visum' looks like a misspelling of 'visium', but the accepted
# st_tp values are defined inside spamint and are not visible here - confirm
# against the library before changing the spelling.
st_tp = 'visum'
meta_key = 'level3_celltype'
SUM = 1e4  # not referenced by any other cell visible in this notebook
alpha, beta, gamma, delta, eta = 1, 0.001, 0.001, 0.1, 0.0005

# Cell-selection settings depend on the platform: 'slide-seq' uses fixed
# values, anything else derives the repeat penalty from the ratio of
# (rows of st_exp * cells per spot) to rows of sc_exp.
if st_tp != 'slide-seq':
    num_per_spot = 10
    repeat_penalty = int((st_exp.shape[0] * num_per_spot / sc_exp.shape[0]) * 10)
else:
    num_per_spot = 1
    repeat_penalty = 2

# Passed as `iteration` to gradient_descent further down.
max_rep = 3

[2024-07-16 14:36:58,975/INFO] Setting parameters...


In [98]:
logger.debug("Reloading all SpaMint modules")
import importlib

# Development helper: re-import every already-loaded spamint module so edits
# to the package source take effect without restarting the kernel.
# Objects created before the reload (e.g. an existing SpaMint instance) keep
# referring to the old class definitions and must be re-created.
#
# Take a snapshot of sys.modules first: reloading may import further modules
# and change the dict while we work. getattr() guards against entries that
# have no __name__ attribute.
spamint_modules = [
    module
    for module in list(sys.modules.values())
    if getattr(module, '__name__', '').startswith('spamint')
]
for module in spamint_modules:
    print(module.__name__)
    try:
        importlib.reload(module)
    except Exception as err:
        # Best effort: keep reloading the remaining modules, but report the
        # failure instead of silently leaving a stale module in place.
        logger.warning(f"Could not reload {module.__name__}: {err!r}")

[2024-07-16 16:59:42,243/DEBUG] Reloading all SpaMint modules
spamint
spamint.utils
spamint.optimizers
spamint.preprocess
spamint.cell_selection
spamint.gradient_descent
spamint.sprout_plus


# Preprocess

In [99]:
# Both preprocessing entry points are called with the same keyword arguments.
prep_kwargs = dict(
    sc_exp=sc_exp,
    st_exp=st_exp,
    sc_distribution=sc_smurf,
    sc_meta=sc_meta,
    st_coord=st_coord,
    lr_df=lr_df,
    SP=species,
)

# Fewer than 1e4 columns in st_exp is treated as MERFISH-like data
# (only has 200~500 genes); everything else goes through the default routine.
prep_fn = pp.prep_all_adata_merfish if st_exp.shape[1] < 1e4 else pp.prep_all_adata

# NOTE(review): lr_df is rebound to the returned value, so re-running this
# cell feeds the already-processed table back in - confirm that is harmless.
sc_adata, st_adata, sc_ref, lr_df = prep_fn(**prep_kwargs)

[2024-07-16 16:59:48,204/DEBUG] Data clean is done! Using 15386 shared genes .


In [100]:
# Collect the constructor arguments by name, then build the SpaMint object
# from the preprocessed data, the deconvolution weights and the LR table.
spamint_kwargs = dict(
    save_path=outDir,
    st_adata=st_adata,
    weight=st_decon,
    sc_ref=sc_ref,
    sc_adata=sc_adata,
    cell_type_key=meta_key,
    lr_df=lr_df,
    st_tp=st_tp,
)
obj_spex = sprout_plus.SpaMint(**spamint_kwargs)
#obj_spex.prep()

sc_ref and sc_adata has different genes, both data are subset to 15386 genes.
[2024-07-16 16:59:50,083/DEBUG] Parameters checked!
[2024-07-16 16:59:52,100/DEBUG] Getting svg genes
By setting k as 4, each spot has average 3.990990990990991 neighbors.
[2024-07-16 16:59:52,317/DEBUG] Calculating spots affinity profile data
[2024-07-16 16:59:55,148/DEBUG] SpaMint object created.


# Cell selection

In [101]:
# Cell selection with p = 0.
# Note: max_rep is the literal 1 here, not the notebook-level `max_rep`
# variable (which is passed as `iteration` to gradient_descent).
selection_opts = dict(
    use_sc_orig=True,
    p=0,
    mean_num_per_spot=num_per_spot,
    mode='strict',
    max_rep=1,
    repeat_penalty=repeat_penalty,
)
sc_agg_meta = obj_spex.select_cells(**selection_opts)

# Write the returned metadata table (with header and index) as TSV.
selected_meta_path = f'{outDir}/spexmod_sc_meta.tsv'
sc_agg_meta.to_csv(selected_meta_path, sep='\t', header=True, index=True)

[2024-07-16 17:00:05,511/INFO] Starting cell selection
[2024-07-16 17:00:05,514/DEBUG] 0. calc num of cell per spot
[2024-07-16 17:00:05,516/DEBUG] 	 Estimating the cell number in each spot by the deconvolution result.
[2024-07-16 17:00:07,384/DEBUG] 1. filter gene
[2024-07-16 17:00:07,524/DEBUG] 2. feature select
[2024-07-16 17:00:07,527/DEBUG] 	 SpexMod selects 3430 feature genes.
[2024-07-16 17:00:07,529/DEBUG] 3. scale and norm
[2024-07-16 17:00:09,730/DEBUG] 4. init solution
[2024-07-16 17:01:40,652/DEBUG] 	 Init solution: max - 0.9477,     mean - 0.6281,     min - 0.0603
[2024-07-16 17:01:42,819/DEBUG] 5. Swap selection start...
[2024-07-16 17:01:42,821/DEBUG] 	Swap selection iter 0 of 1
[2024-07-16 17:02:16,550/DEBUG] 	 Swapped solution: max - 0.92,     mean - 0.70,     min - 0.07


In [10]:
# NOTE(review): this cell repeats the preceding select_cells call verbatim
# (it carries an earlier execution count); consider deleting one of the two
# so the notebook runs the selection only once from top to bottom.
sc_agg_meta = obj_spex.select_cells(
    use_sc_orig=True,
    p=0,
    mean_num_per_spot=num_per_spot,
    mode='strict',
    max_rep=1,
    repeat_penalty=repeat_penalty,
)
sc_agg_meta.to_csv(
    f'{outDir}/spexmod_sc_meta.tsv',
    sep='\t',
    header=True,
    index=True,
)

[2024-07-16 14:10:13,594/INFO] Starting cell selection
[2024-07-16 14:10:13,596/DEBUG] 0. calc num of cell per spot
[2024-07-16 14:10:13,597/DEBUG] 	 Estimating the cell number in each spot by the deconvolution result.
[2024-07-16 14:10:15,166/DEBUG] 1. filter gene
[2024-07-16 14:10:15,242/DEBUG] 2. feature select
[2024-07-16 14:15:45,201/DEBUG] 	 SpexMod selects 3746 feature genes.
[2024-07-16 14:15:45,202/DEBUG] 3. scale and norm
[2024-07-16 14:15:47,209/DEBUG] 4. init solution
[2024-07-16 14:17:19,826/DEBUG] 	 Init solution: max - 0.9501,     mean - 0.6417,     min - 0.1346
[2024-07-16 14:17:21,653/DEBUG] 5. Swap selection start...
[2024-07-16 14:17:21,655/DEBUG] 	Swap selection iter 0 of 1
[2024-07-16 14:17:52,797/DEBUG] 	 Swapped solution: max - 0.92,     mean - 0.70,     min - 0.13


If `p != 0`, cell selection is extremely time-consuming.

In [None]:
# change p to 0.1, use different code to select cells
sc_agg_meta = obj_spex.select_cells(use_sc_orig = True, p = 0.1, mean_num_per_spot = num_per_spot, mode = 'strict', max_rep = 1, 
                                    repeat_penalty = repeat_penalty)
sc_agg_meta.to_csv(f'{outDir}/spexmod_sc_meta.tsv',sep = '\t',header=True,index=True)

In [102]:
# Keyword options for the gradient-descent step; the five weights
# (alpha ... eta) are passed positionally, in the same order as before.
gd_options = dict(
    init_sc_embed=False,
    iteration=max_rep,
    k=2,
    W_HVG=2,
    left_range=1,
    right_range=2,
    steps=1,
    dim=2,
)
alter_sc_exp, sc_agg_meta = obj_spex.gradient_descent(
    alpha, beta, gamma, delta, eta, **gd_options
)

# Overwrites the metadata file written by the cell-selection step with the
# table returned here. alter_sc_exp is kept in memory only.
sc_agg_meta.to_csv(f'{outDir}/spexmod_sc_meta.tsv', sep='\t', header=True, index=True)

# Optional persistence, currently disabled:
# with open(f'{outDir}/sc_knn.json', 'w') as fp:
#     json.dump(obj_spex.sc_knn, fp)
# utils.save_object(obj_spex, f'{outDir}/obj_spex.pkl')


[2024-07-16 17:10:04,276/DEBUG] Running v12 now...
[2024-07-16 17:10:04,280/DEBUG] Init sc_coord by affinity embedding...
[2024-07-16 17:10:04,281/DEBUG] Start embedding...
[2024-07-16 17:10:04,282/DEBUG] Calc aff mat...


In [30]:
# Scratch cell for timing two optimizer helpers on the current solver state.
# Relies on `importlib` having been imported by the module-reload cell above.
from spamint import optimizers
importlib.reload(optimizers)
import cProfile  # only needed by the disabled cProfile.runctx wrapper below

# Disabled check of the cell KNN helper:
# print(optimizers.findCellKNN(obj_spex.st_coord, obj_spex.st_tp,
#                             obj_spex.sc_agg_meta,
#                             obj_spex.gradient_descent_solver.sc_coord,
#                             obj_spex.gradient_descent_solver.K ))

# NOTE(review): binds the solver to the notebook-global name `self`,
# presumably so expressions copied from the solver's methods run unchanged.
self = obj_spex.gradient_descent_solver

# The return values are discarded; the surrounding debug messages are there
# so the log timestamps show how long each call takes.
logger.debug("calcNeighborAffMat")
# To profile instead of just timing, wrap the call below in
# cProfile.runctx('''...''', globals(), locals(), sort='tottime').
optimizers.calcNeighborAffinityMat(
    self.spots_nn_lst,
    self.spot_cell_dict,
    self.lr_df,
    self.alter_sc_exp,
)
logger.debug("OK")

logger.debug("calcAffMat")
optimizers.calculate_affinity_mat(self.lr_df, self.alter_sc_exp)
logger.debug("ok")

[2024-07-09 00:34:47,068/DEBUG] calcNeighborAffMat
[2024-07-09 00:35:04,701/DEBUG] OK
[2024-07-09 00:35:04,708/DEBUG] calcAffMat
[2024-07-09 00:35:18,771/DEBUG] ok
