In [103]:
import os, sys
import scanpy as sc
import pandas as pd
import numpy as np
import json
import warnings
warnings.filterwarnings("ignore")

In [104]:
from spamint import sprout_plus
from spamint import preprocess as pp
# Use the logger object exposed by spamint.sprout_plus for the notebook's own
# messages instead of creating a separate one.
# Alternative kept for reference (a notebook-local logger):
#   import logging
#   logger = logging.getLogger(__name__)
logger = sprout_plus.logger

# 1. Load files

In [115]:
logger.info("Loading files...")
inputDir = './scc_data/'
outDir = f'{inputDir}/results/'
# Later cells write TSV results into outDir with DataFrame.to_csv, which does not
# create missing directories; make sure the directory exists before anything runs.
os.makedirs(outDir, exist_ok=True)

# Single-cell reference: expression table and per-cell metadata.
sc_exp = pd.read_csv(f'{inputDir}/SC_exp.tsv', sep='\t', header=0, index_col=0)
sc_meta = pd.read_csv(f'{inputDir}/SC_meta.tsv', sep='\t', header=0, index_col=0)

# Spatial data: expression table, spot coordinates and deconvolution result.
st_exp = pd.read_csv(f'{inputDir}/ST_exp.tsv', sep='\t', header=0, index_col=0)
st_coord = pd.read_csv(f'{inputDir}/ST_coord.csv', sep=',', header=0, index_col=0)
st_decon = pd.read_csv(f'{inputDir}/ST_decon.tsv', sep='\t', header=0, index_col=0)

# Reference table later passed to the preprocessing step as `sc_distribution`.
sc_smurf = pd.read_csv(f'{inputDir}/smurf_ref.csv', sep=',', header=0, index_col=0)

# Ligand-receptor pairs; the file has no header row.
lr_df = pd.read_csv('./LR/human_LR_pairs.txt', sep='\t', header=None)
logger.info("File loading complete")

[2024-07-19 12:04:35,520/INFO] Loading files...
[2024-07-19 12:05:32,987/INFO] File loading complete


# Parameters

In [116]:
logger.info("Setting parameters...")

# Keep only the part of each deconvolution column name that follows the last 'sf_'.
st_decon.columns = [col.split('sf_')[-1] for col in st_decon.columns]

# Dataset description.
species = 'Human'
# NOTE(review): 'visum' looks like a misspelling of 'visium'. Confirm which platform
# strings SpaMint accepts before changing it, since a different value may select a
# different code path inside the library.
st_tp = 'visum'
meta_key = 'level3_celltype'
SUM = 1e4

# Weights handed to the gradient-descent step, and its iteration count.
alpha = 1
beta = 0.001
gamma = 0.001
delta = 0.1
eta = 0.0005
max_rep = 3

# Cell-selection settings depend on the platform string.
if st_tp == 'slide-seq':
    num_per_spot, repeat_penalty = 1, 2
else:
    num_per_spot = 10
    repeat_penalty = int(st_exp.shape[0] * num_per_spot / sc_exp.shape[0] * 10)

[2024-07-19 12:05:43,902/INFO] Setting parameters...


In [155]:
logger.debug("Reloading all SpaMint modules")
import importlib

# Take a snapshot of sys.modules before reloading: a reload may import further
# modules, and changing the dict while iterating over it raises RuntimeError.
# getattr guards against entries that carry no __name__ attribute.
spamint_modules = [
    module for module in list(sys.modules.values())
    if getattr(module, '__name__', '').startswith('spamint')
]
for module in spamint_modules:
    print(module.__name__)
    try:
        importlib.reload(module)
    except Exception as exc:
        # Keep going (best effort), but report the failure so that a module
        # still running stale code does not go unnoticed.
        logger.warning(f"Could not reload {module.__name__}: {exc!r}")

[2024-07-19 13:33:56,173/DEBUG] Reloading all SpaMint modules
spamint
spamint.utils
spamint.optimizers
spamint.preprocess
spamint.cell_selection
spamint.gradient_descent
spamint.sprout_plus


# Preprocess

In [156]:
# Pick the preprocessing routine from the number of columns in st_exp:
# fewer than 1e4 is treated as MERFISH-like data (only ~200-500 genes).
if st_exp.shape[1] < 1e4:
    prepare_adata = pp.prep_all_adata_merfish
else:
    prepare_adata = pp.prep_all_adata

# NOTE(review): lr_df is both an input and an output here, so re-running this
# cell feeds the already processed table back in.
sc_adata, st_adata, sc_ref, lr_df = prepare_adata(
    sc_exp=sc_exp,
    st_exp=st_exp,
    sc_distribution=sc_smurf,
    sc_meta=sc_meta,
    st_coord=st_coord,
    lr_df=lr_df,
    SP=species,
)

[2024-07-19 13:34:02,529/DEBUG] Data clean is done! Using 15386 shared genes .


In [157]:
# Build the SpaMint object from the preprocessed data; results go to outDir.
obj_spex = sprout_plus.SpaMint(
    save_path=outDir,
    st_adata=st_adata,
    weight=st_decon,          # deconvolution table with the renamed columns
    sc_ref=sc_ref,
    sc_adata=sc_adata,
    cell_type_key=meta_key,
    lr_df=lr_df,
    st_tp=st_tp,
)
# Explicit preparation call is currently disabled:
# obj_spex.prep()

sc_ref and sc_adata has different genes, both data are subset to 15386 genes.
[2024-07-19 13:34:04,778/DEBUG] Parameters checked!
[2024-07-19 13:34:09,785/DEBUG] Getting svg genes
By setting k as 4, each spot has average 3.990990990990991 neighbors.
[2024-07-19 13:34:10,444/DEBUG] Calculating spots affinity profile data
[2024-07-19 13:34:16,843/DEBUG] SpaMint object created.


# Cell selection

In [101]:
# Select single cells for every spot (p = 0, strict mode, one swap iteration)
# and store the resulting per-cell table as TSV.
sc_agg_meta = obj_spex.select_cells(
    use_sc_orig=True,
    p=0,
    mean_num_per_spot=num_per_spot,
    mode='strict',
    max_rep=1,
    repeat_penalty=repeat_penalty,
)
sc_agg_meta.to_csv(f'{outDir}/spexmod_sc_meta.tsv', sep='\t', header=True, index=True)

[2024-07-16 17:00:05,511/INFO] Starting cell selection
[2024-07-16 17:00:05,514/DEBUG] 0. calc num of cell per spot
[2024-07-16 17:00:05,516/DEBUG] 	 Estimating the cell number in each spot by the deconvolution result.
[2024-07-16 17:00:07,384/DEBUG] 1. filter gene
[2024-07-16 17:00:07,524/DEBUG] 2. feature select
[2024-07-16 17:00:07,527/DEBUG] 	 SpexMod selects 3430 feature genes.
[2024-07-16 17:00:07,529/DEBUG] 3. scale and norm
[2024-07-16 17:00:09,730/DEBUG] 4. init solution
[2024-07-16 17:01:40,652/DEBUG] 	 Init solution: max - 0.9477,     mean - 0.6281,     min - 0.0603
[2024-07-16 17:01:42,819/DEBUG] 5. Swap selection start...
[2024-07-16 17:01:42,821/DEBUG] 	Swap selection iter 0 of 1
[2024-07-16 17:02:16,550/DEBUG] 	 Swapped solution: max - 0.92,     mean - 0.70,     min - 0.07


In [10]:
# NOTE(review): same call and same output file as the preceding cell-selection
# cell (this one comes from an earlier run); running both repeats the work and
# overwrites spexmod_sc_meta.tsv.
sc_agg_meta = obj_spex.select_cells(
    use_sc_orig=True, p=0,
    mean_num_per_spot=num_per_spot,
    mode='strict', max_rep=1,
    repeat_penalty=repeat_penalty,
)
sc_agg_meta.to_csv(
    f'{outDir}/spexmod_sc_meta.tsv',
    sep='\t',
    header=True,
    index=True,
)

[2024-07-16 14:10:13,594/INFO] Starting cell selection
[2024-07-16 14:10:13,596/DEBUG] 0. calc num of cell per spot
[2024-07-16 14:10:13,597/DEBUG] 	 Estimating the cell number in each spot by the deconvolution result.
[2024-07-16 14:10:15,166/DEBUG] 1. filter gene
[2024-07-16 14:10:15,242/DEBUG] 2. feature select
[2024-07-16 14:15:45,201/DEBUG] 	 SpexMod selects 3746 feature genes.
[2024-07-16 14:15:45,202/DEBUG] 3. scale and norm
[2024-07-16 14:15:47,209/DEBUG] 4. init solution
[2024-07-16 14:17:19,826/DEBUG] 	 Init solution: max - 0.9501,     mean - 0.6417,     min - 0.1346
[2024-07-16 14:17:21,653/DEBUG] 5. Swap selection start...
[2024-07-16 14:17:21,655/DEBUG] 	Swap selection iter 0 of 1
[2024-07-16 14:17:52,797/DEBUG] 	 Swapped solution: max - 0.92,     mean - 0.70,     min - 0.13


If `p != 0`, cell selection is extremely time-consuming.

In [None]:
# Same selection with p = 0.1, which makes select_cells use a different
# selection code path.
sc_agg_meta = obj_spex.select_cells(
    use_sc_orig=True,
    p=0.1,
    mean_num_per_spot=num_per_spot,
    mode='strict',
    max_rep=1,
    repeat_penalty=repeat_penalty,
)
sc_agg_meta.to_csv(f'{outDir}/spexmod_sc_meta.tsv', sep='\t', header=True, index=True)

Use a user-defined cell selection

In [None]:
# Reuse our own selection result as a stand-in for a user-provided cell selection.
# `user_sc_agg_meta` should contain the columns `celltype`, `spot` and `sc_id`.

In [158]:
# Read the stored selection table back, as if it had been supplied by the user.
user_sc_agg_meta = pd.read_csv(
    f'{outDir}/spexmod_sc_meta.tsv',
    sep='\t',
    header=0,
    index_col=0,
)
# Expression rows of exactly the selected cells, in the order of the table.
user_sc_exp = sc_exp.loc[user_sc_agg_meta['sc_id']]
# Register the predefined selection with the SpaMint object.
sc_agg_meta = obj_spex.load_predefined_cells(user_sc_exp, user_sc_agg_meta)

In [160]:
# Gradient descent started from the embedding columns already present in
# sc_agg_meta rather than from a freshly computed one.
alter_sc_exp, sc_agg_meta = obj_spex.gradient_descent(
    alpha, beta, gamma, delta, eta,
    init_sc_embed=sc_agg_meta[['adj_spex_UMAP1', 'adj_spex_UMAP2']],
    iteration=max_rep,
    k=2,
    W_HVG=2,
    left_range=1,
    right_range=2,
    steps=1,
    dim=2,
)
# NOTE(review): this writes to the same spexmod_sc_meta.tsv that the
# predefined-cell cell reads, so re-running that cell afterwards loads this
# cell's output instead of the original selection.
sc_agg_meta.to_csv(f'{outDir}/spexmod_sc_meta.tsv', sep='\t', header=True, index=True)

[2024-07-19 13:35:56,566/DEBUG] Running v12 now...
[2024-07-19 13:35:56,573/DEBUG] Using user provided init sc_coord.
[2024-07-19 13:36:14,193/DEBUG] First-term calculation done!
[2024-07-19 13:36:19,321/DEBUG] Second-term calculation done!
('7x45', {'7x45': ['8x44', '8x46', '9x45'], '8x30': ['9x29', '9x31', '8x32', '10x30'], '8x32': ['9x31', '9x33', '8x34', '10x32'], '8x34': ['9x33', '9x35', '8x32', '10x34'], '8x36': ['9x35', '9x37', '8x38', '8x34'], '8x38': ['9x39', '9x37', '8x36', '10x38'], '8x44': ['7x45', '9x43', '9x45', '8x46'], '8x46': ['7x45', '9x45', '9x47', '8x44'], '9x29': ['10x30', '8x30', '10x28', '11x29'], '9x31': ['8x32', '10x32', '10x30', '8x30'], '9x33': ['10x34', '8x34', '8x32', '10x32'], '9x35': ['10x34', '8x34', '8x36', '10x36'], '9x37': ['8x36', '10x36', '10x38', '8x38'], '9x39': ['8x38', '10x40', '10x38', '9x41'], '9x41': ['10x40', '11x41', '9x43', '9x39'], '9x43': ['8x44', '10x44', '9x41', '9x45'], '9x45': ['8x44', '8x46', '10x44', '10x46'], '9x47': ['10x48', '8x

# Gradient descent

In [126]:
# Gradient descent without a user-supplied starting embedding
# (init_sc_embed = False).
alter_sc_exp, sc_agg_meta = obj_spex.gradient_descent(
    alpha, beta, gamma, delta, eta,
    init_sc_embed=False,
    iteration=max_rep,
    k=2,
    W_HVG=2,
    left_range=1,
    right_range=2,
    steps=1,
    dim=2,
)
sc_agg_meta.to_csv(f'{outDir}/spexmod_sc_meta.tsv', sep='\t', header=True, index=True)

# Optional persistence of the KNN graph and of the whole object (disabled):
# with open(f'{outDir}/sc_knn.json', 'w') as fp:
#     json.dump(obj_spex.sc_knn, fp)
# utils.save_object(obj_spex, f'{outDir}/obj_spex.pkl')


[2024-07-19 12:11:31,973/DEBUG] Running v12 now...
[2024-07-19 12:11:31,976/DEBUG] Init sc_coord by affinity embedding...
[2024-07-19 12:11:31,978/DEBUG] Start embedding...
[2024-07-19 12:11:31,978/DEBUG] Calc aff mat...
[2024-07-19 12:11:51,239/DEBUG] Preprocessing affinity matrix
[2024-07-19 12:12:03,792/DEBUG] Evaluating coord generated by UMAP...
[2024-07-19 12:12:05,240/DEBUG] shape correlation is: 0.865247480035666
[2024-07-19 12:12:05,241/DEBUG] End embedding.
[2024-07-19 12:12:14,586/DEBUG] First-term calculation done!
[2024-07-19 12:12:17,117/DEBUG] Second-term calculation done!
[2024-07-19 12:13:15,652/DEBUG] Third term calculation done!
[2024-07-19 12:13:59,621/DEBUG] Fourth term calculation done!
[2024-07-19 12:13:59,873/DEBUG] Hyperparameters adjusted.
[2024-07-19 12:13:59,874/DEBUG] -----Start iteration 0 -----
[2024-07-19 12:13:59,875/DEBUG] Start embedding...
[2024-07-19 12:13:59,876/DEBUG] Calc aff mat...
[2024-07-19 12:14:19,747/DEBUG] Preprocessing affinity matrix
[2

KeyboardInterrupt: 

In [30]:
# Scratch cell: run the two affinity-matrix routines of spamint.optimizers on
# the current solver state, bracketed by log messages so that the timestamps
# show how long each one takes.
from spamint import optimizers
importlib.reload(optimizers)
import cProfile

# Disabled check, kept for reference:
# print(optimizers.findCellKNN(obj_spex.st_coord, obj_spex.st_tp,
#                              obj_spex.sc_agg_meta,
#                              obj_spex.gradient_descent_solver.sc_coord,
#                              obj_spex.gradient_descent_solver.K))

# Alias so the calls below read like the solver's own method code.
self = obj_spex.gradient_descent_solver

logger.debug("calcNeighborAffMat")
# To profile instead of just timing, wrap the call in
# cProfile.runctx('''...''', globals(), locals(), sort='tottime').
optimizers.calcNeighborAffinityMat(
    self.spots_nn_lst,
    self.spot_cell_dict,
    self.lr_df,
    self.alter_sc_exp,
)
logger.debug("OK")

logger.debug("calcAffMat")
optimizers.calculate_affinity_mat(
    self.lr_df,
    self.alter_sc_exp,
)
logger.debug("ok")

[2024-07-09 00:34:47,068/DEBUG] calcNeighborAffMat
[2024-07-09 00:35:04,701/DEBUG] OK
[2024-07-09 00:35:04,708/DEBUG] calcAffMat
[2024-07-09 00:35:18,771/DEBUG] ok
