In [11]:
# Once the deconvolution results are available, we can compute evaluation metrics.
# For simulated datasets we compute the RMSE between the proportions stored in
# the spatial AnnData's .obs and the predicted proportions read from CSV.
import pandas as pd
import scanpy as sc

from models.utils import check_anndata, RMSE

save_path = 'seqFISH3000'
st_target_path = "../datasets/seqFISH//spatial/seqFISH_st3000.h5ad"

# Load the spatial dataset. NOTE(review): the second argument presumably turns on
# the "Data matrix / Data obs" printout seen in the output -- confirm in models.utils.
st_rna_origin_adata = check_anndata(st_target_path, True)

# Normalise and log-transform the expression matrix in place. The RMSE below only
# reads .obs, so these two calls do not affect the printed score.
sc.pp.normalize_total(st_rna_origin_adata)
sc.pp.log1p(st_rna_origin_adata)

# Predicted proportions: one row per spot, one column per cell type.
cell_type_proportion_matrix = pd.read_csv(f"{save_path}/cell_type_results.csv", index_col=0)
ct_list = cell_type_proportion_matrix.columns.tolist()

# Fail early with a readable message instead of a bare KeyError inside the loop.
missing_cell_types = [name for name in ct_list if name not in st_rna_origin_adata.obs.columns]
if missing_cell_types:
    raise KeyError(
        f"Cell types present in the prediction file but absent from adata.obs: {missing_cell_types}"
    )

# The two tables are compared row-by-row, so they must at least have the same length.
# NOTE(review): rows are paired by position, not by index label -- confirm that the
# CSV rows are written in the same order as the AnnData observations.
if len(cell_type_proportion_matrix) != len(st_rna_origin_adata.obs):
    raise ValueError(
        f"Row count mismatch: {len(cell_type_proportion_matrix)} predicted rows vs "
        f"{len(st_rna_origin_adata.obs)} observations in the AnnData object"
    )

rmse_by_cell_type = {}  # kept so individual cell types can be inspected afterwards
total_rmse = 0
for ct in ct_list:
    true_list = st_rna_origin_adata.obs[ct].tolist()
    pred_list = cell_type_proportion_matrix[ct].tolist()
    rmse = RMSE(true_list, pred_list)
    rmse_by_cell_type[ct] = rmse
    total_rmse += rmse

# Sum of the per-cell-type RMSE values (not their mean).
print(total_rmse)

Data matrix:
(71, 3000)
[[0 0 0 ... 0 0 0]
 [0 2 2 ... 2 0 2]
 [0 0 0 ... 0 0 1]
 ...
 [2 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]]
Data obs:
    cell_id  astrocytes      Olig  endo.mural   iNeuron   eNeuron  microglia  \
0         0    0.000000  0.000000    1.000000  0.000000  0.000000   0.000000   
1         1    0.333333  0.000000    0.333333  0.333333  0.000000   0.000000   
2         2    0.250000  0.250000    0.250000  0.000000  0.000000   0.250000   
3         3    0.333333  0.000000    0.000000  0.333333  0.000000   0.333333   
4         7    0.000000  0.000000    0.000000  0.000000  1.000000   0.000000   
..      ...         ...       ...         ...       ...       ...        ...   
66      102    0.083333  0.333333    0.083333  0.000000  0.500000   0.000000   
67      103    0.083333  0.416667    0.083333  0.000000  0.416667   0.000000   
68      104    1.000000  0.000000    0.000000  0.000000  0.000000   0.000000   
69      109    0.500000  0.500000    0.000000 

In [18]:
# Beyond RMSE, we can also compute the Spearman correlation between a gene's
# expression and a predicted cell-type proportion, and Moran's I (spatial
# autocorrelation) of that predicted proportion.
import numpy as np
from scipy.sparse import issparse
from scipy.stats import spearmanr
import libpysal as lps
import esda

target_gene = 'stk3'  # example gene, selected at random
target_cell_type = 'Olig'
KNN_NEIGHBORS = 9  # number of nearest spots used to build the spatial weights
MORAN_SEED = 0     # fixes the permutation test so p_sim is repeatable across runs

# Extract the expression of the target gene as a flat Python list. AnnData may
# store X as a SciPy sparse matrix, which has no .flatten(), so densify first.
target_gene_column = st_rna_origin_adata[:, target_gene].X
if issparse(target_gene_column):
    target_gene_column = target_gene_column.toarray()
target_gene_value_list = np.asarray(target_gene_column).flatten().tolist()
target_ct_value_list = cell_type_proportion_matrix[target_cell_type].tolist()

# Rank correlation between gene expression and predicted proportion.
spearman_corr, spearman_p_value = spearmanr(target_gene_value_list, target_ct_value_list)
print(f'spearmanr corr:{spearman_corr}, p-value:{spearman_p_value}')

# Spatial weights: k-nearest-neighbour graph on the spot coordinates, row-standardised.
spatial_xy = st_rna_origin_adata.obsm['spatial']
w_knn = lps.weights.KNN(spatial_xy, k=KNN_NEIGHBORS)
w_knn.transform = 'r'

# Moran's p_sim comes from a random permutation test. Seeding NumPy's global RNG
# right before the call makes it reproducible, provided esda permutes via
# np.random -- NOTE(review): confirm for the installed esda version.
# This also resets the global NumPy RNG state for any code that runs afterwards.
np.random.seed(MORAN_SEED)
moran = esda.Moran(target_ct_value_list, w_knn)
moran_i = moran.I
moran_p_value = moran.p_sim
print(f"Moran's I:{moran_i}, p-value:{moran_p_value}")

# Legacy names: the original cell left `corr` / `p_value` bound to the Moran
# results, so keep them in case other cells read them.
corr, p_value = moran_i, moran_p_value


spearmanr corr:-0.03646288399785135, p-value:0.7627369964957669
Moran's I:0.37788123017007325, p-value:0.001
