In [None]:
import os
import gc
import ot
import pickle
import anndata
import scanpy as sc
import pandas as pd
from scipy.stats import wilcoxon
import numpy as np
from scipy import sparse
from scipy.stats import rankdata
from scipy.stats import spearmanr, pearsonr
from scipy.spatial import distance_matrix
import matplotlib.pyplot as plt

import commot as ct

In [None]:
# Location of the Space Ranger output for this sample; kept in one place so the
# notebook can be pointed at another sample without editing the calls below.
VISIUM_DIR = r'E:\datasets\space\P01'

# Load the Visium sample, make gene names unique and log-normalise the counts.
adata = sc.read_visium(VISIUM_DIR)
adata.var_names_make_unique()
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)
adata.raw = adata
# Full-gene copy taken before the highly-variable-gene subset below; this is
# the object the COMMOT calls later in the notebook operate on.
adata_dis500 = adata.copy()

sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
# Slicing returns a view; copy it so PCA / neighbours / Leiden write their
# results into a real AnnData object instead of triggering an implicit copy.
adata = adata[:, adata.var.highly_variable].copy()
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
sc.tl.leiden(adata, resolution=0.4)
sc.pl.spatial(adata, color='leiden')

In [None]:
# Collapse the numeric Leiden clusters into annotated groups with a single
# mapping instead of chained per-label replace() calls on the categorical
# column (Series.replace on categorical data is deprecated in recent pandas).
# Labels that are not listed in the mapping are left unchanged, so re-running
# this cell is a no-op.
# NOTE(review): the cluster ids depend on the Leiden run above; re-check this
# mapping whenever the clustering parameters or the input data change.
cluster_annotation = {
    '0': 'malignant',
    '1': 'malignant',
    '2': 'malignant',
    '3': 'CAF',
    '4': 'others',
}
adata.obs['leiden'] = (
    adata.obs['leiden']
    .astype(str)
    .replace(cluster_annotation)
    .astype('category')
)

In [None]:
def fdr(p_vals):
    """Return Benjamini-Hochberg adjusted p-values.

    Each p-value is scaled by ``n / rank`` (rank 1 = smallest p-value) and the
    scaled values are then made monotone with a running minimum taken from the
    largest p-value downwards, so a smaller raw p-value never receives a larger
    adjusted value than a bigger one.

    Parameters
    ----------
    p_vals : array-like of float
        Raw p-values (a list or a NumPy array).

    Returns
    -------
    numpy.ndarray
        Adjusted p-values in the same order and shape as the input, capped
        at 1. NaN entries stay NaN but still count towards the number of tests.
    """
    p_arr = np.asarray(p_vals, dtype=float)
    flat = p_arr.ravel()
    n_tests = flat.size
    if n_tests == 0:
        return p_arr.copy()

    # Stable ascending order; NaN values are sorted to the end.
    order = np.argsort(flat, kind='mergesort')
    scaled = flat[order] * n_tests / np.arange(1, n_tests + 1)

    # Step-up correction: running minimum from the largest p-value backwards.
    # fmin skips NaN so a single NaN does not spread to every other value.
    monotone = np.fmin.accumulate(scaled[::-1])[::-1]
    monotone[monotone > 1] = 1

    # Scatter the adjusted values back to the original positions.
    adjusted = np.empty(n_tests, dtype=float)
    adjusted[order] = monotone
    return adjusted.reshape(p_arr.shape)

def MCAM_CAFs(adata, alpha=0.05):
    """Label CAF spots whose MCAM expression is significantly above the other CAF spots.

    For every spot annotated ``'CAF'`` in ``adata.obs['leiden']`` a one-sided
    Wilcoxon signed-rank test is run on the differences
    ``(MCAM of every other CAF spot) - (MCAM of this spot)`` with
    ``alternative='less'``. The p-values are adjusted with ``fdr`` and spots
    whose adjusted value is below ``alpha`` are relabelled ``'MCAM CAF'``.

    Parameters
    ----------
    adata : AnnData
        Must contain ``obs['leiden']`` and a gene named ``'MCAM'``.
    alpha : float, optional
        Threshold applied to the adjusted p-values (default 0.05).

    Returns
    -------
    AnnData
        An independent copy restricted to the CAF spots, with a new
        ``obs['leiden2']`` column holding ``'CAF'`` or ``'MCAM CAF'``.
    """
    # Copy the subset so the obs assignment below writes into a real AnnData
    # object rather than into a view of the caller's data.
    adata_CAF = adata[adata.obs['leiden'].isin(['CAF'])].copy()

    # to_df() densifies the matrix, so this works for sparse and dense X alike
    # and avoids slicing the AnnData object once per spot.
    mcam_expr = adata_CAF[:, 'MCAM'].to_df().to_numpy().ravel()

    # Default to 1.0 (no evidence) for spots that cannot be tested.
    p_values = np.ones(mcam_expr.size, dtype=float)
    for idx, spot_expr in enumerate(mcam_expr):
        diffs = np.delete(mcam_expr, idx) - spot_expr
        # The signed-rank test is undefined when there are no other spots or
        # when every difference is zero; keep the default p-value in that case.
        if not np.any(diffs):
            continue
        p_values[idx] = wilcoxon(diffs, alternative='less').pvalue

    is_mcam_high = fdr(p_values) < alpha

    # Object dtype so the longer replacement label is stored in full.
    labels = np.array(adata_CAF.obs['leiden'], dtype=object)
    labels[is_mcam_high] = 'MCAM CAF'
    adata_CAF.obs['leiden2'] = labels
    return adata_CAF

In [None]:
# Carry the annotated clusters over to the full-gene object, then flag the
# MCAM-high CAF spots on it.
adata_dis500.obs['leiden'] = adata.obs['leiden']
adata_CAF = MCAM_CAFs(adata_dis500)

# Barcode -> refined label, available for CAF spots only; every other spot
# keeps its 'leiden' annotation.
caf_refined = adata_CAF.obs['leiden2'].to_dict()
leiden_list = [
    caf_refined[spot] if spot in caf_refined else cluster
    for spot, cluster in adata.obs['leiden'].items()
]

# CAF spots that were not relabelled are reported as 'other CAF'.
leiden_list = ['other CAF' if cluster == 'CAF' else cluster for cluster in leiden_list]

# The list is assigned positionally to both objects.
# NOTE(review): this assumes adata and adata_dis500 hold the spots in the same
# order - confirm if either object is ever re-ordered or filtered by row.
adata.obs['leiden2'] = leiden_list
adata_dis500.obs['leiden2'] = leiden_list

In [None]:
# Plot the spots in spatial coordinates, coloured by the refined 'leiden2' annotation.
sc.pl.spatial(adata, color='leiden2')

In [None]:
# Matrix plot of selected marker genes per 'leiden2' group, scaled per gene
# (standard_scale='var').
# NOTE(review): adata was reduced to highly variable genes earlier, so these
# genes are presumably read from adata.raw - confirm they are all available.
sc.pl.matrixplot(adata, var_names=['EPCAM','ACTA2','COL1A1','C1QB','MZB1','KLRD1','MCAM'], groupby='leiden2',standard_scale='var')

In [None]:
# Start from the human CellChat ligand-receptor table provided by COMMOT and
# keep only the rows whose first column ('0') is one of the ligands of interest.
df_ligrec = ct.pp.ligand_receptor_database(database='CellChat', signaling_type=None, species='human')
# .copy() makes df_sub an independent frame, so the row added below is not
# written into a slice of df_ligrec (avoids pandas' SettingWithCopyWarning).
df_sub = df_ligrec[df_ligrec['0'].isin(['COL4A1','COL4A2','CD47'])].copy()
# Manually append a CD47 / SIRPA entry under the row label '679'.
# NOTE(review): the four columns are presumably ligand, receptor, pathway and
# signaling type - confirm against the COMMOT database format.
df_sub.loc['679'] = ['CD47', 'SIRPA', 'CD47', 'Cell-Cell Contact']

In [None]:
# Run COMMOT spatial communication inference on the full-gene object for the
# ligand-receptor pairs in df_sub; results are registered under the name
# 'user_database', which the later ct.tl.* calls refer to.
# NOTE(review): dis_thr=500 is a distance threshold in the units of the
# spatial coordinates - confirm the intended physical scale.
ct.tl.spatial_communication(adata_dis500,
    database_name='user_database', df_ligrec=df_sub, dis_thr=500, heteromeric=True)

In [None]:
# Compute the signaling direction for the CD47-SIRPA interaction (k=5).
# NOTE(review): a ligand-receptor string is passed as pathway_name here;
# presumably this resolves to the LR-pair result key - confirm in COMMOT docs.
ct.tl.communication_direction(adata_dis500, database_name='user_database', pathway_name='CD47-SIRPA', k=5)

In [None]:
# Cluster-level communication between the 'leiden2' groups, with 1000
# permutations, for each interaction of interest.
for lr_name in ('CD47-SIRPA', 'COL4A1-ITGA2_ITGB1', 'COL4A2-ITGA2_ITGB1'):
    ct.tl.cluster_communication(
        adata_dis500,
        database_name='user_database',
        pathway_name=lr_name,
        clustering='leiden2',
        n_permutations=1000,
    )

In [None]:
# Compute a position for each 'leiden2' cluster on adata_dis500.
# NOTE(review): presumably used to lay out cluster-level communication plots -
# confirm where the result is consumed.
ct.tl.cluster_position(adata_dis500, clustering='leiden2')

In [None]:
# Display the cluster-to-cluster communication p-values stored by
# ct.tl.cluster_communication for the COL4A1-ITGA2_ITGB1 interaction.
adata_dis500.uns['commot_cluster-leiden2-user_database-COL4A1-ITGA2_ITGB1']['communication_pvalue']