In [None]:
import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
# Load the DCIS1 spatial AnnData object.
st_adata = sc.read_h5ad('../data/DCIS1/spatial_DCIS1.h5ad')

# Crop out the damaged areas of the original tissue slide, following the original
# CellTrek study: only spots with x >= 1380 and y >= 2150 are kept.
spatial = st_adata.obsm['spatial']
x, y = spatial[:, 0], spatial[:, 1]
keep_idx = np.flatnonzero((x >= 1380) & (y >= 2150))
st_adata = st_adata[keep_idx, :]
print(st_adata)

# Show the retained spots on top of the high-resolution tissue image.
sc.pl.spatial(st_adata, img_key='hires', color='in_tissue', size=1, alpha_img=1)

In [None]:
# Load the Celloc mapping results; the first CSV column becomes the index.
Celloc_results = pd.read_csv('./regular_mapping_results/DCIS1/Celloc_mapping_results.csv',index_col=0)

# Keep only rows whose predicted location lies inside the cropped tissue region
# (same thresholds as the spot-level crop: x >= 1380 and y >= 2150).
x = Celloc_results['predict_x']
y = Celloc_results['predict_y']
keep_idx = np.where((x >= 1380) & (y >= 2150))[0]
# .copy() makes the filtered table an independent frame. The predict_x / predict_y
# columns are overwritten with jittered values further down; without the copy that
# assignment targets a slice of the loaded table and raises SettingWithCopyWarning.
Celloc_results = Celloc_results.iloc[keep_idx, :].copy()


def rand_jitter(arr,interval,seed_value=1):
    """Return ``arr`` with seeded uniform noise in ``[-interval/4, interval/4)`` added.

    Parameters
    ----------
    arr : one-dimensional array-like of numbers
        Values to jitter; one offset is drawn per element (``len(arr)`` draws).
    interval : float
        Scale of the jitter; offsets are drawn from ``[-interval/4, interval/4)``.
    seed_value : int, default 1
        Seed of the generator used for the offsets. Calls that share a seed draw
        the same underlying random sequence, so their offsets are proportional
        to one another (scaled by ``interval``).

    Returns
    -------
    The element-wise sum of ``arr`` and the drawn offsets.
    """
    # A private RandomState seeded with `seed_value` yields exactly the draws that
    # np.random.seed(seed_value) followed by np.random.uniform(...) would yield,
    # but it leaves NumPy's global random state untouched for the rest of the session.
    rng = np.random.RandomState(seed_value)
    return arr + rng.uniform(-interval/4, interval/4, len(arr))
# Jitter scale per axis: three times the median of the distinct gaps between
# consecutive distinct predicted coordinates.
distinct_x = np.sort(np.unique(Celloc_results['predict_x']))
distinct_y = np.sort(np.unique(Celloc_results['predict_y']))
x_interval = 3 * np.median(np.unique(np.diff(distinct_x)))
y_interval = 3 * np.median(np.unique(np.diff(distinct_y)))

# NOTE(review): both calls use rand_jitter's default seed_value, so the x and y
# offsets come from the same random sequence (they differ only by the interval
# scale). Confirm this is intended before changing it; it affects every
# downstream distance.
for axis_col, axis_interval in (('predict_x', x_interval), ('predict_y', y_interval)):
    Celloc_results[axis_col] = rand_jitter(Celloc_results[axis_col].values, axis_interval)
print(Celloc_results)

Cells annotated as `Epithelial3` are the tumor cells.

In [None]:
# Split the mapped cells by annotated cell type.
# epi3 is used read-only as the reference set for distance computations.
epi3 = Celloc_results.query("CellType == 'Epithelial3'")
# nkt receives new columns later (distance5, celltype, Tex_Score). Taking an
# explicit copy keeps those assignments off a query() result and avoids
# SettingWithCopyWarning.
nkt = Celloc_results.query("CellType == 'NK/T'").copy()

def find_nearest_epi3(nkt_cell, n, epi3_cells=None):
    """Mean Euclidean distance from one cell to its ``n`` nearest Epithelial3 cells.

    Parameters
    ----------
    nkt_cell : mapping or pandas.Series
        The query cell; must provide ``"predict_x"`` and ``"predict_y"``.
    n : int
        Number of nearest reference cells to average over. When fewer than ``n``
        reference cells exist, all of them are used.
    epi3_cells : pandas.DataFrame, optional
        Reference cells with ``predict_x`` and ``predict_y`` columns. Defaults to
        the module-level ``epi3`` frame, which keeps existing two-argument calls
        (e.g. ``nkt.apply(find_nearest_epi3, args=(5,), axis=1)``) working.

    Returns
    -------
    float
        Mean of the ``n`` smallest distances, or ``nan`` when there are no
        reference cells or ``n`` is smaller than 1.
    """
    reference = epi3 if epi3_cells is None else epi3_cells
    ref_xy = reference[["predict_x", "predict_y"]].to_numpy(dtype=float)
    # Nothing to average over: return nan explicitly instead of taking the mean
    # of an empty list.
    if n < 1 or ref_xy.shape[0] == 0:
        return np.nan

    query_xy = np.array([nkt_cell["predict_x"], nkt_cell["predict_y"]], dtype=float)
    # All distances in one vectorised call instead of a Python-level iterrows loop.
    distances = np.linalg.norm(ref_xy - query_xy, axis=1)
    # Only the distance values matter for the mean, so sort them alone; the
    # reference index labels are never compared.
    return np.sort(distances)[:n].mean()

# For every NK/T cell, store the mean distance to its 5 nearest Epithelial3 cells.
nkt["distance5"] = nkt.apply(lambda cell: find_nearest_epi3(cell, 5), axis=1)

In [None]:
# T-cell annotation table; the first column of the file becomes the index.
T_cell_ano = pd.read_csv('./regular_mapping_results/DCIS1/T_cell_ano.txt', index_col=0)

# Look up each NK/T cell's annotation by its index label and attach it to nkt.
# `.values` makes the assignment positional rather than index-aligned.
nkt_labels = nkt.index.values
for column in ('celltype', 'Tex_Score'):
    nkt[column] = T_cell_ano.loc[nkt_labels, column].values
print(nkt)

In [None]:
# Keep the first row for every index label that occurs more than once in nkt.
# The copy is taken AFTER filtering so that result_df is an independent frame:
# a 'distance' column is assigned to it later, and copying before the filter
# would leave that assignment operating on a slice (SettingWithCopyWarning).
result_df = nkt.loc[~nkt.index.duplicated(keep='first')].copy()
print(result_df)

## Figure 5i

In [None]:
import seaborn as sns
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(8, 6))

# Pearson correlation coefficient and P value between distance and Tex score
corr, pval = pearsonr(result_df['distance5'], result_df['Tex_Score'])

# Scatter plot with fitted regression line, drawn on the axes created above
sns.regplot(data=result_df, x='distance5', y='Tex_Score', ax=ax)

# Significance label: the first threshold the P value falls below decides the stars
text = "ns"
for cutoff, stars in ((0.001, "***"), (0.01, "**"), (0.05, "*")):
    if pval < cutoff:
        text = stars
        break

# P value, correlation coefficient and significance label in the top-left corner
note_style = dict(xycoords='axes fraction', fontsize=12, ha='left', va='top')
ax.annotate("p = {:.3e}".format(pval), xy=(0.05, 0.95), **note_style)
ax.annotate("r = {:.3e}".format(corr), xy=(0.05, 0.85), **note_style)
ax.annotate(text, xy=(0.05, 0.9), **note_style)
# plot_file = "../data/DCIS2/Tex_score_and_Dis.pdf"
# fig.savefig(plot_file,dpi=600,bbox_inches = "tight")
plt.show()

In [None]:
# Median of the "distance5" column
median_distance5 = result_df['distance5'].median()
# Label each cell by its position relative to that median:
# at or below the median -> 'proxT', above it -> 'distT'
is_proximal = result_df['distance5'] <= median_distance5
result_df['distance'] = is_proximal.map({True: 'proxT', False: 'distT'})
print(result_df)

## Figure 5j

In [None]:
# One sub-table of result_df per annotated T-cell subtype.
has_celltype = result_df['celltype'].eq
CD4Tex = result_df.loc[has_celltype('CD4Tex')]
CD4T = result_df.loc[has_celltype('CD4T')]
CD8Tex = result_df.loc[has_celltype('CD8Tex')]
CD8T = result_df.loc[has_celltype('CD8T')]
Treg = result_df.loc[has_celltype('Treg')]
NaiveT = result_df.loc[has_celltype('NaiveT')]

import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
from statannotations.Annotator import Annotator

# Subtype name -> sub-table, in the order the panels are drawn.
celltypes = dict(CD4Tex=CD4Tex, CD4T=CD4T, CD8Tex=CD8Tex, CD8T=CD8T, Treg=Treg, NaiveT=NaiveT)

# 2 x 3 grid of panels, flattened so panels can be paired with subtypes.
fig, axs = plt.subplots(2, 3, figsize=(6, 8))
axs = axs.ravel()

for ax, (celltype, data) in zip(axs, celltypes.items()):
    order = ['proxT', 'distT']
    pairs = [('proxT', 'distT')]

    # Tex score of proximal vs. distal cells for this subtype
    sns.boxplot(data=data, x='distance', y='Tex_Score', order=order, ax=ax,
                palette=['#E11F26', '#387EB8'],
                width=0.5, whis=0.4, showfliers=False)

    # Mann-Whitney test between the two groups, shown as significance stars
    annotator = Annotator(ax, pairs, data=data, x='distance', y='Tex_Score', order=order)
    annotator.configure(test='Mann-Whitney', text_format='star', line_height=0.03, line_width=1)
    annotator.apply_and_annotate()

    # Hide the top and right frame lines; use the same y-range in every panel
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.set_ylim(-0.15, 0.65)

    ax.set_title(celltype)

plt.tight_layout()
plt.show()

## Figure 5e

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
from statannotations.Annotator import Annotator

fig, ax = plt.subplots(figsize=(4, 3))

# Draw the box plot of distance5 per T-cell subtype
order = ['CD8T','NaiveT','CD4T','CD4Tex','Treg','CD8Tex']
box_colors = ['#97509E','#F7EC37','#387EB8','#4DAE49','#A55627','#F47E20']
sns.boxplot(data=result_df, x='celltype', y='distance5', order=order, ax=ax,
            palette=box_colors, width=0.5, whis=0.3, showfliers=False)

# Subtype pairs compared with a Mann-Whitney test, shown as significance stars
pairs = [("CD8T", "CD8Tex"), ("CD4T", "CD4Tex"), ("CD4T", "Treg")]
annotator = Annotator(ax, pairs, data=result_df, x='celltype', y='distance5', order=order)
annotator.configure(test='Mann-Whitney', text_format='star', line_height=0.03, line_width=1)
annotator.apply_and_annotate()

# Hide the top and right frame lines
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# NOTE(review): the figure is written under ../data/DCIS2 although this notebook
# reads its inputs from DCIS1 - confirm the target folder is intended.
plot_file = "../data/DCIS2/distance_P.pdf"
fig.savefig(plot_file, dpi=600, bbox_inches="tight")
plt.show()

## Figure 5g

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
from statannotations.Annotator import Annotator

fig, ax = plt.subplots(figsize=(4, 3))

# Draw the box plot of Tex score per T-cell subtype (all rows of T_cell_ano)
order = ['CD8T','NaiveT','CD4T','CD4Tex','Treg','CD8Tex']
box_colors = ['#97509E','#F7EC37','#387EB8','#4DAE49','#A55627','#F47E20']
sns.boxplot(data=T_cell_ano, x='celltype', y='Tex_Score', order=order, ax=ax,
            palette=box_colors, width=0.5, whis=0.3, showfliers=False)

# Subtype pairs compared with a Mann-Whitney test, shown as significance stars
pairs = [("CD8T", "CD8Tex"), ("CD4T", "CD4Tex"), ("CD4T", "Treg")]
annotator = Annotator(ax, pairs, data=T_cell_ano, x='celltype', y='Tex_Score', order=order)
annotator.configure(test='Mann-Whitney', text_format='star', line_height=0.03, line_width=1)
annotator.apply_and_annotate()

# Hide the top and right frame lines
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# NOTE(review): the figure is written under ../data/DCIS2 although this notebook
# reads its inputs from DCIS1 - confirm the target folder is intended.
plot_file = "../data/DCIS2/Tex_score_P.pdf"
fig.savefig(plot_file, dpi=600, bbox_inches="tight")
plt.show()

In [None]:
# NK/T rows of the mapping table. The explicit copy makes this an independent
# frame, so the column assignments below do not act on a boolean-mask slice of
# Celloc_results (which raises SettingWithCopyWarning).
sub_Celloc_results = Celloc_results[Celloc_results['CellType']=="NK/T"].copy()

# Attach annotation and distance values looked up by index label; `.values`
# makes each assignment positional rather than index-aligned.
row_labels = sub_Celloc_results.index.values
sub_Celloc_results["celltype"] = T_cell_ano.loc[row_labels,'celltype'].values
sub_Celloc_results["Tex_Score"] = T_cell_ano.loc[row_labels,'Tex_Score'].values
# NOTE(review): result_df keeps only the first row per index label, so rows that
# share a label all receive that first row's distance5 - confirm this is intended.
sub_Celloc_results["distance5"] = result_df.loc[row_labels,'distance5'].values
print(sub_Celloc_results)

In [None]:
# Select one AnnData row per NK/T cell via the 'Predict' column
# (presumably the spot each cell was mapped to - verify against the mapping file).
# .copy() turns the selection into a real AnnData object, so the obs / obsm
# assignments below modify that object directly instead of a view of st_adata.
sub_st_adata = st_adata[sub_Celloc_results['Predict']].copy()
sub_st_adata.obs['celltype'] = list(sub_Celloc_results['celltype'])
sub_st_adata.obs['Tex_Score'] = list(sub_Celloc_results['Tex_Score'])
sub_st_adata.obs['distance5'] = list(sub_Celloc_results['distance5'])
sub_st_adata.obsm['spatial'] = sub_st_adata.obsm['spatial'].astype(float)

coordinate_data = pd.DataFrame(sub_st_adata.obsm['spatial'], index=sub_st_adata.obs_names, columns=['x','y'])


def rand_jitter(arr, interval, seed_value=1):
    """Return ``arr`` with seeded uniform noise in ``[-interval/4, interval/4)`` added.

    Parameters
    ----------
    arr : one-dimensional array-like of numbers
        Values to jitter; one offset is drawn per element.
    interval : float
        Scale of the jitter.
    seed_value : int, default 1
        Seed of the private generator, so repeated runs give the same offsets.

    Returns
    -------
    The element-wise sum of ``arr`` and the drawn offsets.
    """
    # Seeded and private: the plotted coordinates are reproducible across runs
    # and NumPy's global random state is left untouched.
    rng = np.random.RandomState(seed_value)
    return arr + rng.uniform(-interval/4, interval/4, len(arr))


# Jitter scale per axis: four times the median of the distinct gaps between
# consecutive distinct coordinates.
x_interval = np.median(np.unique(np.diff(np.sort(np.unique(coordinate_data['x'])))))*4
y_interval = np.median(np.unique(np.diff(np.sort(np.unique(coordinate_data['y'])))))*4
# Different seeds per axis so the x and y offsets are drawn independently.
coordinate_data['x'] = rand_jitter(coordinate_data['x'].values, x_interval, seed_value=1)
coordinate_data['y'] = rand_jitter(coordinate_data['y'].values, y_interval, seed_value=2)
sub_st_adata.obsm['spatial'] = np.array(coordinate_data[['x','y']])
print(sub_st_adata.obsm['spatial'])

## Figure 5h

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Three-colour ramp used to display the Tex score.
c_list = ["#433C84", "#1B928B", "#E0E318"]
mycolor_cmp = mcolors.LinearSegmentedColormap.from_list('mycolor', c_list)

# Plot options collected in one place, then passed to scanpy's spatial plot.
tex_score_plot_options = dict(
    cmap=mycolor_cmp,
    color=['Tex_Score'],
    ncols=5,
    wspace=0.25,
    hspace=0.3,
    spot_size=40,
    img_key='hires',
    alpha_img=0.6,
    vmin=0,
    vmax='p99.9',
    save='predict.pdf',
)
sc.pl.spatial(sub_st_adata, **tex_score_plot_options)

## Figure 5d

In [None]:
# Spatial plot of the mapped T cells coloured by annotated subtype; also saved to file.
celltype_plot_options = dict(
    spot_size=40,
    img_key='hires',
    color='celltype',
    alpha_img=0.6,
    save='Compass_6T_cell.pdf',
    palette=['#387EB8','#4DAE49','#97509E','#F47E20','#F7EC37','#A55627'],
)
sc.pl.spatial(sub_st_adata, **celltype_plot_options)