# RTEs are dynamically expressed in neurodegenerative diseases
This script explores the disease (stage) specificity of RTE expression, corresponding to Figure 4 and Supplemental Figure S4. <br>
Author: Wankun Deng
Email: dengwankun@hotmail.com
GitHub: https://github.com/wkdeng

In [None]:
%load_ext rpy2.ipython
import os
# Relative chdir: every later relative path in this notebook (e.g. '../data/analysis/...')
# is resolved against this directory, so the kernel has to start in the parent of 'scRTE'.
os.chdir('scRTE/scripts')
# Star-import of the project's shared helper module.
# NOTE(review): presumably this is where the unqualified names used below come from
# (np, pd, scipy, sc, anndata, plt, sns, sys, Pool, rtes, te_cls, ...) — confirm in common.py.
from common import *
# NOTE(review): presumably populates `datasets`, `all_dfs` and `all_cell_umaps`,
# which the following cells read — confirm in common.py.
load_sc_data()

In [None]:
%%R
# Make sure the R session runs from the project's scripts directory, then load
# the shared R helpers from common.r.
scripts_dir <- '/home/wdeng3/workspace/Codespace/scRTE/scripts'
if (getwd() != scripts_dir) setwd(scripts_dir)
source('common.r')

In [None]:
# Per-RTE differential expression (disease vs. control) within excitatory
# neurons ('Ex'), computed for every dataset and collected into `dataset_diff`.

def calculate_one_rte(args):
    """Compare one RTE between disease and control cells.

    Parameters
    ----------
    args : sequence of 4 items
        ``(control_list, disease_list, rte, dataset)``; the two lists hold the
        per-cell expression values of ``rte`` in control / disease cells.
        A single packed argument is used so the function works with ``Pool.map``.

    Returns
    -------
    tuple
        ``(pval, fc, rte, dataset, pce1, pce2, mean1, mean2)`` where ``pval`` is
        the Mann-Whitney U p-value (disease vs. control), ``fc`` is
        ``log2(mean1/mean2)``, ``pce1``/``pce2`` are the fractions of disease /
        control cells with a value > 1, and ``mean1``/``mean2`` are the disease /
        control means.
    """
    control_list,disease_list,rte,dataset=args
    pval=scipy.stats.mannwhitneyu(disease_list,control_list)[1]
    mean1=np.mean(disease_list)
    mean2=np.mean(control_list)
    # No pseudocount is added, so a zero mean in either group yields inf/NaN here.
    fc=np.log2(mean1/mean2)
    pce1=np.count_nonzero(disease_list>1)/(1.0*len(disease_list))
    pce2=np.count_nonzero(control_list>1)/(1.0*len(control_list))
    return pval,fc,rte,dataset,pce1,pce2,mean1,mean2


# Elements whose class is LTR, LINE or SINE. This does not depend on the dataset,
# so it is built once; the set gives O(1) membership tests when filtering columns.
rrtes=[name for name,cls in te_cls.items() if cls in ('LTR','LINE','SINE')]
rrte_set=set(rrtes)

# Never start more workers than there are CPUs; 50 stays the upper bound.
n_workers=min(50,os.cpu_count() or 1)

dataset_diff=[]
# One pool for all datasets; the context manager also tears the workers down
# if a dataset fails part-way through.
with Pool(n_workers) as pool:
    for dataset in datasets:
        cell_exp=all_dfs[dataset]
        cell_umap=all_cell_umaps[dataset]

        print(dataset)
        is_ex=cell_umap['predicted.celltype'].isin(['Ex'])
        is_control=cell_umap['Diagnosis']=='Control'
        neurons_control=cell_umap.loc[is_ex & is_control,:].index
        neurons_disease=cell_umap.loc[is_ex & ~is_control,:].index
        rte_exp=cell_exp.loc[:,[x for x in cell_exp.columns if x in rrte_set]]

        # Slice the rows once per group instead of once per RTE column.
        control_rte_exp=rte_exp.loc[neurons_control,:]
        disease_rte_exp=rte_exp.loc[neurons_disease,:]
        args=[[control_rte_exp[rte],disease_rte_exp[rte],rte,dataset] for rte in rte_exp.columns]

        all_tests=pool.map(calculate_one_rte,args)
        dataset_diff.extend(all_tests)
dataset_diff=pd.DataFrame(dataset_diff,columns=['Pvalue','log2(FC)','RTE','Dataset','PCE1','PCE2','Mean1','Mean2'])

## DE genes

In [None]:
# Differential expression of non-RTE, non-mitochondrial genes between AD and
# control excitatory neurons ('Ex') of one dataset, shown as a volcano-style scatter.
dataset='AD_HS_00003.1'

cell_exp=all_dfs[dataset].copy()
cell_umap=all_cell_umaps[dataset].copy()
cell_exp['predicted.celltype']=cell_umap['predicted.celltype']
cell_exp['Diagnosis']=cell_umap['Diagnosis']

# Select gene and metadata columns by name. The expression matrix must not depend
# on the metadata columns happening to be the last four columns of the frame.
meta_cols=['predicted.celltype','UMAP_1','UMAP_2','Diagnosis']
cols=[x for x in cell_exp.columns if x not in rtes and x not in meta_cols]
cols=[x for x in cols if not x.startswith('MT-')]
cell_exp=cell_exp.loc[cell_exp['predicted.celltype']=='Ex',cols+meta_cols]

adata=anndata.AnnData(X=cell_exp.loc[:,cols])

# Every non-control diagnosis is pooled into a single 'AD' group.
adata.obs['Diagnosis']=['AD' if x!='Control' else 'Control' for x in cell_exp['Diagnosis']]
adata.obs['CellType']=cell_exp['predicted.celltype']
adata.obsm['X_umap']=cell_exp[['UMAP_1','UMAP_2']].to_numpy()
# 'MT-' columns were removed above, so no remaining variable matches this prefix.
adata.var["mito"] = adata.var_names.str.startswith("MT-")
adata.layers["counts"] = adata.X.copy()
sc.pp.calculate_qc_metrics(adata, qc_vars=["mito"], inplace=True)
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.tl.pca(adata, svd_solver='arpack')

sc.tl.rank_genes_groups(adata, 'Diagnosis', method='t-test',pts=True)
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

# Collect the 'AD' group's ranking into one table.
names=pd.DataFrame.from_records(adata.uns['rank_genes_groups']['names'])['AD']
lfc=pd.DataFrame.from_records(adata.uns['rank_genes_groups']['logfoldchanges'])['AD']
pvals=pd.DataFrame.from_records(adata.uns['rank_genes_groups']['pvals_adj'])['AD']
df=pd.concat([names,lfc,pvals],axis=1)
df.columns=['Gene','LFC','Padj']
# Floor anything that is not > 0 at the smallest positive float so -log10 stays finite.
# NaN compares False against 0 and is therefore floored as well.
df['Padj']=np.where(df['Padj']>0,df['Padj'],sys.float_info.min)
df['Padj']=-np.log10(df['Padj'])

# Append the per-group values stored under 'pts', reordered to follow the ranking.
pts=adata.uns['rank_genes_groups']['pts']
pts=pts.loc[df['Gene'],:]
df.index=df['Gene']
df=pd.concat([df,pts],axis=1)
df.columns=['Gene','Log2FC','P_adj','AD_PoE','Control_PoE']

with plt.rc_context({"figure.figsize": (16, 8), "figure.dpi": (72)}):
    df_tmp=df
    sns.scatterplot(data=df_tmp,x='Log2FC',y='P_adj',hue='AD_PoE',size='Control_PoE')
    plt.xlim([-3,3])

## DE RTEs

In [None]:
# Differential RTE expression (non-control vs. control) in excitatory neurons for
# every dataset in `dataset_diff`: build one result table per dataset, draw a grid
# of volcano-style scatters, label each RTE Up/Down/NotSig and write the tables out.
all_diff_df={}
meta_cols=['predicted.celltype','UMAP_1','UMAP_2','Diagnosis']
for dataset in dataset_diff['Dataset'].unique():
    print(dataset)
    cell_exp=all_dfs[dataset].copy()
    cell_umap=all_cell_umaps[dataset].copy()
    cell_exp['predicted.celltype']=cell_umap['predicted.celltype']
    cell_exp['Diagnosis']=cell_umap['Diagnosis']

    # RTE columns first, the four metadata columns last.
    cols=[x for x in cell_exp.columns if x  in rtes]
    cols.extend(meta_cols)
    cell_exp=cell_exp.loc[cell_exp['predicted.celltype']=='Ex',cols]

    adata=anndata.AnnData(X=cell_exp.iloc[:,:-len(meta_cols)])
    # Every non-control diagnosis is labelled 'AD', whatever the dataset's disease;
    # the output column names below (AD_PoE, ...) rely on that label.
    adata.obs['Diagnosis']=['AD' if x!='Control' else 'Control' for x in cell_exp['Diagnosis']]
    adata.obs['CellType']=cell_exp['predicted.celltype']
    adata.obsm['X_umap']=cell_exp[['UMAP_1','UMAP_2']].to_numpy()
    adata.var["mito"] = adata.var_names.str.startswith("MT-")
    adata.layers["counts"] = adata.X.copy()
    sc.pp.calculate_qc_metrics(adata, qc_vars=["mito"], inplace=True)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)

    # NOTE(review): the HVG/PCA results are not read again anywhere in this cell.
    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
    sc.tl.pca(adata, svd_solver='arpack')

    sc.tl.rank_genes_groups(adata, 'Diagnosis', method='t-test',pts=True)

    names=pd.DataFrame.from_records(adata.uns['rank_genes_groups']['names'])['AD']
    lfc=pd.DataFrame.from_records(adata.uns['rank_genes_groups']['logfoldchanges'])['AD']
    pvals=pd.DataFrame.from_records(adata.uns['rank_genes_groups']['pvals_adj'])['AD']
    df=pd.concat([names,lfc,pvals],axis=1)
    df.columns=['Gene','LFC','Padj']
    # Floor anything that is not > 0 (including NaN) so -log10 stays finite.
    df['Padj']=np.where(df['Padj']>0,df['Padj'],sys.float_info.min)
    df['Padj']=-np.log10(df['Padj'])

    pts=adata.uns['rank_genes_groups']['pts']
    pts=pts.loc[df['Gene'],:]
    df.index=df['Gene']
    df=pd.concat([df,pts],axis=1)
    df.columns=['Gene','Log2FC','P_adj','AD_PoE','Control_PoE']
    all_diff_df[dataset]=df.copy()


# One panel per dataset on a fixed 4x4 grid (more than 16 datasets raises IndexError).
fig,axs=plt.subplots(4,4,figsize=[45,32])
for count,dataset in enumerate(dataset_diff['Dataset'].unique()):
    ax=axs[count//4,count%4]
    df_tmp=all_diff_df[dataset]
    sns.scatterplot(data=df_tmp,x='Log2FC',y='P_adj',hue='AD_PoE',size='Control_PoE',ax=ax)
    ax.set_xlim([-3, 3])

    # Annotate up to 20 RTEs with |Log2FC| > 0.5, highest -log10(adjusted P) first.
    # The chained sort returns a new frame instead of sorting a .loc slice in place.
    top=df_tmp.loc[df_tmp['Log2FC'].abs()>0.5,:].sort_values(by='P_adj',ascending=False).head(20)
    for gene,x,y in zip(top['Gene'],top['Log2FC'],top['P_adj']):
        ax.annotate(gene,(x,y))
    ax.set_title(dataset)

# P_adj already holds -log10(adjusted P), so the cutoff is -log10(0.01).
sig_cutoff=-np.log10(0.01)
for dataset,diff in all_diff_df.items():
    significant=diff['P_adj']>sig_cutoff
    # Vectorised labelling; rows matching neither condition (including NaN) are 'NotSig'.
    diff['Exp_trend']=np.select(
        [significant & (diff['Log2FC']>0.5), significant & (diff['Log2FC']< -0.5)],
        ['Up','Down'],
        default='NotSig')
    diff.to_csv(f'../data/analysis/{dataset}.diff.txt',sep='\t',header=True,index=False)

In [None]:
%%R
# Volcano plots of differential RTE expression for three datasets, built from the
# per-dataset '<dataset>.diff.txt' tables; shown inline and written to one PDF.

# Read one diff table and keep rows where AD_PoE or Control_PoE is above zero.
read_diff <- function(path) {
    tab <- read.csv(path, sep='\t')
    tab[tab$AD_PoE>0 | tab$Control_PoE>0,]
}

# Volcano plot of `df` with text labels for the rows in `to_anno`.
# Colours are keyed by Exp_trend level NAME, so a dataset that lacks one level
# (e.g. no 'Down' RTEs) keeps the same colour for the remaining levels.
volcano_plot <- function(df, to_anno) {
    ggplot(data=df,aes(x=Log2FC,y=P_adj,col=Exp_trend))+geom_point()+theme_Publication()+
        ylab('-log10(Adjusted P)')+xlab('Log2FC')+xlim(-3,3)+
        geom_vline(xintercept=c(-0.5, 0.5), col="red")+
        scale_color_manual(values=c(Down="#386cb0", NotSig="black", Up="#fdb462"))+
        geom_hline(yintercept=-log10(0.01), col="red")+theme(legend.position="none")+
        geom_text(data=to_anno, aes(x=Log2FC, y=P_adj, label=Gene))
}

# AD_HS_00003.1: label significant RTEs from a fixed list of L1 subfamilies.
df<-read_diff('../data/analysis/AD_HS_00003.1.diff.txt')
to_anno<-df[df$Exp_trend !='NotSig' ,]
to_anno<-to_anno[to_anno$Gene %in% c('L1MCa','L1PA8A','L1PA4','L1M1','L1PB1','L1M3d','L1P4d','L1PA10','L1PA6','L1PA5','L1PA15','L1PA12','L1PA7',
                                        'L1MA3','L1MB1','L1MEd','L1PA16','L1MA9','L1PREC2','L1MA8'),]
p1<-volcano_plot(df, to_anno)
print(p1)

# MS_HS_00002: label rows with P_adj > 200 and Log2FC > 0.5 or < -0.8.
df<-read_diff('../data/analysis/MS_HS_00002.diff.txt')
to_anno<-df[df$P_adj> 200 ,]
to_anno<-to_anno[to_anno$Log2FC> 0.5 | to_anno$Log2FC< -0.8 ,]
p2<-volcano_plot(df, to_anno)
print(p2)

# PD_HS_00001: label rows with P_adj > 10 and |Log2FC| > 0.5.
df<-read_diff('../data/analysis/PD_HS_00001.diff.txt')
to_anno<-df[df$P_adj> 10 ,]
to_anno<-to_anno[to_anno$Log2FC> 0.5 | to_anno$Log2FC< -0.5 ,]
p3<-volcano_plot(df, to_anno)
print(p3)

pdf('../data/analysis/figures/de_volcano.pdf')
print(p1)
print(p2)
print(p3)
dev.off()

## Sample RTEs in UMAP
### AD_HS_00003.1

In [None]:
# Export log1p-transformed expression of two marker RTEs, plus diagnosis and UMAP
# coordinates, for the excitatory neurons ('Ex') of AD_HS_00003.1.
marker_rtes=['L1PBa','AluYb8']
disease_exp=all_dfs['AD_HS_00003.1'].copy()
dt3_umap=all_cell_umaps['AD_HS_00003.1'].copy()
for meta_col in ('predicted.celltype','Diagnosis'):
    disease_exp[meta_col]=dt3_umap[meta_col]

is_ex=disease_exp['predicted.celltype']=='Ex'
disease_exp=disease_exp.loc[is_ex,:]
disease_exp[marker_rtes]=np.log1p(disease_exp[marker_rtes])

export_cols=marker_rtes+['Diagnosis','UMAP_1','UMAP_2']
disease_exp[export_cols].to_csv('../data/analysis/disease_ex_ad3.1.txt',sep='\t',header=True,index=True)

In [None]:
%%R
# Marker-RTE panels for AD_HS_00003.1: three UMAP scatters (diagnosis, L1PBa,
# AluYb8) followed by two violin plots, written to a PDF and then shown inline.
df<-read.csv('../data/analysis/disease_ex_ad3.1.txt',sep='\t',header=T,row.names=1)

# Styling pieces shared by the UMAP panels.
bare_axes<-theme(panel.border = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
marker_palette<-c('#00F5E8','#EEEEEE','#F21D92')
no_legend<-theme(legend.position="none")

umap_diagnosis<-ggplot(df,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=Diagnosis),size=0.01)+
    theme_Publication()+scale_colour_Publication()+bare_axes

# Rows are sorted by the plotted marker before each expression panel is built
# (presumably so that cells with higher values are drawn last).
df<-df[order(df$L1PBa),]
umap_l1pba<-ggplot(df,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=L1PBa),size=0.01)+
    theme_Publication()+scale_colour_gradientn(colors=marker_palette)+no_legend+bare_axes

df<-df[order(df$AluYb8),]
umap_aluyb8<-ggplot(df,aes(x=UMAP_1,y=UMAP_2))+geom_point(aes(color=AluYb8),size=0.01)+
    theme_Publication()+scale_colour_gradientn(colors=marker_palette)+no_legend+bare_axes

violin_l1pba<-ggplot(df,aes(x=Diagnosis,y=L1PBa,fill=Diagnosis))+geom_violin()+
    theme_Publication()+ylab('L1PBa')+xlab('Diagnosis')
violin_aluyb8<-ggplot(df,aes(x=Diagnosis,y=AluYb8,fill=Diagnosis))+geom_violin()+
    theme_Publication()+ylab('AluYb8')+xlab('Diagnosis')

ps<-list(umap_diagnosis,umap_l1pba,umap_aluyb8,violin_l1pba,violin_aluyb8)

# First pass goes into the PDF device, second pass into the notebook output.
pdf('../data/analysis/figures/marker_disease_ad3.1.pdf')
for(p in ps){
    plot(p)
}
dev.off()
for(p in ps){
    plot(p)
}

### MS_HS_00002

In [None]:
# MS_HS_00002: export marker-RTE expression of excitatory neurons ('Ex') for the
# violin plots, then draw UMAP panels (diagnosis, L3, AluYa5) with scanpy.
ms_markers=['L3','AluYa5']
disease_exp=all_dfs['MS_HS_00002'].copy()
dt3_umap=all_cell_umaps['MS_HS_00002'].copy()
disease_exp['predicted.celltype']=dt3_umap['predicted.celltype']
disease_exp['Diagnosis']=dt3_umap['Diagnosis']
disease_exp=disease_exp.loc[disease_exp['predicted.celltype']=='Ex',:]
# log1p is applied to the columns that are actually exported. Previously it was
# applied to L1PBa/AluYb8 (left over from the AD cell), so L3/AluYa5 went out raw.
disease_exp[ms_markers]=np.log1p(disease_exp[ms_markers])
disease_exp[ms_markers+['Diagnosis','UMAP_1','UMAP_2']].to_csv('../data/analysis/disease_ex_ms2.txt',sep='\t',header=True,index=True)

dt3_umap=all_cell_umaps['MS_HS_00002'].copy()
dt3_exp=all_dfs['MS_HS_00002'].copy()

# Rebuild the table so that Diagnosis, predicted.celltype, UMAP_1 and UMAP_2 are
# the last four columns; the iloc[:,:-4] slices below rely on that layout.
dt3_exp['Diagnosis']=dt3_umap['Diagnosis']
dt3_exp['predicted.celltype']=dt3_umap['predicted.celltype']
dt3_exp.drop(['UMAP_1','UMAP_2'],axis=1,inplace=True)
dt3_exp['UMAP_1']=dt3_umap['UMAP_1']
dt3_exp['UMAP_2']=dt3_umap['UMAP_2']
dt3_exp.fillna(0, inplace=True)

# AnnData over every feature column, restricted to excitatory neurons.
dt3_exp_neurons=dt3_exp.loc[dt3_exp['predicted.celltype']=='Ex',:]
adata_dt3=anndata.AnnData(X=dt3_exp_neurons.iloc[:,:-4])

adata_dt3.obs['Diagnosis']=dt3_exp_neurons['Diagnosis']
adata_dt3.obs['CellType']=dt3_exp_neurons['predicted.celltype']
adata_dt3.obsm['X_umap']=dt3_exp_neurons[['UMAP_1','UMAP_2']].to_numpy()
adata_dt3.var["mito"] = adata_dt3.var_names.str.startswith("MT-")
adata_dt3.layers["counts"] = adata_dt3.X.copy()
sc.pp.calculate_qc_metrics(adata_dt3, qc_vars=["mito"], inplace=True)
sc.pp.normalize_total(adata_dt3, target_sum=1e4)
sc.pp.log1p(adata_dt3)

sc.pp.highly_variable_genes(adata_dt3, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.tl.pca(adata_dt3, svd_solver='arpack')

sc.pp.neighbors(adata_dt3, n_neighbors=10, n_pcs=40)
# The embedding is not recomputed: the coordinates from dt3_umap are set again.
adata_dt3.obsm['X_umap']=dt3_exp_neurons[['UMAP_1','UMAP_2']].to_numpy()

# RTE-only AnnData over all cells whose predicted type is not 'Mic'.
# NOTE(review): the rank_genes_groups result below is not read again in this cell.
dt3_exp_rtes=dt3_exp.loc[:,[x for x in dt3_exp.columns if x in rtes]]
dt3_exp_rtes=pd.concat([dt3_exp_rtes,dt3_exp[['Diagnosis','predicted.celltype','UMAP_1','UMAP_2']]],axis=1)
dt3_exp_rtes=dt3_exp_rtes.loc[dt3_exp_rtes['predicted.celltype']!='Mic',:]
adata_rtes=anndata.AnnData(X=dt3_exp_rtes.iloc[:,:-4])

adata_rtes.obs['Diagnosis']=dt3_exp_rtes['Diagnosis']
adata_rtes.obs['CellType']=dt3_exp_rtes['predicted.celltype']
adata_rtes.obsm['X_umap']=dt3_exp_rtes[['UMAP_1','UMAP_2']].to_numpy()
adata_rtes.var["mito"] = adata_rtes.var_names.str.startswith("MT-")
adata_rtes.layers["counts"] = adata_rtes.X.copy()
sc.pp.calculate_qc_metrics(adata_rtes, qc_vars=["mito"], inplace=True)
sc.pp.normalize_total(adata_rtes, target_sum=1e4)
sc.pp.log1p(adata_rtes)

sc.tl.rank_genes_groups(adata_rtes, 'Diagnosis', method='t-test')

# Three-colour map for the expression panels, same colours as the R UMAP panels.
colors_high=['#00F5E8','#EEEEEE','#F21D92']
cmap_high=LinearSegmentedColormap.from_list('cmap_high', colors_high, N=100)

fig,axs=plt.subplots(1,3,figsize=[45,8],dpi=300)
sc.pl.umap(adata_dt3, color='Diagnosis',ax=axs[0],show=False,legend_fontsize=15)
sc.pl.umap(adata_dt3, color='L3',ax=axs[1],show=False,color_map=cmap_high)
sc.pl.umap(adata_dt3, color='AluYa5',ax=axs[2],show=False,color_map=cmap_high)

fig.savefig('../data/analysis/figures/marker_disease_ms2.pdf',dpi=300)

In [None]:
%%R
# Violin plots of L3 and AluYa5 by diagnosis for MS_HS_00002 excitatory neurons,
# written to a PDF and then shown inline.
ps<-list()
df<-read.csv('../data/analysis/disease_ex_ms2.txt',sep='\t',header=T,row.names=1)

ps[[length(ps)+1]]<-ggplot(df,aes(x=Diagnosis,y=L3,fill=Diagnosis))+geom_violin()+theme_Publication()+ylab('L3')+xlab('Diagnosis')
ps[[length(ps)+1]]<-ggplot(df,aes(x=Diagnosis,y=AluYa5,fill=Diagnosis))+geom_violin()+theme_Publication()+ylab('AluYa5')+xlab('Diagnosis')
# Use a file name of its own: 'marker_disease_ms2.pdf' is the file the Python cell
# saves the UMAP panels to, and writing the violins there overwrote that figure.
pdf('../data/analysis/figures/marker_disease_ms2_violin.pdf',width=7)
for(p in ps){
    plot(p)
}
dev.off()
for(p in ps){
    plot(p)
}

### PD_HS_00001

In [None]:
# PD_HS_00001: UMAP panels (diagnosis, MSTA-int, MLT1-int) for excitatory neurons.
meta_cols=['Diagnosis','predicted.celltype','UMAP_1','UMAP_2']
dt3_umap=all_cell_umaps['PD_HS_00001'].copy()
dt3_exp=all_dfs['PD_HS_00001'].copy()

# Take the four metadata columns from dt3_umap and put them last, in meta_cols
# order; the positional slices below depend on that layout.
dt3_exp.drop(['UMAP_1','UMAP_2'],axis=1,inplace=True)
for meta_col in meta_cols:
    dt3_exp[meta_col]=dt3_umap[meta_col]
dt3_exp.fillna(0, inplace=True)

# AnnData over every feature column, restricted to excitatory neurons ('Ex').
is_ex=dt3_exp['predicted.celltype']=='Ex'
dt3_exp_neurons=dt3_exp.loc[is_ex,:]
neuron_umap=dt3_exp_neurons[['UMAP_1','UMAP_2']]
adata_dt3=anndata.AnnData(X=dt3_exp_neurons.iloc[:,:-len(meta_cols)])

adata_dt3.obs['Diagnosis']=dt3_exp_neurons['Diagnosis']
adata_dt3.obs['CellType']=dt3_exp_neurons['predicted.celltype']
adata_dt3.obsm['X_umap']=neuron_umap.to_numpy()
adata_dt3.var["mito"] = adata_dt3.var_names.str.startswith("MT-")
adata_dt3.layers["counts"] = adata_dt3.X.copy()
sc.pp.calculate_qc_metrics(adata_dt3, qc_vars=["mito"], inplace=True)
sc.pp.normalize_total(adata_dt3, target_sum=1e4)
sc.pp.log1p(adata_dt3)

sc.pp.highly_variable_genes(adata_dt3, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.tl.pca(adata_dt3, svd_solver='arpack')

sc.pp.neighbors(adata_dt3, n_neighbors=10, n_pcs=40)
# The embedding is not recomputed: the coordinates from dt3_umap are set again.
adata_dt3.obsm['X_umap']=neuron_umap.to_numpy()

# RTE-only AnnData over all cells whose predicted type is not 'Mic'.
rte_cols=[x for x in dt3_exp.columns if x in rtes]
dt3_exp_rtes=pd.concat([dt3_exp.loc[:,rte_cols],dt3_exp[meta_cols]],axis=1)
not_mic=dt3_exp_rtes['predicted.celltype']!='Mic'
dt3_exp_rtes=dt3_exp_rtes.loc[not_mic,:]
adata_rtes=anndata.AnnData(X=dt3_exp_rtes.iloc[:,:-len(meta_cols)])

adata_rtes.obs['Diagnosis']=dt3_exp_rtes['Diagnosis']
adata_rtes.obs['CellType']=dt3_exp_rtes['predicted.celltype']
adata_rtes.obsm['X_umap']=dt3_exp_rtes[['UMAP_1','UMAP_2']].to_numpy()
adata_rtes.var["mito"] = adata_rtes.var_names.str.startswith("MT-")
adata_rtes.layers["counts"] = adata_rtes.X.copy()
sc.pp.calculate_qc_metrics(adata_rtes, qc_vars=["mito"], inplace=True)
sc.pp.normalize_total(adata_rtes, target_sum=1e4)
sc.pp.log1p(adata_rtes)

sc.tl.rank_genes_groups(adata_rtes, 'Diagnosis', method='t-test')

# Three-colour map used for the two expression panels.
colors_high=['#00F5E8','#EEEEEE','#F21D92']
cmap_high=LinearSegmentedColormap.from_list('cmap_high', colors_high, N=100)

fig,axs=plt.subplots(1,3,figsize=[45,8],dpi=300)
sc.pl.umap(adata_dt3, color='Diagnosis',ax=axs[0],show=False,legend_fontsize=15)
for panel,marker in zip(axs[1:],['MSTA-int','MLT1-int']):
    sc.pl.umap(adata_dt3, color=marker,ax=panel,show=False,color_map=cmap_high)

fig.savefig('../data/analysis/figures/marker_disease_pd1.pdf',dpi=300)