**Author:** Elias Rafael Ruiz-Morales

**Institution:** Wellcome Sanger Institute

**July, 2023**

---

## Generate cellphoneDB input files (1) _meta.tsv and (2) _degs.tsv

In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import os
import sys
sys.executable

'/opt/conda/bin/python'

In [2]:
def recoverDEGs(DEGdf, infection, celltype, direction, padj_cutoff=0.05, logfc_cutoff=0.1):
    """Select significant up- or down-regulated genes from a DEG table.

    Parameters
    ----------
    DEGdf : pandas.DataFrame
        DEG table indexed by gene, with at least the columns ``logFC`` and
        ``p_val_adj``. It is not modified.
    infection : str
        Infection label; appended to ``celltype`` to build the ``cluster`` name.
    celltype : str
        Cell type label, stored in the ``cell_type`` column.
    direction : {'up', 'down'}
        'up' keeps genes with ``logFC > logfc_cutoff``;
        'down' keeps genes with ``logFC < -logfc_cutoff``.
    padj_cutoff : float, default 0.05
        Rows are kept only when ``p_val_adj`` is strictly below this value.
    logfc_cutoff : float, default 0.1
        Minimum absolute log fold change (strict inequality).

    Returns
    -------
    pandas.DataFrame
        Copy of the selected rows with three extra columns appended:
        ``cell_type``, ``cluster`` (``celltype + '.' + infection``) and
        ``gene`` (the row index values).

    Raises
    ------
    ValueError
        If ``direction`` is neither 'up' nor 'down'.
    """
    if direction == 'up':
        passes_logfc = DEGdf['logFC'] > logfc_cutoff
    elif direction == 'down':
        passes_logfc = DEGdf['logFC'] < -logfc_cutoff
    else:
        # Previously an unknown direction fell through and returned None,
        # which only failed later at the first attribute access.
        raise ValueError("direction must be 'up' or 'down', got %r" % (direction,))

    passes_padj = DEGdf['p_val_adj'] < padj_cutoff

    # Copy so that the new columns never write into the caller's frame.
    selected = DEGdf[passes_logfc & passes_padj].copy()

    # Annotation columns; 'cluster' must match the labels used in the meta file.
    selected['cell_type'] = celltype
    selected['cluster'] = celltype + '.' + infection
    selected['gene'] = list(selected.index.values)

    return selected


## Prepare INPUT

### (1) Load AnnData: single-cell explants from Toxoplasma infection

In [3]:
adata = sc.read('../../data_integration/results/scVI/toxoplasma_singleCell_24h.h5ad')

In [4]:
adata.obs['cell_type'].values.describe()

Unnamed: 0_level_0,counts,freqs
categories,Unnamed: 1_level_1,Unnamed: 2_level_1
EVT_1,223,0.006673
EVT_2,606,0.018134
Endo_f,3520,0.105332
F,4956,0.148303
F_p,420,0.012568
F_sm,309,0.009247
HBC,11481,0.343557
HBC_p,120,0.003591
PAMM1,1402,0.041953
PV,2635,0.07885


In [5]:
adata.obs['stage_perInfection'].values.describe()

Unnamed: 0_level_0,counts,freqs
categories,Unnamed: 1_level_1,Unnamed: 2_level_1
Tg_24h,16806,0.502903
UI_Tg_24h,16612,0.497097


In [6]:
adata

AnnData object with n_obs × n_vars = 33418 × 36601
    obs: 'sample', 'stage', 'hpi', 'infection', 'percent_mito', 'n_counts', 'sample_barcode', 'assignment_SoC', 'donor_id', 'scrublet_score', 'scrublet_cluster_score', 'zscore', 'bh_pval', 'bonf_pval', 'S_score', 'G2M_score', 'phase', 'n_genes_by_counts', 'total_counts', 'total_counts_hs', 'pct_counts_hs', 'total_counts_tg', 'pct_counts_tg', 'Tg_infected', 'n_genes', '_scvi_batch', '_scvi_labels', '_scvi_local_l_mean', '_scvi_local_l_var', 'leiden_scvi', 'celltype_predictions', 'probabilities', 'scrublet_doublet', 'cell_type_2022', 'cell_type', 'souporcell_MFgenotype', 'MFgenotype', 'cell_type_broad', 'umap_density_Tg_infected', 'stage_perInfection', 'celltype-Stage', 'Tg_intracellular', 'celltype-Intracellular', 'Dev_Stage'
    var: 'gene_ids', 'feature_types', 'mean-0', 'std-0', 'mean-1', 'std-1', 'mean-2', 'std-2', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm'
    uns: 'Dev_Stage_colors', 'MFgenot

### DO NOT subset the dataset; use the full counts matrix from the infected manifold!!

In [7]:
# Take only the INFECTED counts matrix.
# Slicing an AnnData object yields a view; .copy() materialises it so that the
# later assignment to .obs['cell_type'] does not trigger an implicit copy
# ("Trying to set attribute `.obs` of view, copying.").
cellphoneDB_adata = adata[adata.obs['stage_perInfection'] == 'Tg_24h'].copy()

  res = method(*args, **kwargs)


In [8]:
#Take only the UNINFECTED counts matrix
#cellphoneDB_adataUI = adata[adata.obs['stage_perInfection']=='UI_Tg_24h']

In [9]:
cellphoneDB_adata.obs['cell_type']

Pla_HDBR13007975_AAACCCAAGACACACG        HBC
Pla_HDBR13007975_AAACCCACACTATCGA    VCT_CCC
Pla_HDBR13007975_AAACCCACAGGGTTGA        HBC
Pla_HDBR13007975_AAACCCACATCCGATA        HBC
Pla_HDBR13007975_AAACCCAGTCCGAAGA          F
                                      ...   
Pla_HDBR13798224_TTTGTTGGTTTGCAGT        HBC
Pla_HDBR13798224_TTTGTTGTCCCAAGCG     Endo_f
Pla_HDBR13798224_TTTGTTGTCCGGCTTT        HBC
Pla_HDBR13798224_TTTGTTGTCCTGATAG          F
Pla_HDBR13798224_TTTGTTGTCTCATGGA      VCT_p
Name: cell_type, Length: 16806, dtype: category
Categories (15, object): ['EVT_1', 'EVT_2', 'Endo_f', 'F', ..., 'VCT_CCC', 'VCT_fusing', 'VCT_p', 'iEVT']

In [10]:
cellphoneDB_adata.obs['donor_id'].values.describe()

Unnamed: 0_level_0,counts,freqs
categories,Unnamed: 1_level_1,Unnamed: 2_level_1
scDonor_Tg1,3722,0.221469
scDonor_Tg2,5144,0.306081
scDonor_Tg3,3549,0.211175
scDonor_Tg4,4391,0.261276


In [11]:
cellphoneDB_adata.obs['stage_perInfection'].values.describe()

Unnamed: 0_level_0,counts,freqs
categories,Unnamed: 1_level_1,Unnamed: 2_level_1
Tg_24h,16806,1.0


### Load *new* cell clusters annotation

In [12]:
# Append the infection label to each cell type ("<cell_type>.<infection>") so
# that the meta labels match the cluster names used in the DEG tables.

# INFECTED
celltype_label = cellphoneDB_adata.obs['cell_type'].astype('string')
infection_label = cellphoneDB_adata.obs['infection'].astype('string')
cellphoneDB_adata.obs['cell_type'] = celltype_label + '.' + infection_label

# UI (disabled)
#cellphoneDB_adataUI.obs['cell_type']= cellphoneDB_adataUI.obs.cell_type.astype('string')+'.'+cellphoneDB_adataUI.obs.stage_perInfection.astype('string')

Trying to set attribute `.obs` of view, copying.


In [13]:
cellphoneDB_adata.obs['cell_type']

Pla_HDBR13007975_AAACCCAAGACACACG        HBC.Tg
Pla_HDBR13007975_AAACCCACACTATCGA    VCT_CCC.Tg
Pla_HDBR13007975_AAACCCACAGGGTTGA        HBC.Tg
Pla_HDBR13007975_AAACCCACATCCGATA        HBC.Tg
Pla_HDBR13007975_AAACCCAGTCCGAAGA          F.Tg
                                        ...    
Pla_HDBR13798224_TTTGTTGGTTTGCAGT        HBC.Tg
Pla_HDBR13798224_TTTGTTGTCCCAAGCG     Endo_f.Tg
Pla_HDBR13798224_TTTGTTGTCCGGCTTT        HBC.Tg
Pla_HDBR13798224_TTTGTTGTCCTGATAG          F.Tg
Pla_HDBR13798224_TTTGTTGTCTCATGGA      VCT_p.Tg
Name: cell_type, Length: 16806, dtype: string

In [14]:
cellphoneDB_adata

AnnData object with n_obs × n_vars = 16806 × 36601
    obs: 'sample', 'stage', 'hpi', 'infection', 'percent_mito', 'n_counts', 'sample_barcode', 'assignment_SoC', 'donor_id', 'scrublet_score', 'scrublet_cluster_score', 'zscore', 'bh_pval', 'bonf_pval', 'S_score', 'G2M_score', 'phase', 'n_genes_by_counts', 'total_counts', 'total_counts_hs', 'pct_counts_hs', 'total_counts_tg', 'pct_counts_tg', 'Tg_infected', 'n_genes', '_scvi_batch', '_scvi_labels', '_scvi_local_l_mean', '_scvi_local_l_var', 'leiden_scvi', 'celltype_predictions', 'probabilities', 'scrublet_doublet', 'cell_type_2022', 'cell_type', 'souporcell_MFgenotype', 'MFgenotype', 'cell_type_broad', 'umap_density_Tg_infected', 'stage_perInfection', 'celltype-Stage', 'Tg_intracellular', 'celltype-Intracellular', 'Dev_Stage'
    var: 'gene_ids', 'feature_types', 'mean-0', 'std-0', 'mean-1', 'std-1', 'mean-2', 'std-2', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm'
    uns: 'Dev_Stage_colors', 'MFgenot

In [15]:
# Normalise counts per cell before the log1p transform below.
# NOTE(review): sc.pp.normalize_per_cell is marked deprecated in newer scanpy
# releases (sc.pp.normalize_total is the suggested replacement) — confirm the
# installed version before re-running, as the two are not drop-in identical.
sc.pp.normalize_per_cell(cellphoneDB_adata)
sc.pp.log1p(cellphoneDB_adata)                                                                                                            
# sc.pp.scale(adata, max_value=10)

  res = method(*args, **kwargs)


### The anndata object contains counts that have been normalized (per cell) and log-transformed, therefore save this as below

In [16]:
# counts input files 
cellphoneDB_adata.write('../data/toxoplasma_inf_normlogTransformed_24h.h5ad')

... storing 'cell_type' as categorical


In [17]:
# meta file: one row per cell (index named 'Cell') with its cluster label
cell_barcodes = pd.Index(list(cellphoneDB_adata.obs.index), name='Cell')
cell_labels = [str(label) for label in cellphoneDB_adata.obs['cell_type']]
df_meta = pd.DataFrame({'cell_type': cell_labels}, index=cell_barcodes)
df_meta.to_csv('../data/toxoplasma_inf_meta_24h.tsv', sep='\t')

In [18]:
df_meta

Unnamed: 0_level_0,cell_type
Cell,Unnamed: 1_level_1
Pla_HDBR13007975_AAACCCAAGACACACG,HBC.Tg
Pla_HDBR13007975_AAACCCACACTATCGA,VCT_CCC.Tg
Pla_HDBR13007975_AAACCCACAGGGTTGA,HBC.Tg
Pla_HDBR13007975_AAACCCACATCCGATA,HBC.Tg
Pla_HDBR13007975_AAACCCAGTCCGAAGA,F.Tg
...,...
Pla_HDBR13798224_TTTGTTGGTTTGCAGT,HBC.Tg
Pla_HDBR13798224_TTTGTTGTCCCAAGCG,Endo_f.Tg
Pla_HDBR13798224_TTTGTTGTCCGGCTTT,HBC.Tg
Pla_HDBR13798224_TTTGTTGTCCTGATAG,F.Tg


### (2) Build the DEG files individually from precalculated DEG tables

In [19]:
# PATH to immune cells
path='../../diffGeneExpression/macrophages/results/SC_limma_Immune_TOX_24h/'

### HBC

In [20]:
HBDEG = pd.read_csv(path+'1_DEGs_SC_TOX_24h_HBC.tsv', header=0, index_col=0, sep='\t')

In [21]:
HBDEG_up= recoverDEGs(HBDEG, infection='Tg', celltype='HBC', direction='up')
HBDEG_up.head(5)

Unnamed: 0,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type,cluster,gene
COL3A1,0.796033,6.282735,136.443574,2.289547e-11,1.689686e-07,1.689686e-07,HBC,HBC.Tg,COL3A1
COL1A1,0.847071,4.494423,70.662051,1.338823e-08,9.880516e-05,4.940258e-05,HBC,HBC.Tg,COL1A1
DYNLRB1,0.332891,5.848545,57.359292,8.412154e-08,0.000620817,0.0001723061,HBC,HBC.Tg,DYNLRB1
MYO1G,0.608635,3.957769,53.63194,1.487998e-07,0.001098143,0.0002196286,HBC,HBC.Tg,MYO1G
DTNA,0.335151,7.616277,50.481931,2.463888e-07,0.00181835,0.0002597642,HBC,HBC.Tg,DTNA


In [22]:
# Keep only DEG rows whose cluster label is present in the meta annotation.
# isin() always yields a boolean mask; the previous np.array([...]) became a
# float-typed empty array for an empty DEG table, which pandas interprets as a
# column selection instead of a row filter.
cl2include = set(cellphoneDB_adata.obs.cell_type.tolist())
idx = HBDEG_up['cluster'].isin(cl2include).to_numpy()
HBDEG_up = HBDEG_up.loc[idx]

In [23]:
# Column order for the output table: cluster first, gene second, rest after.
cncol = [col for col in HBDEG_up.columns if col not in ('cluster', 'gene')]
HBDEG_up = HBDEG_up.loc[:, ['cluster', 'gene', *cncol]]
HBDEG_up.to_csv('../data/toxoplasma_inf_HBC_DEGsup.tsv', sep='\t', index=False)

In [24]:
HBDEG_up.head()

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
COL3A1,HBC.Tg,COL3A1,0.796033,6.282735,136.443574,2.289547e-11,1.689686e-07,1.689686e-07,HBC
COL1A1,HBC.Tg,COL1A1,0.847071,4.494423,70.662051,1.338823e-08,9.880516e-05,4.940258e-05,HBC
DYNLRB1,HBC.Tg,DYNLRB1,0.332891,5.848545,57.359292,8.412154e-08,0.000620817,0.0001723061,HBC
MYO1G,HBC.Tg,MYO1G,0.608635,3.957769,53.63194,1.487998e-07,0.001098143,0.0002196286,HBC
DTNA,HBC.Tg,DTNA,0.335151,7.616277,50.481931,2.463888e-07,0.00181835,0.0002597642,HBC


In [25]:
#HBDEG_up=pd.read_table('../data/toxoplasma_inf_HBC_DEGsup.tsv')
#HBDEG_up.loc[['VEGFA']]
HBDEG_up.loc[HBDEG_up['gene'] == 'ISG15']

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type


### PAMM1

In [26]:
PAMM1DEG = pd.read_csv(path+'2_DEGs_SC_TOX_24h_PAMM1.tsv', header=0, index_col=0, sep='\t')

In [27]:
PAMM1DEG_up= recoverDEGs(PAMM1DEG, infection='Tg', celltype='PAMM1', direction='up')
PAMM1DEG_up.head(5)

Unnamed: 0,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type,cluster,gene
MET,1.003315,7.576231,30.267887,6e-06,0.037725,0.003399,PAMM1,PAMM1.Tg,MET
SERPINB9,0.606788,6.309922,31.040125,5e-06,0.031063,0.003399,PAMM1,PAMM1.Tg,SERPINB9
DOCK8,0.589602,9.475681,29.35852,8e-06,0.047588,0.003399,PAMM1,PAMM1.Tg,DOCK8
PDE3B,0.661068,7.912461,25.186082,2.3e-05,0.145396,0.006322,PAMM1,PAMM1.Tg,PDE3B
CXCL5,0.649151,13.082072,25.270278,2.3e-05,0.14203,0.006322,PAMM1,PAMM1.Tg,CXCL5


In [28]:
# Keep only DEG rows whose cluster label is present in the meta annotation.
# isin() always yields a boolean mask; the previous np.array([...]) became a
# float-typed empty array for an empty DEG table, which pandas interprets as a
# column selection instead of a row filter.
cl2include = set(cellphoneDB_adata.obs.cell_type.tolist())
idx = PAMM1DEG_up['cluster'].isin(cl2include).to_numpy()
PAMM1DEG_up = PAMM1DEG_up.loc[idx]

In [29]:
# Column order for the output table: cluster first, gene second, rest after.
cncol = [col for col in PAMM1DEG_up.columns if col not in ('cluster', 'gene')]
PAMM1DEG_up = PAMM1DEG_up.loc[:, ['cluster', 'gene', *cncol]]
PAMM1DEG_up.to_csv('../data/toxoplasma_inf_PAMM1_DEGsup.tsv', sep='\t', index=False)

In [30]:
PAMM1DEG_up.head(5)

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
MET,PAMM1.Tg,MET,1.003315,7.576231,30.267887,6e-06,0.037725,0.003399,PAMM1
SERPINB9,PAMM1.Tg,SERPINB9,0.606788,6.309922,31.040125,5e-06,0.031063,0.003399,PAMM1
DOCK8,PAMM1.Tg,DOCK8,0.589602,9.475681,29.35852,8e-06,0.047588,0.003399,PAMM1
PDE3B,PAMM1.Tg,PDE3B,0.661068,7.912461,25.186082,2.3e-05,0.145396,0.006322,PAMM1
CXCL5,PAMM1.Tg,CXCL5,0.649151,13.082072,25.270278,2.3e-05,0.14203,0.006322,PAMM1


In [36]:
# PATH to FibroEndo cells DGE tables
path='../../diffGeneExpression/fibroEndo/results/SC_limma_fibroEndo_TOX_24h/'

### F

In [37]:
FDEG = pd.read_csv(path+'/1_DEGs_SC_TOX_24h_F.tsv', header=0, index_col=0, sep='\t')

In [38]:
FDEG_up= recoverDEGs(FDEG, infection='Tg', celltype='F', direction='up')
FDEG_up.head(5)

Unnamed: 0,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type,cluster,gene
PLOD2,0.346437,9.71067,57.301917,4.569245e-08,0.000318,0.000318,F,F.Tg,PLOD2
PGAM1,0.294528,7.414381,49.648052,1.649194e-07,0.001149,0.000574,F,F.Tg,PGAM1
SNX9,0.359026,7.800909,45.928065,3.231338e-07,0.002251,0.000736,F,F.Tg,SNX9
NOTCH2,0.300958,6.890188,42.55203,6.14065e-07,0.004278,0.000856,F,F.Tg,NOTCH2
MYO10,0.288179,10.052584,39.956914,1.028769e-06,0.007166,0.000896,F,F.Tg,MYO10


In [39]:
# Keep only DEG rows whose cluster label is present in the meta annotation.
# isin() always yields a boolean mask; the previous np.array([...]) became a
# float-typed empty array for an empty DEG table, which pandas interprets as a
# column selection instead of a row filter.
cl2include = set(cellphoneDB_adata.obs.cell_type.tolist())
idx = FDEG_up['cluster'].isin(cl2include).to_numpy()
FDEG_up = FDEG_up.loc[idx]

In [40]:
# Column order for the output table: cluster first, gene second, rest after.
cncol = [col for col in FDEG_up.columns if col not in ('cluster', 'gene')]
FDEG_up = FDEG_up.loc[:, ['cluster', 'gene', *cncol]]
FDEG_up.to_csv('../data/toxoplasma_inf_F_DEGsup.tsv', sep='\t', index=False)

In [45]:
FDEG_up.head()

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
PLOD2,F.Tg,PLOD2,0.346437,9.71067,57.301917,4.569245e-08,0.000318,0.000318,F
PGAM1,F.Tg,PGAM1,0.294528,7.414381,49.648052,1.649194e-07,0.001149,0.000574,F
SNX9,F.Tg,SNX9,0.359026,7.800909,45.928065,3.231338e-07,0.002251,0.000736,F
NOTCH2,F.Tg,NOTCH2,0.300958,6.890188,42.55203,6.14065e-07,0.004278,0.000856,F
MYO10,F.Tg,MYO10,0.288179,10.052584,39.956914,1.028769e-06,0.007166,0.000896,F


In [46]:
FDEG_up.loc[['PGAM1']]

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
PGAM1,F.Tg,PGAM1,0.294528,7.414381,49.648052,1.649194e-07,0.001149,0.000574,F


### PV

In [47]:
PVDEG = pd.read_csv(path+'/2_DEGs_SC_TOX_24h_PV.tsv', header=0, index_col=0, sep='\t')

In [48]:
PVDEG_up= recoverDEGs(PVDEG, infection='Tg', celltype='PV', direction='up')
PVDEG_up.head(5)

Unnamed: 0,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type,cluster,gene
CD68,0.778783,4.45769,56.390919,3.322928e-08,0.000246,0.000246,PV,PV.Tg,CD68
SERPINE1,0.596916,10.63182,42.412198,4.462031e-07,0.003306,0.001102,PV,PV.Tg,SERPINE1
SPP1,0.924016,7.838992,39.349764,8.459709e-07,0.006268,0.001254,PV,PV.Tg,SPP1
PGAM1,0.393088,7.736512,37.133623,1.369823e-06,0.010149,0.001362,PV,PV.Tg,PGAM1
RNASE1,0.628468,5.194213,37.819184,1.177979e-06,0.008728,0.001362,PV,PV.Tg,RNASE1


In [49]:
# Keep only DEG rows whose cluster label is present in the meta annotation.
# isin() always yields a boolean mask; the previous np.array([...]) became a
# float-typed empty array for an empty DEG table, which pandas interprets as a
# column selection instead of a row filter.
cl2include = set(cellphoneDB_adata.obs.cell_type.tolist())
idx = PVDEG_up['cluster'].isin(cl2include).to_numpy()
PVDEG_up = PVDEG_up.loc[idx]

In [50]:
# Column order for the output table: cluster first, gene second, rest after.
cncol = [col for col in PVDEG_up.columns if col not in ('cluster', 'gene')]
PVDEG_up = PVDEG_up.loc[:, ['cluster', 'gene', *cncol]]
PVDEG_up.to_csv('../data/toxoplasma_inf_PV_DEGsup.tsv', sep='\t', index=False)

In [51]:
PVDEG_up.head()

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
CD68,PV.Tg,CD68,0.778783,4.45769,56.390919,3.322928e-08,0.000246,0.000246,PV
SERPINE1,PV.Tg,SERPINE1,0.596916,10.63182,42.412198,4.462031e-07,0.003306,0.001102,PV
SPP1,PV.Tg,SPP1,0.924016,7.838992,39.349764,8.459709e-07,0.006268,0.001254,PV
PGAM1,PV.Tg,PGAM1,0.393088,7.736512,37.133623,1.369823e-06,0.010149,0.001362,PV
RNASE1,PV.Tg,RNASE1,0.628468,5.194213,37.819184,1.177979e-06,0.008728,0.001362,PV


### Endo_f

In [52]:
EndoDEG = pd.read_csv(path+'/3_DEGs_SC_TOX_24h_Endof.tsv', header=0, index_col=0, sep='\t')

In [53]:
EndoDEG_up= recoverDEGs(EndoDEG, infection='Tg', celltype='Endo_f', direction='up')
EndoDEG_up.head(5)

Unnamed: 0,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type,cluster,gene
MT-ATP6,0.395582,12.310631,29.299554,8e-06,0.06086,0.010143,Endo_f,Endo_f.Tg,MT-ATP6
TM4SF1,0.261449,10.779658,25.94236,2e-05,0.147604,0.018174,Endo_f,Endo_f.Tg,TM4SF1
PHF5A,0.352516,4.80343,25.176129,2.4e-05,0.182148,0.018174,Endo_f,Endo_f.Tg,PHF5A
MT-CO2,0.25641,12.635936,25.523726,2.2e-05,0.16551,0.018174,Endo_f,Endo_f.Tg,MT-CO2
COL1A2,0.590334,4.731228,24.484398,3e-05,0.220835,0.018403,Endo_f,Endo_f.Tg,COL1A2


In [54]:
# Keep only DEG rows whose cluster label is present in the meta annotation.
# isin() always yields a boolean mask; the previous np.array([...]) became a
# float-typed empty array for an empty DEG table, which pandas interprets as a
# column selection instead of a row filter.
cl2include = set(cellphoneDB_adata.obs.cell_type.tolist())
idx = EndoDEG_up['cluster'].isin(cl2include).to_numpy()
EndoDEG_up = EndoDEG_up.loc[idx]

In [55]:
# Column order for the output table: cluster first, gene second, rest after.
cncol = [col for col in EndoDEG_up.columns if col not in ('cluster', 'gene')]
EndoDEG_up = EndoDEG_up.loc[:, ['cluster', 'gene', *cncol]]
EndoDEG_up.to_csv('../data/toxoplasma_inf_Endof_DEGsup.tsv', sep='\t', index=False)

In [56]:
EndoDEG_up.head()

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
MT-ATP6,Endo_f.Tg,MT-ATP6,0.395582,12.310631,29.299554,8e-06,0.06086,0.010143,Endo_f
TM4SF1,Endo_f.Tg,TM4SF1,0.261449,10.779658,25.94236,2e-05,0.147604,0.018174,Endo_f
PHF5A,Endo_f.Tg,PHF5A,0.352516,4.80343,25.176129,2.4e-05,0.182148,0.018174,Endo_f
MT-CO2,Endo_f.Tg,MT-CO2,0.25641,12.635936,25.523726,2.2e-05,0.16551,0.018174,Endo_f
COL1A2,Endo_f.Tg,COL1A2,0.590334,4.731228,24.484398,3e-05,0.220835,0.018403,Endo_f


In [58]:
EndoDEG_up.loc[['PHF5A']]

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
PHF5A,Endo_f.Tg,PHF5A,0.352516,4.80343,25.176129,2.4e-05,0.182148,0.018174,Endo_f


In [59]:
# PATH to Trophoblast cells DGE tables
path='../../diffGeneExpression/trophoblast/results/SC_limma_Trophoblast_TOX_24h/'

### VCT_fusing

In [96]:
VCTfDEG = pd.read_csv(path+'/1_DEGs_SC_TOX_24h_VCT_fusing.tsv', header=0, index_col=0, sep='\t')

In [97]:
VCTfDEG_up= recoverDEGs(VCTfDEG, infection='Tg', celltype='VCT_fusing', direction='up')
VCTfDEG_up.head(5)

Unnamed: 0,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type,cluster,gene
TIMP1,0.883189,6.423208,88.01946,1.126633e-09,9e-06,9e-06,VCT_fusing,VCT_fusing.Tg,TIMP1
IFI6,0.448185,10.421668,55.318923,8.556514e-08,0.000652,0.000326,VCT_fusing,VCT_fusing.Tg,IFI6
SPP1,0.717596,6.877959,47.980641,2.897721e-07,0.002207,0.000736,VCT_fusing,VCT_fusing.Tg,SPP1
COL3A1,0.814079,5.123309,43.569166,6.419772e-07,0.00489,0.001222,VCT_fusing,VCT_fusing.Tg,COL3A1
DYNLRB1,0.418769,5.728626,39.369841,1.439906e-06,0.010968,0.002194,VCT_fusing,VCT_fusing.Tg,DYNLRB1


In [98]:
# Keep only DEG rows whose cluster label is present in the meta annotation.
# The result is assigned back to VCTfDEG_up: it previously went to a mistyped
# name (VCTfVCTfDEG_upDEG), so the filter was never applied to the table that
# is written out afterwards.
# isin() always yields a boolean mask, including for an empty DEG table.
cl2include = set(cellphoneDB_adata.obs.cell_type.tolist())
idx = VCTfDEG_up['cluster'].isin(cl2include).to_numpy()
VCTfDEG_up = VCTfDEG_up.loc[idx]

In [99]:
# Column order for the output table: cluster first, gene second, rest after.
cncol = [col for col in VCTfDEG_up.columns if col not in ('cluster', 'gene')]
VCTfDEG_up = VCTfDEG_up.loc[:, ['cluster', 'gene', *cncol]]
VCTfDEG_up.to_csv('../data/toxoplasma_inf_VCTfusing_DEGsup.tsv', sep='\t', index=False)

In [100]:
VCTfDEG_up.head()

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
TIMP1,VCT_fusing.Tg,TIMP1,0.883189,6.423208,88.01946,1.126633e-09,9e-06,9e-06,VCT_fusing
IFI6,VCT_fusing.Tg,IFI6,0.448185,10.421668,55.318923,8.556514e-08,0.000652,0.000326,VCT_fusing
SPP1,VCT_fusing.Tg,SPP1,0.717596,6.877959,47.980641,2.897721e-07,0.002207,0.000736,VCT_fusing
COL3A1,VCT_fusing.Tg,COL3A1,0.814079,5.123309,43.569166,6.419772e-07,0.00489,0.001222,VCT_fusing
DYNLRB1,VCT_fusing.Tg,DYNLRB1,0.418769,5.728626,39.369841,1.439906e-06,0.010968,0.002194,VCT_fusing


In [101]:
VCTfDEG_up.loc[['SPP1']]

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
SPP1,VCT_fusing.Tg,SPP1,0.717596,6.877959,47.980641,2.897721e-07,0.002207,0.000736,VCT_fusing


### VCT

In [90]:
VCTDEG = pd.read_csv(path+'/2_DEGs_SC_TOX_24h_VCT.tsv', header=0, index_col=0, sep='\t')

In [91]:
VCTDEG_up= recoverDEGs(VCTDEG, infection='Tg', celltype='VCT', direction='up')
VCTDEG_up.head(5)

Unnamed: 0,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type,cluster,gene
CXCL8,0.974553,5.553529,118.972231,1.140086e-11,1.082967e-07,1.082967e-07,VCT,VCT.Tg,CXCL8
TIMP1,0.808608,6.236301,80.224796,9.055197e-10,8.601531e-06,4.300766e-06,VCT,VCT.Tg,TIMP1
RNF17,1.837875,3.048054,60.373804,1.653236e-08,0.0001570409,5.234696e-05,VCT,VCT.Tg,RNF17
CCL4L2,1.032796,2.540356,57.375081,2.714022e-08,0.0002578049,6.445124e-05,VCT,VCT.Tg,CCL4L2
IFI6,0.32117,10.736599,55.654252,3.636252e-08,0.0003454076,6.908152e-05,VCT,VCT.Tg,IFI6


In [92]:
# Keep only DEG rows whose cluster label is present in the meta annotation.
# isin() always yields a boolean mask; the previous np.array([...]) became a
# float-typed empty array for an empty DEG table, which pandas interprets as a
# column selection instead of a row filter.
cl2include = set(cellphoneDB_adata.obs.cell_type.tolist())
idx = VCTDEG_up['cluster'].isin(cl2include).to_numpy()
VCTDEG_up = VCTDEG_up.loc[idx]

In [93]:
# Column order for the output table: cluster first, gene second, rest after.
cncol = [col for col in VCTDEG_up.columns if col not in ('cluster', 'gene')]
VCTDEG_up = VCTDEG_up.loc[:, ['cluster', 'gene', *cncol]]
VCTDEG_up.to_csv('../data/toxoplasma_inf_VCT_DEGsup.tsv', sep='\t', index=False)

In [94]:
VCTDEG_up.head()

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
CXCL8,VCT.Tg,CXCL8,0.974553,5.553529,118.972231,1.140086e-11,1.082967e-07,1.082967e-07,VCT
TIMP1,VCT.Tg,TIMP1,0.808608,6.236301,80.224796,9.055197e-10,8.601531e-06,4.300766e-06,VCT
RNF17,VCT.Tg,RNF17,1.837875,3.048054,60.373804,1.653236e-08,0.0001570409,5.234696e-05,VCT
CCL4L2,VCT.Tg,CCL4L2,1.032796,2.540356,57.375081,2.714022e-08,0.0002578049,6.445124e-05,VCT
IFI6,VCT.Tg,IFI6,0.32117,10.736599,55.654252,3.636252e-08,0.0003454076,6.908152e-05,VCT


In [95]:
VCTDEG_up.loc[['SPP1']]

Unnamed: 0,cluster,gene,logFC,logCPM,F,PValue,bonferroni,p_val_adj,cell_type
SPP1,VCT.Tg,SPP1,0.399504,6.847701,15.368978,0.00051,1.0,0.024099,VCT


### (3) Run cellphoneDB