# Looking at the L/R interactinos enriched in particular pairs of cell types

CellphoneDB

Validation cohort - CVID patients from group Ib (Freiburg) vs all healthy patients

24.03.2021

This code uses DEGs computed for each cluster to identify relevant L/R interactions between the cells in a microenviroment

Code from Luz rewritten in python

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

In [2]:
# Define cutoff variables
filter_int_user_curated = True # Use only user_curated interactions?
per_cutoff = 0.1 # min % of cells in the cluster required  with expression > 0 for the gene
pval_cutoff = 0.05 # max adjusted p-value requeired to consider a gene as DEG

# as of 18.03.2021, not using the logFC cutoff at all! --> 0 here
logFC_cutoff = 0 # min logFC to consider a gene as DEG

## Load cellphone database


In [3]:
# Gene names
genes_cpDB = pd.read_csv('/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/hsa_uniprot.txt', 
                         sep = '\t')

In [4]:
genes_cpDB

Unnamed: 0,uniprot,Entry,gene_name
0,P01611,KVD12_HUMAN,IGKV1D-12
1,P01615,KVD28_HUMAN,IGKV2D-28
2,Q15334,L2GL1_HUMAN,LLGL1
3,Q6ZP29,LAAT1_HUMAN,PQLC2
4,Q9GZZ8,LACRT_HUMAN,LACRT
...,...,...,...
20311,Q9H900,ZWILC_HUMAN,ZWILCH
20312,P98169,ZXDB_HUMAN,ZXDB
20313,Q2QGD7,ZXDC_HUMAN,ZXDC
20314,Q15942,ZYX_HUMAN,ZYX


In [5]:
# Complexes members
com_cpDB = pd.read_csv('/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/database_20210218/complex_generated.csv')
com_cpDB['complex_name'] = ['complex:' + complex_name for complex_name in com_cpDB['complex_name']]

In [6]:
com_cpDB

Unnamed: 0,complex_name,uniprot_1,uniprot_2,uniprot_3,uniprot_4,transmembrane,peripheral,secreted,secreted_desc,secreted_highlight,receptor,receptor_desc,integrin,other,other_desc,pdb_id,pdb_structure,stoichiometry,comments_complex
0,complex:contactin complex II,Q12860,Q92823,,,True,False,False,,False,False,,False,False,,,FALSE,,NRCAM bind in cis and in trans to contactin-1
1,complex:IL6 receptor,P08887,P40189,,,True,False,False,,False,True,Cytokine receptor IL6 family,False,False,,1p9m,binding,IL6;IL6;IL6R;IL6R;IL6ST;IL6ST,Signal activation necessitate an association w...
2,complex:AT8B4CC50B complex,Q8TF62,Q3MIR4,,,True,False,False,,False,False,,False,False,,,FALSE,,Interacts with beta subunits TMEM30A and TMEM30B
3,complex:KCNV1KCNB2 complex,Q6PIU1,Q92953,,,True,False,False,,False,False,,False,False,,,FALSE,,Has to be associated with another potassium ch...
4,complex:LRFN3LRFN5 complex,Q9BTN0,Q96NI6,,,True,False,False,,False,False,,False,False,,,FALSE,,"Can form heteromeric complexes with LRFN1, LRF..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
614,complex:FZD8_LRP6,O75581,Q9H461,,,True,False,False,,False,False,,False,False,,,False,,
615,complex:FZD9_LRP5,O75197,O00144,,,True,False,False,,False,False,,False,False,,,False,,
616,complex:FZD9_LRP6,O75581,O00144,,,True,False,False,,False,False,,False,False,,,False,,
617,complex:FZD10_LRP5,O75197,Q9ULW2,,,True,False,False,,False,False,,False,False,,,False,,


In [7]:
#'complex:FZD8_LRP6'[8:]

In [8]:
#com_cpDB[(com_cpDB['complex_name'] == 'complex:IL6 receptor')].loc[:, ['uniprot_1', 'uniprot_2', 'uniprot_3', 'uniprot_4']].values

In [9]:
#genes_cpDB[genes_cpDB['uniprot'].isin(['P08887','P40189'])]['gene_name']

In [10]:
# Generate complexes2gene symbol dictionary
Com2Gene = {}


for complex_name in np.unique(com_cpDB['complex_name']):
    #print(complex_name)
    
    # getting rid of 'complex:' in the beginning
    #complex_name = complex_name[8:]
    #print(complex_name)
    
    curr_complex_proteins = list(com_cpDB[(com_cpDB['complex_name'] == complex_name)].loc[:, ['uniprot_1', 'uniprot_2', 'uniprot_3', 'uniprot_4']].values.tolist())
    # flatten list 
    curr_complex_proteins = [item for sublist in curr_complex_proteins for item in sublist]
    #print(curr_complex_proteins)
    # remove nans
    curr_complex_proteins = [x for x in curr_complex_proteins if str(x) != 'nan']
    #print('after removing nans:', curr_complex_proteins)
    
    # getting corresponding gene names from the gene table
    Com2Gene[complex_name] = list(genes_cpDB[genes_cpDB['uniprot'].isin(curr_complex_proteins)]['gene_name'])
    

In [11]:
list(Com2Gene.items())[:10]

[('complex:12oxoLeukotrieneB4_byPTGR1', ['PTGR1']),
 ('complex:17aHydroxyprogesterone_byCYP17A1', ['CYP17A1']),
 ('complex:22Hydroxycholesterol_byCYP11A1', ['CYP11A1']),
 ('complex:22Hydroxycholesterol_byCYP3A4', ['CYP3A4']),
 ('complex:2arachidonoylglycerol_byDAGLA', ['DAGLA']),
 ('complex:2arachidonoylglycerol_byDAGLB', ['DAGLB']),
 ('complex:5-alpha-Dihydroprogesterone_byDHRS9', ['DHRS9']),
 ('complex:5HT3C5HT3A complex', ['HTR3A', 'HTR3C']),
 ('complex:5HT3C5HT3A_complex', ['HTR3A', 'HTR3C']),
 ('complex:5HT3D receptor', ['HTR3A', 'HTR3D'])]

In [12]:
# Load interactions from cellphoneDB/out/means.txt output file                   
int_cpDB = pd.read_csv('/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/out/means.txt',
                      sep='\t')

# disregarding pairwise average expression values
int_cpDB = int_cpDB.loc[:, list(int_cpDB.columns)[:11]]
int_cpDB

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,is_integrin
0,CPI-SS0A7B487D4,KLRG2_WNT11,simple:A4D1S0,simple:O96014,KLRG2,WNT11,True,True,False,InnateDB-All,False
1,CPI-CS0481C1F9A,FZD1_LRP5_WNT11,complex:FZD1_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
2,CPI-CS0F29C6285,FZD1_LRP6_WNT11,complex:FZD1_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
3,CPI-CS0372FC240,FZD2_LRP5_WNT11,complex:FZD2_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
4,CPI-CS031A2034E,FZD2_LRP6_WNT11,complex:FZD2_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
...,...,...,...,...,...,...,...,...,...,...,...
1351,CPI-SC047CEF2DD,CRLF2_TSLPR,simple:Q9HC73,complex:TSLPR,CRLF2,,True,True,True,user_curated,False
1352,CPI-SS04C672963,ESAM_ESAM,simple:Q96AP7,simple:Q96AP7,ESAM,ESAM,False,False,False,user_curated,False
1353,CPI-SC001AFA16D,NRTN_RET receptor 2,simple:Q99748,complex:RET receptor 2,NRTN,,True,False,True,curated,False
1354,CPI-SC060C69786,NRTN_RET_receptor_2,simple:Q99748,complex:RET_receptor_2,NRTN,,True,False,True,user_curated,False


In [13]:
np.unique(int_cpDB['annotation_strategy'], return_counts=True)

(array(['I2D', 'I2D,IMEx,InnateDB,InnateDB-All,IntAct,MINT',
        'I2D,IMEx,InnateDB,IntAct', 'I2D,IMEx,InnateDB-All,IntAct',
        'I2D,IMEx,InnateDB-All,IntAct,MINT', 'I2D,IMEx,InnateDB-All,MINT',
        'I2D,InnateDB', 'I2D,InnateDB-All', 'I2D,InnateDB-All,IntAct',
        'I2D,IntAct', 'IMEx', 'IMEx,InnateDB-All,IntAct',
        'IMEx,InnateDB-All,IntAct,MatrixDB', 'IMEx,InnateDB-All,MINT',
        'IMEx,InnateDB-All,UniProt', 'IMEx,IntAct', 'IMEx,MINT',
        'InnateDB', 'InnateDB-All', 'InnateDB-All,MINT', 'curated',
        'guidetopharmacology.org', 'user_curated'], dtype=object),
 array([ 42,   1,   1,   4,   1,   1,   2,  21,   1,   2,   2,   4,   1,
          6,   1,  26,   2,   2,  67,   2, 279,  63, 825]))

In [14]:
# MANDATORY: remove "curated" because we have cleaned and renamed them (this is a long story, just do it)
# these interactions have either been renamed or excluded so best not to use them
int_cpDB = int_cpDB[int_cpDB['annotation_strategy'] != 'curated']

In [15]:
# OPTIONAL: Use only user_curated interactions?
if filter_int_user_curated:
    int_cpDB = int_cpDB[int_cpDB['annotation_strategy'] == 'user_curated']

In [16]:
int_cpDB

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,is_integrin
1,CPI-CS0481C1F9A,FZD1_LRP5_WNT11,complex:FZD1_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
2,CPI-CS0F29C6285,FZD1_LRP6_WNT11,complex:FZD1_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
3,CPI-CS0372FC240,FZD2_LRP5_WNT11,complex:FZD2_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
4,CPI-CS031A2034E,FZD2_LRP6_WNT11,complex:FZD2_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
5,CPI-CS02643715E,FZD3_LRP5_WNT11,complex:FZD3_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
...,...,...,...,...,...,...,...,...,...,...,...
1350,CPI-SC090068F7B,TSLP_TSLPR,simple:Q969D9,complex:TSLPR,TSLP,,True,False,True,user_curated,False
1351,CPI-SC047CEF2DD,CRLF2_TSLPR,simple:Q9HC73,complex:TSLPR,CRLF2,,True,True,True,user_curated,False
1352,CPI-SS04C672963,ESAM_ESAM,simple:Q96AP7,simple:Q96AP7,ESAM,ESAM,False,False,False,user_curated,False
1354,CPI-SC060C69786,NRTN_RET_receptor_2,simple:Q99748,complex:RET_receptor_2,NRTN,,True,False,True,user_curated,False


In [17]:
int_cpDB.loc[1,:]

id_cp_interaction        CPI-CS0481C1F9A
interacting_pair         FZD1_LRP5_WNT11
partner_a              complex:FZD1_LRP5
partner_b                  simple:O96014
gene_a                               NaN
gene_b                             WNT11
secreted                            True
receptor_a                         False
receptor_b                         False
annotation_strategy         user_curated
is_integrin                        False
Name: 1, dtype: object

In [18]:
'complex:FZD1_LRP5' in list(Com2Gene.keys())

True

In [19]:
list(Com2Gene.keys())[:5]

['complex:12oxoLeukotrieneB4_byPTGR1',
 'complex:17aHydroxyprogesterone_byCYP17A1',
 'complex:22Hydroxycholesterol_byCYP11A1',
 'complex:22Hydroxycholesterol_byCYP3A4',
 'complex:2arachidonoylglycerol_byDAGLA']

In [20]:
'complex:FZD1_LRP5' in list(com_cpDB['complex_name'])

True

In [21]:
# Generate Int2Gene dictionary
Int2Gene = {}

for i in int_cpDB.index:
    #print('row number:', i)
    curr_df_row = int_cpDB.loc[i,:]
    #print('row:', curr_df_row)
    
    # if partnerA is complex (aka np.isnan(table['gene_a']) == True), then retrieve members from dictionary
    # complex name will be in table['partner_a']
    if str(curr_df_row['gene_a']) == 'nan':
        partner_A = Com2Gene[curr_df_row['partner_a']]
    else:
        # if it's not a complex, then get the partner A from table['gene_a']
        partner_A = [curr_df_row['gene_a']]
        
    # if partnerB is complex, then retrieve members from dictionary
    # complex name will be in table['partner_b']
    if str(curr_df_row['gene_b']) == 'nan':
        partner_B = Com2Gene[curr_df_row['partner_b']]
    else:
        # if it's not a complex, then get the partner B from table['gene_b']
        partner_B = [curr_df_row['gene_b']]
        
    interaction_id = curr_df_row['interacting_pair']

    Int2Gene[interaction_id] = {'partner_a': partner_A,
                  'partner_b': partner_B}

In [22]:
list(Int2Gene.items())[:10]

[('FZD1_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD1'], 'partner_b': ['WNT11']}),
 ('FZD1_LRP6_WNT11', {'partner_a': ['FZD1', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD2_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD2'], 'partner_b': ['WNT11']}),
 ('FZD2_LRP6_WNT11', {'partner_a': ['FZD2', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD3_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD3'], 'partner_b': ['WNT11']}),
 ('FZD3_LRP6_WNT11', {'partner_a': ['FZD3', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD4_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD4'], 'partner_b': ['WNT11']}),
 ('FZD4_LRP6_WNT11', {'partner_a': ['FZD4', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD5_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD5'], 'partner_b': ['WNT11']}),
 ('FZD5_LRP6_WNT11', {'partner_a': ['FZD5', 'LRP6'], 'partner_b': ['WNT11']})]

## Load cluster's gene percentage expression

Prepared in S2 notebook

In [23]:
# Load percentage expression info
# Matrix of genes (rows) per celltypes (columns) containing the proportion [0-1] of cells 
# in a celltype expressing the gene
path_Exp = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/PercentExpressed_for_cellphone_20210308_subanalysis_Freiburg_Ib_and_healthy_donors.csv'

# Load matrix
Per_df = pd.read_csv(path_Exp, index_col=0) 

# Dictionary of celltypes2expressed genes
genes_expr_per_cell_type = {} 

for ct in Per_df.columns:
    print(ct)
    curr_table = pd.DataFrame(Per_df.loc[:, ct])
    
    # only leave in genes expressed in this cell type according to a threshold declared in the beginnning of this notebook
    mask = (curr_table[ct] > per_cutoff)
    genes_expr_per_cell_type[ct] = list(curr_table[ct][mask].index)

B_cells_memory
B_cells_memory_activated
B_cells_naive
B_cells_naive_activated
MAIT_cells
Macrophages
Monocytes_classical
Monocytes_intermediate
Monocytes_non-classical
NK_CD16_bright
NK_CD16_bright_activated
NK_CD56_bright
NK_CD56_bright_activated
Plasma_cells
Precursor_cells
T4_activated
T4_memory
T4_naive
T8_activated
T8_naive
TCM_CD8+
TEM_CD8+
TMRA_CD8+
T_gd
T_regs
cDC1
cDC2
iNKT_cells
pDC


In [24]:
Per_df

Unnamed: 0_level_0,B_cells_memory,B_cells_memory_activated,B_cells_naive,B_cells_naive_activated,MAIT_cells,Macrophages,Monocytes_classical,Monocytes_intermediate,Monocytes_non-classical,NK_CD16_bright,...,T8_naive,TCM_CD8+,TEM_CD8+,TMRA_CD8+,T_gd,T_regs,cDC1,cDC2,iNKT_cells,pDC
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
RP11-34P13.7,0.000000,0.000000,0.000712,0.000000,0.000000,0.000713,0.000250,0.000000,0.000000,0.000000,...,0.000445,0.000574,0.000000,0.000000,0.000793,0.000000,0.000744,0.000000,0.00082,0.0000
FO538757.2,0.113089,0.250522,0.085470,0.077905,0.074561,0.252673,0.217881,0.171779,0.210669,0.066403,...,0.069364,0.087830,0.067472,0.065642,0.077716,0.155192,0.187004,0.100437,0.07541,0.0625
AP006222.2,0.002094,0.004175,0.000000,0.000000,0.001096,0.005702,0.004257,0.006135,0.002712,0.000527,...,0.000000,0.000574,0.001007,0.000000,0.000793,0.001346,0.003224,0.000000,0.00082,0.0000
RP4-669L17.10,0.002094,0.003016,0.002137,0.001277,0.000000,0.001069,0.000000,0.000383,0.002712,0.000264,...,0.001334,0.000000,0.000504,0.001397,0.002379,0.001731,0.001240,0.000000,0.00000,0.0000
RP5-857K21.4,0.001047,0.000000,0.000000,0.000000,0.000000,0.000000,0.000751,0.000000,0.000000,0.000000,...,0.000445,0.000574,0.000000,0.000000,0.000000,0.000000,0.000496,0.000000,0.00082,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTD-2541M15.3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.0000
THEGL,0.000000,0.000000,0.000000,0.000000,0.000000,0.000356,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000248,0.000000,0.00000,0.0000
KIAA1644,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000904,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000496,0.006550,0.00000,0.0000
RP11-132A1.3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.0000


In [25]:
len(genes_expr_per_cell_type['B_cells_memory'])

4092

## Load DE expression info

In [26]:
path_DE = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/joint_DEGs_list_all_cell_types_for_cellphone_20210308_subanalysis_Freiburg_Ib_and_healthy_donors.csv'
DE_df = pd.read_csv(path_DE, #row.names = 0
                )

DE_df


Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,cluster
0,0,IgD,1.023776,2.592023e-32,3.952057e-28,2.132361,1.108585,0.821826,0.565217,B_cells_memory
1,1,CD73(Ecto-5'-nucleotidase),-0.958522,2.139729e-22,1.631222e-18,1.089468,2.047991,0.550111,0.735178,B_cells_memory
2,2,AC079767.4,0.384022,3.641864e-12,1.850917e-08,0.724130,0.340108,0.409800,0.221344,B_cells_memory
3,3,IGHG3,0.311110,5.833035e-12,1.876685e-08,0.505015,0.193905,0.309577,0.128458,B_cells_memory
4,4,IgA,-0.543810,6.154275e-12,1.876685e-08,0.110669,0.654480,0.048998,0.175889,B_cells_memory
...,...,...,...,...,...,...,...,...,...,...
445057,10940,HENMT1,-0.000066,9.993520e-01,9.996314e-01,0.086109,0.086175,0.058824,0.048387,pDC
445058,10941,MYD88,0.000108,9.993756e-01,9.996314e-01,0.269189,0.269081,0.147059,0.161290,pDC
445059,10942,LBHD1,0.000030,9.994487e-01,9.996314e-01,0.034850,0.034821,0.029412,0.032258,pDC
445060,10943,SRSF5,-0.000073,9.997225e-01,9.998138e-01,1.159138,1.159211,0.588235,0.645161,pDC


In [27]:
'PTPRC' in list(DE_df['Gene'])

True

In [28]:
logFC_cutoff

0

In [29]:
pval_cutoff

0.05

In [30]:
per_cutoff

0.1

In [31]:
# filter the DE table according to cutoffs declared in the beginning of the notebook
# separately for upreg and downreg genes

DE_df = DE_df[(abs(DE_df['logFC']) > logFC_cutoff)
             & (DE_df['adj.P.Val'] < pval_cutoff)
             & (DE_df['percentExpr_cluster'] > per_cutoff)]

DE_df_upreg = DE_df[DE_df['logFC'] > 0]
DE_df_downreg = DE_df[DE_df['logFC'] < 0]

In [32]:
# without logFC filtering at all
print(DE_df.shape)
print(DE_df_upreg.shape)
print(DE_df_downreg.shape)

(5686, 10)
(3635, 10)
(2051, 10)


In [33]:
np.unique(DE_df_upreg['cluster'])

array(['B_cells_memory', 'B_cells_memory_activated', 'B_cells_naive',
       'B_cells_naive_activated', 'MAIT_cells', 'Macrophages',
       'Monocytes_classical', 'Monocytes_intermediate',
       'Monocytes_non-classical', 'NK_CD16_bright',
       'NK_CD16_bright_activated', 'NK_CD56_bright',
       'NK_CD56_bright_activated', 'Plasma_cells', 'Precursor_cells',
       'T4_activated', 'T4_memory', 'T4_naive', 'T8_activated',
       'T8_naive', 'TCM_CD8+', 'TEM_CD8+', 'TMRA_CD8+', 'T_gd', 'T_regs',
       'cDC1', 'cDC2', 'iNKT_cells'], dtype=object)

In [34]:
np.unique(DE_df_downreg['cluster'])

array(['B_cells_memory', 'B_cells_memory_activated', 'B_cells_naive',
       'B_cells_naive_activated', 'MAIT_cells', 'Macrophages',
       'Monocytes_classical', 'Monocytes_intermediate',
       'Monocytes_non-classical', 'NK_CD16_bright',
       'NK_CD16_bright_activated', 'NK_CD56_bright_activated',
       'Plasma_cells', 'Precursor_cells', 'T4_activated', 'T4_memory',
       'T4_naive', 'T8_activated', 'T8_naive', 'TCM_CD8+', 'TEM_CD8+',
       'TMRA_CD8+', 'T_gd', 'T_regs', 'cDC1', 'cDC2', 'iNKT_cells'],
      dtype=object)

In [35]:
# Build dictionary clusters2DE_genes
# separately for upreg and downreg genes

clusters_upreg = list(np.unique(DE_df_upreg['cluster']))
clusters_downreg = list(np.unique(DE_df_downreg['cluster']))

is_DE_upreg = {}
is_DE_downreg = {}

for cluster in clusters_upreg:
    is_DE_upreg[cluster] = list(DE_df_upreg[DE_df_upreg['cluster'] == cluster]['Gene'])
    
for cluster in clusters_downreg:
    is_DE_downreg[cluster] = list(DE_df_downreg[DE_df_downreg['cluster'] == cluster]['Gene'])

In [36]:
len(is_DE_upreg['B_cells_memory_activated'])

233

In [37]:
'CD40' in is_DE_downreg['B_cells_memory_activated']

False

In [38]:
len(is_DE_downreg['B_cells_memory'])

14

In [39]:
for ct in list(is_DE_upreg.keys()):
    print(ct)
    print(len(is_DE_upreg[ct]), '\n')

B_cells_memory
36 

B_cells_memory_activated
233 

B_cells_naive
20 

B_cells_naive_activated
6 

MAIT_cells
11 

Macrophages
477 

Monocytes_classical
502 

Monocytes_intermediate
96 

Monocytes_non-classical
67 

NK_CD16_bright
91 

NK_CD16_bright_activated
7 

NK_CD56_bright
1 

NK_CD56_bright_activated
3 

Plasma_cells
23 

Precursor_cells
2 

T4_activated
909 

T4_memory
41 

T4_naive
20 

T8_activated
247 

T8_naive
20 

TCM_CD8+
15 

TEM_CD8+
57 

TMRA_CD8+
13 

T_gd
15 

T_regs
382 

cDC1
337 

cDC2
1 

iNKT_cells
3 



In [40]:
for ct in list(is_DE_downreg.keys()):
    print(ct)
    print(len(is_DE_downreg[ct]), '\n')

B_cells_memory
14 

B_cells_memory_activated
630 

B_cells_naive
11 

B_cells_naive_activated
14 

MAIT_cells
53 

Macrophages
122 

Monocytes_classical
93 

Monocytes_intermediate
90 

Monocytes_non-classical
36 

NK_CD16_bright
203 

NK_CD16_bright_activated
5 

NK_CD56_bright_activated
1 

Plasma_cells
7 

Precursor_cells
1 

T4_activated
101 

T4_memory
15 

T4_naive
7 

T8_activated
334 

T8_naive
7 

TCM_CD8+
12 

TEM_CD8+
8 

TMRA_CD8+
5 

T_gd
11 

T_regs
36 

cDC1
232 

cDC2
1 

iNKT_cells
2 



In [41]:
is_DE_upreg['B_cells_naive_activated']

['RPS4Y1', 'IgD', 'IGHV4-34', 'HLA-DPA1', 'MT-ND6', 'HLA-B']

In [42]:
is_DE_downreg['B_cells_naive_activated']

["CD73(Ecto-5'-nucleotidase)",
 'HLA-DRB5',
 'CD57Recombinant',
 'IgA',
 'NLRP2-1',
 'RPS10',
 'CD357(GITR)',
 'CD303(BDCA-2)',
 'B7-H4',
 'TCRVa24-Ja18(iNKTcell)',
 'TCRa/B',
 'CD197(CCR7)',
 'CD206(MMR)',
 'IgGFc']

## Define cell pairs to test

In [43]:
len(list(genes_expr_per_cell_type.keys()))

29

In [44]:
list(genes_expr_per_cell_type.keys())

['B_cells_memory',
 'B_cells_memory_activated',
 'B_cells_naive',
 'B_cells_naive_activated',
 'MAIT_cells',
 'Macrophages',
 'Monocytes_classical',
 'Monocytes_intermediate',
 'Monocytes_non-classical',
 'NK_CD16_bright',
 'NK_CD16_bright_activated',
 'NK_CD56_bright',
 'NK_CD56_bright_activated',
 'Plasma_cells',
 'Precursor_cells',
 'T4_activated',
 'T4_memory',
 'T4_naive',
 'T8_activated',
 'T8_naive',
 'TCM_CD8+',
 'TEM_CD8+',
 'TMRA_CD8+',
 'T_gd',
 'T_regs',
 'cDC1',
 'cDC2',
 'iNKT_cells',
 'pDC']

In [45]:
# Get all pairwise cluster combinations: A--B, B--A
# Consider the reverse interaction too: A--B but B--A as well
pairwise_cluster_combinations = list(itertools.permutations(list(genes_expr_per_cell_type.keys()), 2))
len(pairwise_cluster_combinations)


812

In [46]:
pairwise_cluster_combinations[:5]

[('B_cells_memory', 'B_cells_memory_activated'),
 ('B_cells_memory', 'B_cells_naive'),
 ('B_cells_memory', 'B_cells_naive_activated'),
 ('B_cells_memory', 'MAIT_cells'),
 ('B_cells_memory', 'Macrophages')]

In [47]:
# add self interactions
self_inter_combinations = [(ct, ct) for ct in list(genes_expr_per_cell_type.keys())]
pairwise_cluster_combinations = pairwise_cluster_combinations + self_inter_combinations
len(pairwise_cluster_combinations)


841

In [48]:
len(is_DE_upreg.keys())

28

In [49]:
len(is_DE_downreg.keys())

27

In [50]:
# We only want to test pairs including at least one celltype in the DE folder

pairwise_cluster_combinations_upreg = [elem for elem in pairwise_cluster_combinations if elem[0] in list(is_DE_upreg.keys()) and elem[1] in list(is_DE_upreg.keys())]
pairwise_cluster_combinations_downreg = [elem for elem in pairwise_cluster_combinations if elem[0] in list(is_DE_downreg.keys()) and elem[1] in list(is_DE_downreg.keys())]

In [51]:
len(pairwise_cluster_combinations_upreg)

784

In [52]:
len(pairwise_cluster_combinations_downreg)

729

In [53]:
# Make cluster pair labels: celltypeA--celltypeB             
cluster_combinations_labels_upreg = [comb[0] + '---' + comb[1] for comb in pairwise_cluster_combinations_upreg]
cluster_combinations_labels_downreg = [comb[0] + '---' + comb[1] for comb in pairwise_cluster_combinations_downreg]

In [54]:
len(cluster_combinations_labels_upreg)

784

In [55]:
len(cluster_combinations_labels_downreg)

729

# Retrieve CellphoneDB L/R interactions

A relevant interaction shoudl have

1. All their participants expressed in the corresponding celltypes
2. At least one participant is a DEG

In [56]:
len(Int2Gene.keys())

825

In [57]:
# Make scaffold matrix: L/R interactions (rows) x celltype pairs (columns)
df_Exrp_LR_in_celltype_pairs_upreg = pd.DataFrame(index = list(Int2Gene.keys()),
                                          columns = cluster_combinations_labels_upreg,
                                          data = np.zeros( (len(list(Int2Gene.keys())), 
                                                            len(cluster_combinations_labels_upreg)) )
                                         )

df_Exrp_LR_in_celltype_pairs_downreg = pd.DataFrame(index = list(Int2Gene.keys()),
                                          columns = cluster_combinations_labels_downreg,
                                          data = np.zeros( (len(list(Int2Gene.keys())), 
                                                            len(cluster_combinations_labels_downreg)) )
                                         )

In [58]:
df_Exrp_LR_in_celltype_pairs_upreg

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,...,T8_activated---T8_activated,T8_naive---T8_naive,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,cDC2---cDC2,iNKT_cells---iNKT_cells
FZD1_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD1_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD3_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TSLP_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRLF2_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ESAM_ESAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NRTN_RET_receptor_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
df_Exrp_LR_in_celltype_pairs_downreg

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,...,T8_activated---T8_activated,T8_naive---T8_naive,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,cDC2---cDC2,iNKT_cells---iNKT_cells
FZD1_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD1_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD3_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TSLP_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRLF2_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ESAM_ESAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NRTN_RET_receptor_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [60]:
%%time

# LR_pairs_celltype_pairs_df will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# So, fill 1 if all genes are expressed in all clusters

for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_upreg.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_upreg.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_upreg.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are all partner_A genes expressed in celltype_A and are all partner_B genes expressed in celltype_B?
        are_all_expressed = all(elem in genes_expr_per_cell_type[ct_A] for elem in partner_A_genes) & all(elem in genes_expr_per_cell_type[ct_B] for elem in partner_B_genes)
        
        if are_all_expressed:
            df_Exrp_LR_in_celltype_pairs_upreg.loc[interaction, ct_pair] = 1


CPU times: user 1min 14s, sys: 3.11 ms, total: 1min 14s
Wall time: 1min 14s


In [61]:
%%time

# LR_pairs_celltype_pairs_df will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# So, fill 1 if all genes are expressed in all clusters

for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_downreg.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_downreg.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_downreg.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are all partner_A genes expressed in celltype_A and are all partner_B genes expressed in celltype_B?
        are_all_expressed = all(elem in genes_expr_per_cell_type[ct_A] for elem in partner_A_genes) & all(elem in genes_expr_per_cell_type[ct_B] for elem in partner_B_genes)
        
        if are_all_expressed:
            df_Exrp_LR_in_celltype_pairs_downreg.loc[interaction, ct_pair] = 1


CPU times: user 1min 10s, sys: 7.65 ms, total: 1min 10s
Wall time: 1min 10s


In [62]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.values, return_counts=True)

(array([0., 1.]), array([635932,  10868]))

In [63]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.values, return_counts=True)

(array([0., 1.]), array([591142,  10283]))

In [64]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.sum(axis=0))

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 39.])

In [65]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.sum(axis=0))

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 39.])

In [66]:
df_Exrp_LR_in_celltype_pairs_upreg.shape

(825, 784)

In [67]:
df_Exrp_LR_in_celltype_pairs_downreg.shape

(825, 729)

In [68]:
# keep celltype pairs with at least one expressed interaction

df_Exrp_LR_in_celltype_pairs_upreg = df_Exrp_LR_in_celltype_pairs_upreg.loc[:, (df_Exrp_LR_in_celltype_pairs_upreg != 0).any(axis=0)]
print(df_Exrp_LR_in_celltype_pairs_upreg.shape)

df_Exrp_LR_in_celltype_pairs_downreg = df_Exrp_LR_in_celltype_pairs_downreg.loc[:, (df_Exrp_LR_in_celltype_pairs_downreg != 0).any(axis=0)]
print(df_Exrp_LR_in_celltype_pairs_downreg.shape)

(825, 784)
(825, 729)


In [69]:
# keep interactions with at least one celltype_pair
df_Exrp_LR_in_celltype_pairs_upreg = df_Exrp_LR_in_celltype_pairs_upreg.loc[(df_Exrp_LR_in_celltype_pairs_upreg != 0).any(axis=1), :]
print(df_Exrp_LR_in_celltype_pairs_upreg.shape)

df_Exrp_LR_in_celltype_pairs_downreg = df_Exrp_LR_in_celltype_pairs_downreg.loc[(df_Exrp_LR_in_celltype_pairs_downreg != 0).any(axis=1), :]
print(df_Exrp_LR_in_celltype_pairs_downreg.shape)

(127, 784)
(125, 729)


In [70]:
df_Exrp_LR_in_celltype_pairs_downreg.columns

Index(['B_cells_memory---B_cells_memory_activated',
       'B_cells_memory---B_cells_naive',
       'B_cells_memory---B_cells_naive_activated',
       'B_cells_memory---MAIT_cells', 'B_cells_memory---Macrophages',
       'B_cells_memory---Monocytes_classical',
       'B_cells_memory---Monocytes_intermediate',
       'B_cells_memory---Monocytes_non-classical',
       'B_cells_memory---NK_CD16_bright',
       'B_cells_memory---NK_CD16_bright_activated',
       ...
       'T8_activated---T8_activated', 'T8_naive---T8_naive',
       'TCM_CD8+---TCM_CD8+', 'TEM_CD8+---TEM_CD8+', 'TMRA_CD8+---TMRA_CD8+',
       'T_gd---T_gd', 'T_regs---T_regs', 'cDC1---cDC1', 'cDC2---cDC2',
       'iNKT_cells---iNKT_cells'],
      dtype='object', length=729)

In [71]:
df_Exrp_LR_in_celltype_pairs_downreg.loc[:,'B_cells_memory---B_cells_naive_activated']

PVR_CD96              0.0
PVR_CD226             0.0
PVR_TIGIT             0.0
NOTCH1_DLL3           0.0
NOTCH2_DLL3           0.0
                     ... 
CCR4_CCL17            0.0
CD47_SIRB1_complex    0.0
LAIR1_LILRB4          0.0
CLEC2B_KLRF1          0.0
CRLF2_TSLPR           0.0
Name: B_cells_memory---B_cells_naive_activated, Length: 125, dtype: float64

In [72]:
df_Exrp_LR_in_celltype_pairs_downreg

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,...,T8_activated---T8_activated,T8_naive---T8_naive,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,cDC2---cDC2,iNKT_cells---iNKT_cells
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
CLEC2B_KLRF1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [73]:
list(df_Exrp_LR_in_celltype_pairs_downreg.index) == list(df_Exrp_LR_in_celltype_pairs_upreg.index)

False

In [74]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.values, return_counts=True)

(array([0., 1.]), array([88700, 10868]))

In [75]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.values, return_counts=True)

(array([0., 1.]), array([80842, 10283]))

In [76]:
np.sum(df_Exrp_LR_in_celltype_pairs_upreg.values)

10868.0

In [77]:
np.sum(df_Exrp_LR_in_celltype_pairs_downreg.values)

10283.0

In [78]:
# Initialize DE matrix from LR_pairs_celltype_pairs_df and set all values to 0
# DE will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# and one is a DE in the celltypes of interests
# Make scaffold matrix: L/R interactions (rows) x celltype pairs (columns)
df_Exrp_LR_in_celltype_pairs_upreg_DE = pd.DataFrame(index = list(df_Exrp_LR_in_celltype_pairs_upreg.index),
                                          columns = list(df_Exrp_LR_in_celltype_pairs_upreg.columns),
                                            data = np.zeros( (len(list(df_Exrp_LR_in_celltype_pairs_upreg.index)), 
                                                            len(list(df_Exrp_LR_in_celltype_pairs_upreg.columns))))
                                            )
df_Exrp_LR_in_celltype_pairs_downreg_DE = pd.DataFrame(index = list(df_Exrp_LR_in_celltype_pairs_downreg.index),
                                          columns = list(df_Exrp_LR_in_celltype_pairs_downreg.columns),
                                            data = np.zeros( (len(list(df_Exrp_LR_in_celltype_pairs_downreg.index)), 
                                                            len(list(df_Exrp_LR_in_celltype_pairs_downreg.columns))))
                                            )

In [79]:
%%time
# So, fill 1 if at least one gene in the interaction is DE
for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
        are_any_DE = all(elem in is_DE_upreg[ct_A] for elem in partner_A_genes) | all(elem in is_DE_upreg[ct_B] for elem in partner_B_genes)  
        
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs_upreg.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction, ct_pair] = 1

CPU times: user 2.03 s, sys: 21 µs, total: 2.03 s
Wall time: 2.03 s


In [80]:
%%time
# So, fill 1 if at least one gene in the interaction is DE
for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
        are_any_DE = all(elem in is_DE_downreg[ct_A] for elem in partner_A_genes) | all(elem in is_DE_downreg[ct_B] for elem in partner_B_genes)  
        
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs_downreg.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction, ct_pair] = 1

CPU times: user 1.63 s, sys: 0 ns, total: 1.63 s
Wall time: 1.63 s


In [81]:
df_Exrp_LR_in_celltype_pairs_upreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,...,T8_activated---T8_activated,T8_naive---T8_naive,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,cDC2---cDC2,iNKT_cells---iNKT_cells
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CLEC2B_KLRF1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [82]:
df_Exrp_LR_in_celltype_pairs_downreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,...,T8_activated---T8_activated,T8_naive---T8_naive,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,cDC2---cDC2,iNKT_cells---iNKT_cells
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CLEC2B_KLRF1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [83]:
# UPREG interactions

# keep interactions expressed in at least one celltype pair
df_Exrp_LR_in_celltype_pairs_upreg_DE = df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[:, (df_Exrp_LR_in_celltype_pairs_upreg_DE != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_upreg_DE.shape, '\n')

# keep celltypepairs with at least one interaction

df_Exrp_LR_in_celltype_pairs_upreg_DE = df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[(df_Exrp_LR_in_celltype_pairs_upreg_DE != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_upreg_DE.shape, '\n')

shape after filtering interactions
(127, 426) 

shape after filtering cell type pairs
(67, 426) 



In [84]:
# DOWNREG interactions

# keep interactions expressed in at least one celltype pair
df_Exrp_LR_in_celltype_pairs_downreg_DE = df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[:, (df_Exrp_LR_in_celltype_pairs_downreg_DE != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_downreg_DE.shape, '\n')

# keep celltypepairs with at least one interaction

df_Exrp_LR_in_celltype_pairs_downreg_DE = df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[(df_Exrp_LR_in_celltype_pairs_downreg_DE != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_downreg_DE.shape, '\n')

shape after filtering interactions
(125, 289) 

shape after filtering cell type pairs
(46, 289) 



In [85]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg_DE.values, return_counts=True)

(array([0., 1.]), array([27147,  1395]))

In [86]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg_DE.values, return_counts=True)

(array([0., 1.]), array([12491,   803]))

In [87]:
df_Exrp_LR_in_celltype_pairs_upreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD56_bright,B_cells_memory---NK_CD56_bright_activated,B_cells_memory---Plasma_cells,B_cells_memory---T4_activated,B_cells_memory---T4_memory,B_cells_memory---T8_activated,...,Monocytes_intermediate---Monocytes_intermediate,NK_CD16_bright---NK_CD16_bright,NK_CD16_bright_activated---NK_CD16_bright_activated,T4_activated---T4_activated,T4_memory---T4_memory,T8_activated---T8_activated,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SIRPA_CD47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
LGALS9_HAVCR2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CD99_PILRA,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
SPN_SIGLEC1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD52_SIGLEC10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
LTBR_LTB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [88]:
df_Exrp_LR_in_celltype_pairs_downreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---Macrophages,B_cells_memory---NK_CD16_bright,B_cells_memory---T4_activated,B_cells_memory---T_regs,B_cells_memory---cDC1,B_cells_memory_activated---B_cells_memory,B_cells_memory_activated---B_cells_naive,B_cells_memory_activated---B_cells_naive_activated,B_cells_memory_activated---MAIT_cells,...,iNKT_cells---cDC1,B_cells_memory_activated---B_cells_memory_activated,MAIT_cells---MAIT_cells,Macrophages---Macrophages,Monocytes_intermediate---Monocytes_intermediate,NK_CD16_bright---NK_CD16_bright,T4_activated---T4_activated,T8_activated---T8_activated,T_regs---T_regs,cDC1---cDC1
SIRPA_CD47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
PLAUR_integrin_a4b1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
CD40LG_integrin_a5b1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TGFB1_TGFBR3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ICAM1_integrin_aMb2_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ICAM1_integrin_aXb2_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CXCR3_CXCL9,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DPP4_CXCL9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD8_receptor_LCK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD74_APP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Save results

In [89]:
save_path = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

In [90]:
# So DE is our output matrix
# Filter it accordingly for visualization
df_Exrp_LR_in_celltype_pairs_upreg_DE.to_csv(save_path + '20210324_cellphone_interactions_upreg_in_Freiburg_Ib_CVID_validation_cohort_no_logFC_cutoff.csv')
df_Exrp_LR_in_celltype_pairs_downreg_DE.to_csv(save_path + '20210324_cellphone_interactions_downreg_in_Freiburg_Ib_CVID_validation_no_logFC_cutoff.csv')

## Save results in a more readable format

Gene by gene breakdown with added DEG stats

### Upreg interactions

In [91]:
faulty_index_count = 0

vec2_append_upreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_upreg_DE.index:
    
    print(interaction, list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index).index(interaction) + 1, 
          'out of', len(df_Exrp_LR_in_celltype_pairs_upreg_DE.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        # row by row
        vec2_append_upreg[str(curr_count)] = {}
        
        vec2_append_upreg[str(curr_count)]['interaction'] = interaction
        
        #print(celltype_pair)
        
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
        
        #print('curr partner A genes', curr_partner_A_genes, 'len:', len(curr_partner_A_genes))
        #print('curr partner B genes', curr_partner_B_genes, 'len:', len(curr_partner_B_genes))
        
        vec2_append_upreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_upreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
        
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        vec2_append_upreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_upreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_df_upreg[DE_df_upreg['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_df_upreg[DE_df_upreg['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])

        
        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_cluster'])
            
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
            
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_cluster'])
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1
    
    

PVR_CD96 1 out of 67
PVR_CD226 2 out of 67
PVR_TIGIT 3 out of 67
SIRPA_CD47 4 out of 67
LGALS9_HAVCR2 5 out of 67
PLAUR_integrin_a4b1_complex 6 out of 67
ICAM1_integrin_aMb2_complex 7 out of 67
ICAM1_integrin_aXb2_complex 8 out of 67
CD8_receptor_LCK 9 out of 67
CD94:NKG2A_HLA-E 10 out of 67
CD94:NKG2C_HLA-E 11 out of 67
CD94:NKG2E_HLA-E 12 out of 67
CD74_APP 13 out of 67
ICAM1_SPN 14 out of 67
ICAM1_ITGAL 15 out of 67
ICAM1_integrin_aLb2_complex 16 out of 67
ICAM2_integrin_aLb2_complex 17 out of 67
ICAM3_integrin_aLb2_complex 18 out of 67
F11R_integrin_aLb2_complex 19 out of 67
NRP2_SEMA3C 20 out of 67
HLA-A_KIR3DL1 21 out of 67
HLA-F_KIR3DL1 22 out of 67
HLA-F_KIR3DL2 23 out of 67
HLA-B_KIR3DL2 24 out of 67
HLA-F_LILRB2 25 out of 67
HLA-F_LILRB1 26 out of 67
CCL4_CCR5 27 out of 67
CCL3_CCR5 28 out of 67
CCL5_CCR5 29 out of 67
KLRB1_CLEC2D 30 out of 67
TNF_TNFRSF1A 31 out of 67
TNF_TNFRSF1B 32 out of 67
LTA_TNFRSF1B 33 out of 67
CCR6_CCL20 34 out of 67
TNFRSF13B_TNFSF13 35 out of 67
C

In [92]:
# outlining the final table format
df_output_upreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                              index = list(vec2_append_upreg.keys())
                            )
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1390,,,,,,,,,,,,,
1391,,,,,,,,,,,,,
1392,,,,,,,,,,,,,
1393,,,,,,,,,,,,,


In [93]:
list(df_output_upreg.columns) == list(vec2_append_upreg['0'].keys())

True

In [94]:
len(vec2_append_upreg.keys())

1395

In [95]:
vec2_append_upreg['0'].keys()

dict_keys(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A', 'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A', 'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B', 'adj_pval_gene_B', 'percent_expr_gene_B'])

In [96]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1390,,,,,,,,,,,,,
1391,,,,,,,,,,,,,
1392,,,,,,,,,,,,,
1393,,,,,,,,,,,,,


In [97]:
%%time

for i in list(vec2_append_upreg.keys()):
    #print(i)
    curr_keys = list(vec2_append_upreg[i].keys())
    for col in curr_keys:
        df_output_upreg.loc[i,col] = vec2_append_upreg[i][col]

CPU times: user 1.57 s, sys: 0 ns, total: 1.57 s
Wall time: 1.57 s


In [98]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,PVR_CD96,[PVR],[CD96],cDC2,B_cells_memory_activated,False,,,[0.11572052401746726],True,[0.0810261029411074],[1.8377473841979697e-12],[0.190452]
1,PVR_CD226,[PVR],[CD226],cDC2,T4_activated,False,,,[0.11572052401746726],True,[0.0402476771428872],[0.0005443527049235],[0.196709]
2,PVR_CD226,[PVR],[CD226],cDC2,T8_activated,False,,,[0.11572052401746726],True,[0.06679185630153299],[0.0034127445906409998],[0.199569]
3,PVR_TIGIT,[PVR],[TIGIT],cDC2,NK_CD16_bright,False,,,[0.11572052401746726],True,[0.166463829424932],[2.69773954043196e-09],[0.17574800000000002]
4,PVR_TIGIT,[PVR],[TIGIT],cDC2,T4_activated,False,,,[0.11572052401746726],True,[0.0627912489029173],[7.76358275034236e-07],[0.174771]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1390,CLEC2B_KLRF1,[CLEC2B],[KLRF1],B_cells_memory_activated,NK_CD56_bright,True,[0.0865666009888044],[1.98406169480595e-07],[0.370352],False,,,[0.28434504792332266]
1391,CLEC2B_KLRF1,[CLEC2B],[KLRF1],Monocytes_non-classical,NK_CD16_bright,True,[0.121015724365579],[0.0484646875109062],[0.41031899999999993],False,,,[0.28537549407114626]
1392,CLEC2B_KLRF1,[CLEC2B],[KLRF1],Monocytes_non-classical,NK_CD56_bright,True,[0.121015724365579],[0.0484646875109062],[0.41031899999999993],False,,,[0.28434504792332266]
1393,CLEC2B_KLRF1,[CLEC2B],[KLRF1],T8_activated,NK_CD16_bright,True,[0.049941405943636996],[0.0383848076124716],[0.130653],False,,,[0.28537549407114626]


In [99]:
# getting rid of the square parentheses [] in all the values

cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_upreg.index):
    #print('row', row)
    for col in cols2correct:
        #print('column', col)
        curr_value = df_output_upreg.loc[row, col] # with []
        #print(curr_value)
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 - complex genes that is
        #if not isinstance(curr_value, list) & (curr_value != 'NA'): # ignoring NAs and lists of length > 1 - complex genes that is
            df_output_upreg.loc[row, col] = curr_value[0] # this just get the element - string if a gene, numerical value if it's a stat
            

In [100]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,PVR_CD96,PVR,CD96,cDC2,B_cells_memory_activated,False,,,0.115721,True,0.0810261,1.83775e-12,0.190452
1,PVR_CD226,PVR,CD226,cDC2,T4_activated,False,,,0.115721,True,0.0402477,0.000544353,0.196709
2,PVR_CD226,PVR,CD226,cDC2,T8_activated,False,,,0.115721,True,0.0667919,0.00341274,0.199569
3,PVR_TIGIT,PVR,TIGIT,cDC2,NK_CD16_bright,False,,,0.115721,True,0.166464,2.69774e-09,0.175748
4,PVR_TIGIT,PVR,TIGIT,cDC2,T4_activated,False,,,0.115721,True,0.0627912,7.76358e-07,0.174771
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1390,CLEC2B_KLRF1,CLEC2B,KLRF1,B_cells_memory_activated,NK_CD56_bright,True,0.0865666,1.98406e-07,0.370352,False,,,0.284345
1391,CLEC2B_KLRF1,CLEC2B,KLRF1,Monocytes_non-classical,NK_CD16_bright,True,0.121016,0.0484647,0.410319,False,,,0.285375
1392,CLEC2B_KLRF1,CLEC2B,KLRF1,Monocytes_non-classical,NK_CD56_bright,True,0.121016,0.0484647,0.410319,False,,,0.284345
1393,CLEC2B_KLRF1,CLEC2B,KLRF1,T8_activated,NK_CD16_bright,True,0.0499414,0.0383848,0.130653,False,,,0.285375


In [101]:
df_output_upreg.columns

Index(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A',
       'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B'],
      dtype='object')

### Splitting tables into 2 tables: simple interactions and complex interactions, latter being deconvoluted into pseudo-interactions for each subunit of a complex

In [102]:
# getting indices of complex interactions
complex_interaction_rows_upreg = []

for n_row in list(df_output_upreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) or isinstance(curr_partner_B_genes, list): # if partner A or B is a complex
        complex_interaction_rows_upreg.append(n_row)
        

In [103]:
len(complex_interaction_rows_upreg)

255

In [104]:
np.unique(complex_interaction_rows_upreg, return_counts=True)

(array(['100', '101', '102', '103', '104', '105', '106', '107', '108',
        '109', '110', '111', '112', '113', '114', '115', '116', '117',
        '118', '119', '120', '121', '122', '123', '124', '125', '126',
        '127', '128', '129', '130', '131', '132', '133', '134', '135',
        '136', '137', '138', '139', '140', '141', '142', '143', '144',
        '145', '146', '147', '148', '149', '150', '151', '152', '153',
        '154', '155', '156', '157', '158', '159', '160', '161', '162',
        '163', '164', '165', '166', '167', '168', '169', '170', '171',
        '172', '173', '174', '175', '176', '177', '371', '372', '373',
        '374', '375', '376', '377', '378', '379', '380', '381', '382',
        '383', '384', '385', '386', '387', '388', '389', '390', '391',
        '392', '393', '394', '395', '396', '397', '398', '399', '400',
        '401', '402', '403', '404', '405', '406', '407', '408', '409',
        '410', '411', '412', '413', '414', '415', '416', '417', '418',
      

In [105]:
# splitting simple and complex interactions into 2 separate tables
df_output_upreg_complex = df_output_upreg.loc[complex_interaction_rows_upreg,:]
df_output_upreg_simple = df_output_upreg.drop(complex_interaction_rows_upreg, axis=0)

In [106]:
df_output_upreg_simple.shape

(1140, 13)

In [107]:
df_output_upreg_complex.shape

(255, 13)

In [108]:
df_output_upreg.shape

(1395, 13)

In [109]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,PVR_CD96,PVR,CD96,cDC2,B_cells_memory_activated,False,,,0.115721,True,0.0810261,1.83775e-12,0.190452
1,PVR_CD226,PVR,CD226,cDC2,T4_activated,False,,,0.115721,True,0.0402477,0.000544353,0.196709
2,PVR_CD226,PVR,CD226,cDC2,T8_activated,False,,,0.115721,True,0.0667919,0.00341274,0.199569
3,PVR_TIGIT,PVR,TIGIT,cDC2,NK_CD16_bright,False,,,0.115721,True,0.166464,2.69774e-09,0.175748
4,PVR_TIGIT,PVR,TIGIT,cDC2,T4_activated,False,,,0.115721,True,0.0627912,7.76358e-07,0.174771
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1390,CLEC2B_KLRF1,CLEC2B,KLRF1,B_cells_memory_activated,NK_CD56_bright,True,0.0865666,1.98406e-07,0.370352,False,,,0.284345
1391,CLEC2B_KLRF1,CLEC2B,KLRF1,Monocytes_non-classical,NK_CD16_bright,True,0.121016,0.0484647,0.410319,False,,,0.285375
1392,CLEC2B_KLRF1,CLEC2B,KLRF1,Monocytes_non-classical,NK_CD56_bright,True,0.121016,0.0484647,0.410319,False,,,0.284345
1393,CLEC2B_KLRF1,CLEC2B,KLRF1,T8_activated,NK_CD16_bright,True,0.0499414,0.0383848,0.130653,False,,,0.285375


In [110]:
df_output_upreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
51,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Macrophages,B_cells_memory,True,0.151074,0.000301322,0.784717,False,,,"[0.18324607329842926, 0.3507853403141361]"
52,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Macrophages,B_cells_memory_activated,True,0.151074,0.000301322,0.784717,False,,,"[0.202041289723962, 0.4040825794479239]"
53,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Macrophages,MAIT_cells,True,0.151074,0.000301322,0.784717,False,,,"[0.10526315789473684, 0.22697368421052636]"
54,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Macrophages,Monocytes_classical,True,0.151074,0.000301322,0.784717,False,,,"[0.3378412221387428, 0.297771099423992]"
55,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Macrophages,Monocytes_intermediate,True,0.151074,0.000301322,0.784717,False,,,"[0.22392638036809814, 0.13496932515337426]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,F11R_integrin_aLb2_complex,F11R,"[ITGB2, ITGAL]",Precursor_cells,T8_activated,False,,,0.117825,True,"[0.0797768090332497, 0.0659150111200208]","[0.0162202521743839, 0.0134643416027286]","[0.32448, 0.222541]"
495,F11R_integrin_aLb2_complex,F11R,"[ITGB2, ITGAL]",cDC1,NK_CD16_bright,False,,,0.124752,True,"[0.183384801684276, 0.103222248103342]","[7.379929642220871e-05, 0.0078961490661264]","[0.385265, 0.192632]"
496,F11R_integrin_aLb2_complex,F11R,"[ITGB2, ITGAL]",cDC1,T8_activated,False,,,0.124752,True,"[0.0797768090332497, 0.0659150111200208]","[0.0162202521743839, 0.0134643416027286]","[0.32448, 0.222541]"
497,F11R_integrin_aLb2_complex,F11R,"[ITGB2, ITGAL]",cDC2,NK_CD16_bright,False,,,0.20524,True,"[0.183384801684276, 0.103222248103342]","[7.379929642220871e-05, 0.0078961490661264]","[0.385265, 0.192632]"


In [111]:
# checking whether any complexes interact with other complexes and how many subunits each complex contains here

n_subunits_upreg = []

for n_row in list(df_output_upreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) and isinstance(curr_partner_B_genes, list): # if partner A AND B is a complex
        print('row', n_row)
        print('both are complexes')
        
    if isinstance(curr_partner_A_genes, list): # if partner A is a complex
        print('row', n_row)
        print('curr_partner_A_genes',curr_partner_A_genes)
        print('partner A is a complex, len is:', len(curr_partner_A_genes))
        n_subunits_upreg.append(len(curr_partner_A_genes))
        
    if isinstance(curr_partner_B_genes, list): # if partner B is a complex
        print('row', n_row)
        print('curr_partner_B_genes',curr_partner_B_genes)
        print('partner B is a complex, len is:', len(curr_partner_B_genes))
        n_subunits_upreg.append(len(curr_partner_B_genes))

row 51
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 52
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 53
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 54
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 55
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 56
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 57
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 58
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 59
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 60
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 61
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 62
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 63
curr_partner_B_genes 

In [112]:
np.unique(n_subunits_upreg, return_counts=True)

(array([2]), array([255]))

#### So 1 more scenario to ignore: there are no interactions of a complex with a complex
#### And max complex size is 2 subunits here

In [113]:
df_output_upreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
51,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Macrophages,B_cells_memory,True,0.151074,0.000301322,0.784717,False,,,"[0.18324607329842926, 0.3507853403141361]"
52,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Macrophages,B_cells_memory_activated,True,0.151074,0.000301322,0.784717,False,,,"[0.202041289723962, 0.4040825794479239]"
53,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Macrophages,MAIT_cells,True,0.151074,0.000301322,0.784717,False,,,"[0.10526315789473684, 0.22697368421052636]"
54,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Macrophages,Monocytes_classical,True,0.151074,0.000301322,0.784717,False,,,"[0.3378412221387428, 0.297771099423992]"
55,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Macrophages,Monocytes_intermediate,True,0.151074,0.000301322,0.784717,False,,,"[0.22392638036809814, 0.13496932515337426]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,F11R_integrin_aLb2_complex,F11R,"[ITGB2, ITGAL]",Precursor_cells,T8_activated,False,,,0.117825,True,"[0.0797768090332497, 0.0659150111200208]","[0.0162202521743839, 0.0134643416027286]","[0.32448, 0.222541]"
495,F11R_integrin_aLb2_complex,F11R,"[ITGB2, ITGAL]",cDC1,NK_CD16_bright,False,,,0.124752,True,"[0.183384801684276, 0.103222248103342]","[7.379929642220871e-05, 0.0078961490661264]","[0.385265, 0.192632]"
496,F11R_integrin_aLb2_complex,F11R,"[ITGB2, ITGAL]",cDC1,T8_activated,False,,,0.124752,True,"[0.0797768090332497, 0.0659150111200208]","[0.0162202521743839, 0.0134643416027286]","[0.32448, 0.222541]"
497,F11R_integrin_aLb2_complex,F11R,"[ITGB2, ITGAL]",cDC2,NK_CD16_bright,False,,,0.20524,True,"[0.183384801684276, 0.103222248103342]","[7.379929642220871e-05, 0.0078961490661264]","[0.385265, 0.192632]"


In [114]:
# Duplicating the table and then choosing only 0th or 1st or 2nd values for the complexes
df_output_upreg_complex_member_1 = df_output_upreg_complex.copy()
df_output_upreg_complex_member_2 = df_output_upreg_complex.copy()
#df_output_upreg_complex_member_3 = df_output_upreg_complex.copy()

In [115]:
# splitting complex interaction entries by subunits / members
# for any values in these tables, if it's a list, save the 0th for df_output_upreg_complex_member_1 and 1st for df_output_upreg_complex_member_2

# which rows contain interaction with a 3-subunit complex? to then subset df_output_downreg_complex_member_3
#subunit_3_rows = []

for n_row in list(df_output_upreg_complex.index):
    #print('outside for loop, row', n_row)
    
    for col in df_output_upreg_complex.columns:
        #print('col', col)
        if isinstance(df_output_upreg_complex.loc[n_row, col], list):
            df_output_upreg_complex_member_1.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][0]
            df_output_upreg_complex_member_2.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][1]
            
            # additionally, if there are 3 subunits, separate into 3 entries
            #if len(df_output_upreg_complex.loc[n_row, col]) == 3:
                #df_output_upreg_complex_member_3.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][2]
                #print('3-subunit complex')
                #print('row', n_row)
                #print('adding to subunit_3_rows')
                #subunit_3_rows.append(n_row)


In [116]:
# making indices uniques for concatenantion later
df_output_upreg_complex_member_1.index = [idx + '_member_1' for idx in df_output_upreg_complex_member_1.index]
df_output_upreg_complex_member_2.index = [idx + '_member_2' for idx in df_output_upreg_complex_member_2.index]
#df_output_upreg_complex_member_3.index = [idx + '_member_3' for idx in df_output_upreg_complex_member_3.index]

In [117]:
# getting all indices
idx_concat = list(df_output_upreg_complex_member_1.index) + list(df_output_upreg_complex_member_2.index) #+ list(df_output_upreg_complex_member_3.index)

# sorting by original index number, so that the order is: member 1, member 2 and (where applicable) member 3
idx_concat.sort(key = lambda x: x.split('_')[0])
idx_concat

['100_member_1',
 '100_member_2',
 '101_member_1',
 '101_member_2',
 '102_member_1',
 '102_member_2',
 '103_member_1',
 '103_member_2',
 '104_member_1',
 '104_member_2',
 '105_member_1',
 '105_member_2',
 '106_member_1',
 '106_member_2',
 '107_member_1',
 '107_member_2',
 '108_member_1',
 '108_member_2',
 '109_member_1',
 '109_member_2',
 '110_member_1',
 '110_member_2',
 '111_member_1',
 '111_member_2',
 '112_member_1',
 '112_member_2',
 '113_member_1',
 '113_member_2',
 '114_member_1',
 '114_member_2',
 '115_member_1',
 '115_member_2',
 '116_member_1',
 '116_member_2',
 '117_member_1',
 '117_member_2',
 '118_member_1',
 '118_member_2',
 '119_member_1',
 '119_member_2',
 '120_member_1',
 '120_member_2',
 '121_member_1',
 '121_member_2',
 '122_member_1',
 '122_member_2',
 '123_member_1',
 '123_member_2',
 '124_member_1',
 '124_member_2',
 '125_member_1',
 '125_member_2',
 '126_member_1',
 '126_member_2',
 '127_member_1',
 '127_member_2',
 '128_member_1',
 '128_member_2',
 '129_member_1

In [118]:
df_output_upreg_complex_deconv = pd.concat([df_output_upreg_complex_member_1, df_output_upreg_complex_member_2])

In [120]:
df_output_upreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
51_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Macrophages,B_cells_memory,True,0.151074,0.000301322,0.784717,False,,,0.183246
52_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Macrophages,B_cells_memory_activated,True,0.151074,0.000301322,0.784717,False,,,0.202041
53_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Macrophages,MAIT_cells,True,0.151074,0.000301322,0.784717,False,,,0.105263
54_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Macrophages,Monocytes_classical,True,0.151074,0.000301322,0.784717,False,,,0.337841
55_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Macrophages,Monocytes_intermediate,True,0.151074,0.000301322,0.784717,False,,,0.223926
...,...,...,...,...,...,...,...,...,...,...,...,...,...
494_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,Precursor_cells,T8_activated,False,,,0.117825,True,0.065915,0.0134643,0.222541
495_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,cDC1,NK_CD16_bright,False,,,0.124752,True,0.103222,0.00789615,0.192632
496_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,cDC1,T8_activated,False,,,0.124752,True,0.065915,0.0134643,0.222541
497_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,cDC2,NK_CD16_bright,False,,,0.20524,True,0.103222,0.00789615,0.192632


In [121]:
# organising entries so that member 1 entry is followed by member 2 entry and then member 3 entry if applicable
df_output_upreg_complex_deconv = df_output_upreg_complex_deconv.loc[idx_concat,:]

In [122]:
df_output_upreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
100_member_1,ICAM1_integrin_aMb2_complex,ICAM1,ITGB2,Macrophages,Monocytes_non-classical,True,0.100439,0.00146904,0.495959,False,,,0.855335
100_member_2,ICAM1_integrin_aMb2_complex,ICAM1,ITGAM,Macrophages,Monocytes_non-classical,True,0.100439,0.00146904,0.495959,False,,,0.10217
101_member_1,ICAM1_integrin_aMb2_complex,ICAM1,ITGB2,Macrophages,cDC1,True,0.100439,0.00146904,0.495959,False,,,0.900794
101_member_2,ICAM1_integrin_aMb2_complex,ICAM1,ITGAM,Macrophages,cDC1,True,0.100439,0.00146904,0.495959,False,,,0.173611
102_member_1,ICAM1_integrin_aMb2_complex,ICAM1,ITGB2,Macrophages,Macrophages,True,0.100439,0.00146904,0.495959,False,,,0.856736
...,...,...,...,...,...,...,...,...,...,...,...,...,...
97_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Macrophages,Macrophages,True,0.151074,0.000301322,0.784717,False,,,0.21454
98_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,cDC1,cDC1,True,0.133012,0.0003918,0.737173,False,,,0.345734
98_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,cDC1,cDC1,True,0.133012,0.0003918,0.737173,False,,,0.133433
99_member_1,ICAM1_integrin_aMb2_complex,ICAM1,ITGB2,Macrophages,Monocytes_classical,True,0.100439,0.00146904,0.495959,False,,,0.788881


In [123]:
# saving these deconvoluted complex interactions
df_output_upreg_complex_deconv.to_csv(save_path + '20210324_cellphone_interactions_table_with_gene_stats_upreg_in_Freiburg_Ib_CVID_validation_cohort_no_logFC_cutoff_complexes_deconv_into_pseduinteractions.csv')

In [124]:
# saving the simple interactions table
df_output_upreg_simple.to_csv(save_path + '20210324_cellphone_interactions_table_with_gene_stats_upreg_in_Freiburg_Ib_CVID_validation_cohort_no_logFC_cutoff_simple_interactions.csv')

In [125]:
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

In [104]:
#df_output_upreg.to_csv(save_path + '20210318_cellphone_interactions_table_with_gene_stats_upreg_in_CVID_validation_cohort_no_logFC_cutoff.csv')

### Downreg interactions

In [126]:
faulty_index_count = 0

vec2_append_downreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_downreg_DE.index:
    
    print(interaction, list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index).index(interaction) + 1, 
          'out of', len(df_Exrp_LR_in_celltype_pairs_downreg_DE.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        #print('row', curr_count)
        
        #print('celltype pair', celltype_pair)
        
        # row by row
        vec2_append_downreg[str(curr_count)] = {}
        
        vec2_append_downreg[str(curr_count)]['interaction'] = interaction
        
        #print(celltype_pair)
        
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
        
        #print('curr partner A genes', curr_partner_A_genes, 'len:', len(curr_partner_A_genes))
        #print('curr partner B genes', curr_partner_B_genes, 'len:', len(curr_partner_B_genes))
        
        vec2_append_downreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_downreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
        
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        #print('curr partner A cell type', curr_celltype_A)
        #print('curr partner B cell type', curr_celltype_B)
        
        vec2_append_downreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_downreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_df_downreg[DE_df_downreg['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_df_downreg[DE_df_downreg['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])

        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            #print('curr partner A is DE')
            vec2_append_downreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_downreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_cluster'])
            
        else: # if not DE, add 'NA'
            #print('curr partner A is NOT DE')
            vec2_append_downreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_downreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            #print('curr partner B is DE')
            vec2_append_downreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_downreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_cluster'])
        else: # if not DE, add 'NA'
            #print('curr partner B is NOT DE')
            vec2_append_downreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_downreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1
    
    

SIRPA_CD47 1 out of 46
PLAUR_integrin_a4b1_complex 2 out of 46
CD40LG_integrin_a5b1_complex 3 out of 46
TGFB1_TGFBR3 4 out of 46
ICAM1_integrin_aMb2_complex 5 out of 46
ICAM1_integrin_aXb2_complex 6 out of 46
CXCR3_CXCL9 7 out of 46
DPP4_CXCL9 8 out of 46
CD8_receptor_LCK 9 out of 46
CD74_APP 10 out of 46
ICAM1_SPN 11 out of 46
ICAM1_ITGAL 12 out of 46
ICAM1_integrin_aLb2_complex 13 out of 46
NRP1_VEGFB 14 out of 46
GMCSFR_CSF2 15 out of 46
HLA-A_KIR3DL1 16 out of 46
HLA-F_LILRB2 17 out of 46
CCL4_CCR5 18 out of 46
CCL3_CCR5 19 out of 46
KLRB1_CLEC2D 20 out of 46
TNF_TNFRSF1A 21 out of 46
LTA_TNFRSF1A 22 out of 46
TNF_TNFRSF1B 23 out of 46
LTA_TNFRSF1B 24 out of 46
CCR7_CCL19 25 out of 46
CD40_CD40LG 26 out of 46
IL21_receptor_IL21 27 out of 46
IL9_receptor_IL9 28 out of 46
CCL3_CCR1 29 out of 46
CCL22_CCR4 30 out of 46
CCL22_DPP4 31 out of 46
CXCL10_DPP4 32 out of 46
CXCL10_CXCR3 33 out of 46
CD72_SEMA4D 34 out of 46
LTA_TNFRSF14 35 out of 46
TGFB1_TGFbeta_receptor1 36 out of 46
IFNG_

In [127]:
# outlining the final table format
df_output_downreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                              index = list(vec2_append_downreg.keys())
                            )
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
798,,,,,,,,,,,,,
799,,,,,,,,,,,,,
800,,,,,,,,,,,,,
801,,,,,,,,,,,,,


In [128]:
list(df_output_downreg.columns) == list(vec2_append_downreg['0'].keys())

True

In [129]:
len(vec2_append_downreg.keys())

803

In [130]:
vec2_append_downreg['0'].keys()

dict_keys(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A', 'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A', 'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B', 'adj_pval_gene_B', 'percent_expr_gene_B'])

In [131]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
798,,,,,,,,,,,,,
799,,,,,,,,,,,,,
800,,,,,,,,,,,,,
801,,,,,,,,,,,,,


In [132]:
%%time

for i in list(vec2_append_downreg.keys()):
    #print(i)
    curr_keys = list(vec2_append_downreg[i].keys())
    for col in curr_keys:
        df_output_downreg.loc[i,col] = vec2_append_downreg[i][col]

CPU times: user 893 ms, sys: 39 µs, total: 893 ms
Wall time: 897 ms


In [133]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,[SIRPA],[CD47],Macrophages,cDC1,False,,,[0.5124732715609408],True,[-0.0746250650273631],[0.0349351990503401],[0.471204]
1,SIRPA_CD47,[SIRPA],[CD47],Monocytes_classical,cDC1,False,,,[0.3686451289757075],True,[-0.0746250650273631],[0.0349351990503401],[0.471204]
2,SIRPA_CD47,[SIRPA],[CD47],Monocytes_intermediate,cDC1,False,,,[0.21855828220858892],True,[-0.0746250650273631],[0.0349351990503401],[0.471204]
3,SIRPA_CD47,[SIRPA],[CD47],Monocytes_non-classical,cDC1,False,,,[0.3752260397830018],True,[-0.0746250650273631],[0.0349351990503401],[0.471204]
4,SIRPA_CD47,[SIRPA],[CD47],cDC2,cDC1,False,,,[0.15502183406113534],True,[-0.0746250650273631],[0.0349351990503401],[0.471204]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
798,CLEC2B_KLRF1,[CLEC2B],[KLRF1],TMRA_CD8+,NK_CD16_bright,False,,,[0.3659217877094972],True,[-0.26734731525508104],[9.66843449230409e-13],[0.194935]
799,CLEC2B_KLRF1,[CLEC2B],[KLRF1],T_gd,NK_CD16_bright,False,,,[0.3076923076923077],True,[-0.26734731525508104],[9.66843449230409e-13],[0.194935]
800,CLEC2B_KLRF1,[CLEC2B],[KLRF1],cDC1,NK_CD16_bright,False,,,[0.2566964285714285],True,[-0.26734731525508104],[9.66843449230409e-13],[0.194935]
801,CLEC2B_KLRF1,[CLEC2B],[KLRF1],iNKT_cells,NK_CD16_bright,False,,,[0.11639344262295083],True,[-0.26734731525508104],[9.66843449230409e-13],[0.194935]


In [134]:
cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_downreg.index):
    #print('row', row)
    for col in cols2correct:
        #print('column', col)
        curr_value = df_output_downreg.loc[row, col] # with []
        #print(curr_value)
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 - complex genes that is
        #if not isinstance(curr_value, list) & (curr_value != 'NA'): # ignoring NAs and lists of length > 1 - complex genes that is
            df_output_downreg.loc[row, col] = curr_value[0] # this just get the element - string if a gene, numerical value if it's a stat
            

In [135]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,SIRPA,CD47,Macrophages,cDC1,False,,,0.512473,True,-0.0746251,0.0349352,0.471204
1,SIRPA_CD47,SIRPA,CD47,Monocytes_classical,cDC1,False,,,0.368645,True,-0.0746251,0.0349352,0.471204
2,SIRPA_CD47,SIRPA,CD47,Monocytes_intermediate,cDC1,False,,,0.218558,True,-0.0746251,0.0349352,0.471204
3,SIRPA_CD47,SIRPA,CD47,Monocytes_non-classical,cDC1,False,,,0.375226,True,-0.0746251,0.0349352,0.471204
4,SIRPA_CD47,SIRPA,CD47,cDC2,cDC1,False,,,0.155022,True,-0.0746251,0.0349352,0.471204
...,...,...,...,...,...,...,...,...,...,...,...,...,...
798,CLEC2B_KLRF1,CLEC2B,KLRF1,TMRA_CD8+,NK_CD16_bright,False,,,0.365922,True,-0.267347,9.66843e-13,0.194935
799,CLEC2B_KLRF1,CLEC2B,KLRF1,T_gd,NK_CD16_bright,False,,,0.307692,True,-0.267347,9.66843e-13,0.194935
800,CLEC2B_KLRF1,CLEC2B,KLRF1,cDC1,NK_CD16_bright,False,,,0.256696,True,-0.267347,9.66843e-13,0.194935
801,CLEC2B_KLRF1,CLEC2B,KLRF1,iNKT_cells,NK_CD16_bright,False,,,0.116393,True,-0.267347,9.66843e-13,0.194935


In [136]:
df_output_downreg.columns

Index(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A',
       'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B'],
      dtype='object')

In [137]:
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

In [138]:
#df_output_downreg.to_csv(save_path + '20210318_cellphone_interactions_table_with_gene_stats_downreg_in_CVID_validation_cohort_no_logFC_cutoff.csv')

### Splitting tables into 2 tables: simple interactions and complex interactions, latter being deconvoluted into pseudo-interactions for each subunit of a complex

In [139]:
# getting indices of complex interactions
complex_interaction_rows_downreg = []

for n_row in list(df_output_downreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_downreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_downreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) or isinstance(curr_partner_B_genes, list): # if partner A or B is a complex
        complex_interaction_rows_downreg.append(n_row)
        

In [140]:
len(complex_interaction_rows_downreg)

156

In [141]:
np.unique(complex_interaction_rows_downreg, return_counts=True)

(array(['10', '11', '12', '129', '13', '130', '131', '132', '133', '134',
        '135', '136', '137', '138', '139', '14', '140', '141', '142',
        '143', '144', '145', '146', '147', '15', '153', '154', '155',
        '156', '157', '158', '159', '16', '160', '161', '162', '163',
        '164', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26',
        '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '42',
        '43', '431', '432', '433', '434', '435', '436', '437', '438',
        '439', '44', '440', '45', '46', '47', '48', '49', '50', '571',
        '572', '573', '574', '575', '576', '577', '578', '579', '580',
        '581', '582', '583', '584', '585', '586', '587', '588', '589',
        '590', '591', '592', '593', '594', '595', '596', '597', '598',
        '599', '6', '600', '601', '602', '603', '604', '605', '606', '607',
        '608', '7', '73', '74', '75', '750', '751', '752', '753', '754',
        '755', '756', '757', '758', '759', '76', '760', '761', '

In [142]:
# splitting simple and complex interactions into 2 separate tables
df_output_downreg_complex = df_output_downreg.loc[complex_interaction_rows_downreg,:]
df_output_downreg_simple = df_output_downreg.drop(complex_interaction_rows_downreg, axis=0)

In [143]:
df_output_downreg_simple.shape

(647, 13)

In [144]:
df_output_downreg_complex.shape

(156, 13)

In [145]:
df_output_downreg.shape

(803, 13)

In [146]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,SIRPA,CD47,Macrophages,cDC1,False,,,0.512473,True,-0.0746251,0.0349352,0.471204
1,SIRPA_CD47,SIRPA,CD47,Monocytes_classical,cDC1,False,,,0.368645,True,-0.0746251,0.0349352,0.471204
2,SIRPA_CD47,SIRPA,CD47,Monocytes_intermediate,cDC1,False,,,0.218558,True,-0.0746251,0.0349352,0.471204
3,SIRPA_CD47,SIRPA,CD47,Monocytes_non-classical,cDC1,False,,,0.375226,True,-0.0746251,0.0349352,0.471204
4,SIRPA_CD47,SIRPA,CD47,cDC2,cDC1,False,,,0.155022,True,-0.0746251,0.0349352,0.471204
...,...,...,...,...,...,...,...,...,...,...,...,...,...
798,CLEC2B_KLRF1,CLEC2B,KLRF1,TMRA_CD8+,NK_CD16_bright,False,,,0.365922,True,-0.267347,9.66843e-13,0.194935
799,CLEC2B_KLRF1,CLEC2B,KLRF1,T_gd,NK_CD16_bright,False,,,0.307692,True,-0.267347,9.66843e-13,0.194935
800,CLEC2B_KLRF1,CLEC2B,KLRF1,cDC1,NK_CD16_bright,False,,,0.256696,True,-0.267347,9.66843e-13,0.194935
801,CLEC2B_KLRF1,CLEC2B,KLRF1,iNKT_cells,NK_CD16_bright,False,,,0.116393,True,-0.267347,9.66843e-13,0.194935


In [147]:
df_output_downreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
6,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_intermediate,B_cells_memory,True,-0.10696,0.032057,0.258178,False,,,"[0.18324607329842926, 0.3507853403141361]"
7,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_intermediate,B_cells_memory_activated,True,-0.10696,0.032057,0.258178,False,,,"[0.202041289723962, 0.4040825794479239]"
8,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_intermediate,MAIT_cells,True,-0.10696,0.032057,0.258178,False,,,"[0.10526315789473684, 0.22697368421052636]"
9,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_intermediate,Macrophages,True,-0.10696,0.032057,0.258178,False,,,"[0.4950106913756237, 0.21454027084818247]"
10,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_intermediate,Monocytes_classical,True,-0.10696,0.032057,0.258178,False,,,"[0.3378412221387428, 0.297771099423992]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
774,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",cDC1,Monocytes_classical,True,-0.0746251,0.0349352,0.471204,False,,,"[0.1214625594790884, 0.9872276483846733]"
775,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",cDC2,Macrophages,False,,,0.39738,True,"[-0.0734444140365127, -0.10115312022629101]","[3.04013565696667e-05, 0.0001947332761205]","[0.163115, 0.991183]"
776,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",iNKT_cells,Macrophages,False,,,0.27623,True,"[-0.0734444140365127, -0.10115312022629101]","[3.04013565696667e-05, 0.0001947332761205]","[0.163115, 0.991183]"
777,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",Macrophages,Macrophages,False,,,0.50784,True,"[-0.0734444140365127, -0.10115312022629101]","[3.04013565696667e-05, 0.0001947332761205]","[0.163115, 0.991183]"


In [148]:
# checking whether any complexes interact with other complexes and how many subunits each complex contains here

n_subunits_downreg = []

for n_row in list(df_output_downreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_downreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_downreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) and isinstance(curr_partner_B_genes, list): # if partner A AND B is a complex
        print('row', n_row)
        print('both are complexes')
        
    if isinstance(curr_partner_A_genes, list): # if partner A is a complex
        print('row', n_row)
        print('curr_partner_A_genes',curr_partner_A_genes)
        print('partner A is a complex, len is:', len(curr_partner_A_genes))
        n_subunits_downreg.append(len(curr_partner_A_genes))
        
    if isinstance(curr_partner_B_genes, list): # if partner B is a complex
        print('row', n_row)
        print('curr_partner_B_genes',curr_partner_B_genes)
        print('partner B is a complex, len is:', len(curr_partner_B_genes))
        n_subunits_downreg.append(len(curr_partner_B_genes))

row 6
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 7
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 8
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 9
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 10
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 11
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 12
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 13
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 14
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 15
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 16
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 17
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 18
curr_partner_B_genes ['IT

In [149]:
np.unique(n_subunits_downreg, return_counts=True)

(array([2]), array([156]))

#### So 1 more scenario to ignore: there are no interactions of a complex with a complex
#### And max complex size is 2 subunits here

In [150]:
df_output_downreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
6,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_intermediate,B_cells_memory,True,-0.10696,0.032057,0.258178,False,,,"[0.18324607329842926, 0.3507853403141361]"
7,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_intermediate,B_cells_memory_activated,True,-0.10696,0.032057,0.258178,False,,,"[0.202041289723962, 0.4040825794479239]"
8,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_intermediate,MAIT_cells,True,-0.10696,0.032057,0.258178,False,,,"[0.10526315789473684, 0.22697368421052636]"
9,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_intermediate,Macrophages,True,-0.10696,0.032057,0.258178,False,,,"[0.4950106913756237, 0.21454027084818247]"
10,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_intermediate,Monocytes_classical,True,-0.10696,0.032057,0.258178,False,,,"[0.3378412221387428, 0.297771099423992]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
774,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",cDC1,Monocytes_classical,True,-0.0746251,0.0349352,0.471204,False,,,"[0.1214625594790884, 0.9872276483846733]"
775,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",cDC2,Macrophages,False,,,0.39738,True,"[-0.0734444140365127, -0.10115312022629101]","[3.04013565696667e-05, 0.0001947332761205]","[0.163115, 0.991183]"
776,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",iNKT_cells,Macrophages,False,,,0.27623,True,"[-0.0734444140365127, -0.10115312022629101]","[3.04013565696667e-05, 0.0001947332761205]","[0.163115, 0.991183]"
777,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",Macrophages,Macrophages,False,,,0.50784,True,"[-0.0734444140365127, -0.10115312022629101]","[3.04013565696667e-05, 0.0001947332761205]","[0.163115, 0.991183]"


In [151]:
# Duplicating the table and then choosing only 0th or 1st or 2nd values for the complexes
df_output_downreg_complex_member_1 = df_output_downreg_complex.copy()
df_output_downreg_complex_member_2 = df_output_downreg_complex.copy()
#df_output_downreg_complex_member_3 = df_output_downreg_complex.copy()

In [152]:
# splitting complex interaction entries by subunits / members
# for any values in these tables, if it's a list, save the 0th for df_output_upreg_complex_member_1 and 1st for df_output_upreg_complex_member_2

# which rows contain interaction with a 3-subunit complex? to then subset df_output_downreg_complex_member_3
#subunit_3_rows = []

for n_row in list(df_output_downreg_complex.index):
    #print('outside for loop, row', n_row)
    
    for col in df_output_downreg_complex.columns:
        #print('col', col)
        if isinstance(df_output_downreg_complex.loc[n_row, col], list):
            df_output_downreg_complex_member_1.loc[n_row, col] = df_output_downreg_complex.loc[n_row, col][0]
            df_output_downreg_complex_member_2.loc[n_row, col] = df_output_downreg_complex.loc[n_row, col][1]
            
            # additionally, if there are 3 subunits, separate into 3 entries
            #if len(df_output_upreg_complex.loc[n_row, col]) == 3:
                #df_output_upreg_complex_member_3.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][2]
                #print('3-subunit complex')
                #print('row', n_row)
                #print('adding to subunit_3_rows')
                #subunit_3_rows.append(n_row)


In [153]:
# making indices uniques for concatenantion later
df_output_downreg_complex_member_1.index = [idx + '_member_1' for idx in df_output_downreg_complex_member_1.index]
df_output_downreg_complex_member_2.index = [idx + '_member_2' for idx in df_output_downreg_complex_member_2.index]
#df_output_upreg_complex_member_3.index = [idx + '_member_3' for idx in df_output_upreg_complex_member_3.index]

In [154]:
# getting all indices
idx_concat = list(df_output_downreg_complex_member_1.index) + list(df_output_downreg_complex_member_2.index)

# sorting by original index number, so that the order is: member 1, member 2 and (where applicable) member 3
idx_concat.sort(key = lambda x: x.split('_')[0])
idx_concat

['10_member_1',
 '10_member_2',
 '11_member_1',
 '11_member_2',
 '12_member_1',
 '12_member_2',
 '129_member_1',
 '129_member_2',
 '13_member_1',
 '13_member_2',
 '130_member_1',
 '130_member_2',
 '131_member_1',
 '131_member_2',
 '132_member_1',
 '132_member_2',
 '133_member_1',
 '133_member_2',
 '134_member_1',
 '134_member_2',
 '135_member_1',
 '135_member_2',
 '136_member_1',
 '136_member_2',
 '137_member_1',
 '137_member_2',
 '138_member_1',
 '138_member_2',
 '139_member_1',
 '139_member_2',
 '14_member_1',
 '14_member_2',
 '140_member_1',
 '140_member_2',
 '141_member_1',
 '141_member_2',
 '142_member_1',
 '142_member_2',
 '143_member_1',
 '143_member_2',
 '144_member_1',
 '144_member_2',
 '145_member_1',
 '145_member_2',
 '146_member_1',
 '146_member_2',
 '147_member_1',
 '147_member_2',
 '15_member_1',
 '15_member_2',
 '153_member_1',
 '153_member_2',
 '154_member_1',
 '154_member_2',
 '155_member_1',
 '155_member_2',
 '156_member_1',
 '156_member_2',
 '157_member_1',
 '157_mem

In [155]:
df_output_downreg_complex_deconv = pd.concat([df_output_downreg_complex_member_1, df_output_downreg_complex_member_2])

In [156]:
df_output_downreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
6_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_intermediate,B_cells_memory,True,-0.10696,0.032057,0.258178,False,,,0.183246
7_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_intermediate,B_cells_memory_activated,True,-0.10696,0.032057,0.258178,False,,,0.202041
8_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_intermediate,MAIT_cells,True,-0.10696,0.032057,0.258178,False,,,0.105263
9_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_intermediate,Macrophages,True,-0.10696,0.032057,0.258178,False,,,0.495011
10_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_intermediate,Monocytes_classical,True,-0.10696,0.032057,0.258178,False,,,0.337841
...,...,...,...,...,...,...,...,...,...,...,...,...,...
774_member_2,CD47_SIRB1_complex,CD47,TYROBP,cDC1,Monocytes_classical,True,-0.0746251,0.0349352,0.471204,False,,,0.987228
775_member_2,CD47_SIRB1_complex,CD47,TYROBP,cDC2,Macrophages,False,,,0.39738,True,-0.101153,0.000194733,0.991183
776_member_2,CD47_SIRB1_complex,CD47,TYROBP,iNKT_cells,Macrophages,False,,,0.27623,True,-0.101153,0.000194733,0.991183
777_member_2,CD47_SIRB1_complex,CD47,TYROBP,Macrophages,Macrophages,False,,,0.50784,True,-0.101153,0.000194733,0.991183


In [157]:
# organising entries so that member 1 entry is followed by member 2 entry and then member 3 entry if applicable
df_output_downreg_complex_deconv = df_output_downreg_complex_deconv.loc[idx_concat,:]

In [158]:
df_output_downreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
10_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_intermediate,Monocytes_classical,True,-0.10696,0.032057,0.258178,False,,,0.337841
10_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Monocytes_intermediate,Monocytes_classical,True,-0.10696,0.032057,0.258178,False,,,0.297771
11_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_intermediate,Monocytes_non-classical,True,-0.10696,0.032057,0.258178,False,,,0.347197
11_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Monocytes_intermediate,Monocytes_non-classical,True,-0.10696,0.032057,0.258178,False,,,0.251356
12_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_intermediate,NK_CD16_bright,True,-0.10696,0.032057,0.258178,False,,,0.142556
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Monocytes_intermediate,MAIT_cells,True,-0.10696,0.032057,0.258178,False,,,0.226974
80_member_1,CD8_receptor_LCK,CD8B,LCK,iNKT_cells,B_cells_memory_activated,False,,,0.136885,True,-0.0589062,0.000114942,0.176382
80_member_2,CD8_receptor_LCK,CD8A,LCK,iNKT_cells,B_cells_memory_activated,False,,,0.166393,True,-0.0589062,0.000114942,0.176382
9_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_intermediate,Macrophages,True,-0.10696,0.032057,0.258178,False,,,0.495011


In [159]:
# saving these deconvoluted complex interactions
df_output_downreg_complex_deconv.to_csv(save_path + '20210324_cellphone_interactions_table_with_gene_stats_downreg_in_Freiburg_Ib_CVID_validation_cohort_no_logFC_cutoff_complexes_deconv_into_pseduinteractions.csv')

In [160]:
# saving the simple interactions table
df_output_downreg_simple.to_csv(save_path + '20210324_cellphone_interactions_table_with_gene_stats_downreg_in_Freiburg_Ib_CVID_validation_cohort_no_logFC_cutoff_simple_interactions.csv')

In [161]:
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

In [93]:
# manually checking some hits from previous version of analysis of twins
for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index):
    if 'CCL22' in interaction:
        print(interaction)
        curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction])
        curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset[interaction] > 0].index)
        print('this interaction is detected in following celltype pairs:', curr_subset_nonzero_interacting_celltype_pairs)

CCL22_CCR4
this interaction is detected in following celltype pairs: ['B_cells_memory---iNKT_cells', 'B_cells_memory_activated---iNKT_cells', 'B_cells_naive---iNKT_cells', 'B_cells_naive_activated---iNKT_cells', 'MAIT_cells---iNKT_cells', 'Macrophages---iNKT_cells', 'Monocytes_classical---iNKT_cells', 'Monocytes_intermediate---iNKT_cells', 'Monocytes_non-classical---iNKT_cells', 'NK_CD16_bright---iNKT_cells', 'NK_CD16_bright_activated---iNKT_cells', 'NK_CD56_bright_activated---iNKT_cells', 'Plasma_cells---iNKT_cells', 'Precursor_cells---iNKT_cells', 'T4_activated---iNKT_cells', 'T4_memory---iNKT_cells', 'T4_naive---iNKT_cells', 'T8_activated---iNKT_cells', 'T8_naive---iNKT_cells', 'TCM_CD8+---iNKT_cells', 'TEM_CD8+---iNKT_cells', 'TMRA_CD8+---iNKT_cells', 'T_gd---iNKT_cells', 'T_regs---iNKT_cells', 'cDC1---iNKT_cells', 'cDC2---iNKT_cells', 'iNKT_cells---B_cells_memory', 'iNKT_cells---B_cells_memory_activated', 'iNKT_cells---B_cells_naive', 'iNKT_cells---B_cells_naive_activated', 'iNKT_

In [90]:
curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc['CCL22_CCR4'])
curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset['CCL22_CCR4'] > 0].index)
curr_subset_nonzero_interacting_celltype_pairs

['B_cells_memory---iNKT_cells',
 'B_cells_memory_activated---iNKT_cells',
 'B_cells_naive---iNKT_cells',
 'B_cells_naive_activated---iNKT_cells',
 'MAIT_cells---iNKT_cells',
 'Macrophages---iNKT_cells',
 'Monocytes_classical---iNKT_cells',
 'Monocytes_intermediate---iNKT_cells',
 'Monocytes_non-classical---iNKT_cells',
 'NK_CD16_bright---iNKT_cells',
 'NK_CD16_bright_activated---iNKT_cells',
 'NK_CD56_bright_activated---iNKT_cells',
 'Plasma_cells---iNKT_cells',
 'Precursor_cells---iNKT_cells',
 'T4_activated---iNKT_cells',
 'T4_memory---iNKT_cells',
 'T4_naive---iNKT_cells',
 'T8_activated---iNKT_cells',
 'T8_naive---iNKT_cells',
 'TCM_CD8+---iNKT_cells',
 'TEM_CD8+---iNKT_cells',
 'TMRA_CD8+---iNKT_cells',
 'T_gd---iNKT_cells',
 'T_regs---iNKT_cells',
 'cDC1---iNKT_cells',
 'cDC2---iNKT_cells',
 'iNKT_cells---B_cells_memory',
 'iNKT_cells---B_cells_memory_activated',
 'iNKT_cells---B_cells_naive',
 'iNKT_cells---B_cells_naive_activated',
 'iNKT_cells---MAIT_cells',
 'iNKT_cells---Mac

In [89]:
df_Exrp_LR_in_celltype_pairs_upreg_DE.loc['CCL22_CCR4']

B_cells_memory---B_cells_memory_activated    0.0
B_cells_memory---B_cells_naive               0.0
B_cells_memory---B_cells_naive_activated     0.0
B_cells_memory---MAIT_cells                  0.0
B_cells_memory---Macrophages                 0.0
                                            ... 
TMRA_CD8+---TMRA_CD8+                        0.0
T_gd---T_gd                                  0.0
T_regs---T_regs                              0.0
cDC1---cDC1                                  0.0
iNKT_cells---iNKT_cells                      1.0
Name: CCL22_CCR4, Length: 619, dtype: float64

In [92]:
# manually checking some hits from previous version of analysis of twins
for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index):
    if 'CCL22' in interaction:
        print(interaction)
        curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction])
        curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset[interaction] > 0].index)
        print('this interaction is detected in following celltype pairs:', curr_subset_nonzero_interacting_celltype_pairs)

CCL22_CCR4
this interaction is detected in following celltype pairs: ['B_cells_memory_activated---B_cells_memory', 'B_cells_memory_activated---B_cells_naive', 'B_cells_memory_activated---B_cells_naive_activated', 'B_cells_memory_activated---MAIT_cells', 'B_cells_memory_activated---Macrophages', 'B_cells_memory_activated---Monocytes_classical', 'B_cells_memory_activated---Monocytes_intermediate', 'B_cells_memory_activated---Monocytes_non-classical', 'B_cells_memory_activated---NK_CD16_bright', 'B_cells_memory_activated---NK_CD16_bright_activated', 'B_cells_memory_activated---NK_CD56_bright_activated', 'B_cells_memory_activated---Plasma_cells', 'B_cells_memory_activated---T4_activated', 'B_cells_memory_activated---T4_memory', 'B_cells_memory_activated---T4_naive', 'B_cells_memory_activated---T8_activated', 'B_cells_memory_activated---T8_naive', 'B_cells_memory_activated---TCM_CD8+', 'B_cells_memory_activated---TEM_CD8+', 'B_cells_memory_activated---TMRA_CD8+', 'B_cells_memory_activated--

In [97]:
# manually checking some hits from previous version of analysis of twins
for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index):
    if 'CXCL10_CXCR3' in interaction:
        print(interaction)
        curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction])
        curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset[interaction] > 0].index)
        print('this interaction is detected in following celltype pairs:', curr_subset_nonzero_interacting_celltype_pairs)

CXCL10_CXCR3
this interaction is detected in following celltype pairs: ['B_cells_memory---iNKT_cells', 'B_cells_memory_activated---iNKT_cells', 'B_cells_naive---iNKT_cells', 'B_cells_naive_activated---iNKT_cells', 'MAIT_cells---iNKT_cells', 'Macrophages---iNKT_cells', 'Monocytes_classical---iNKT_cells', 'Monocytes_intermediate---iNKT_cells', 'Monocytes_non-classical---iNKT_cells', 'NK_CD16_bright---iNKT_cells', 'NK_CD16_bright_activated---iNKT_cells', 'NK_CD56_bright_activated---iNKT_cells', 'Plasma_cells---iNKT_cells', 'Precursor_cells---iNKT_cells', 'T4_activated---iNKT_cells', 'T4_memory---iNKT_cells', 'T4_naive---iNKT_cells', 'T8_activated---iNKT_cells', 'T8_naive---iNKT_cells', 'TCM_CD8+---iNKT_cells', 'TEM_CD8+---iNKT_cells', 'TMRA_CD8+---iNKT_cells', 'T_gd---iNKT_cells', 'T_regs---iNKT_cells', 'cDC1---iNKT_cells', 'cDC2---iNKT_cells', 'iNKT_cells---B_cells_memory', 'iNKT_cells---B_cells_memory_activated', 'iNKT_cells---B_cells_naive', 'iNKT_cells---B_cells_naive_activated', 'iNK

In [98]:
# manually checking some hits from previous version of analysis of twins
for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index):
    if 'CXCL10_CXCR3' in interaction:
        print(interaction)
        curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction])
        curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset[interaction] > 0].index)
        print('this interaction is detected in following celltype pairs:', curr_subset_nonzero_interacting_celltype_pairs)

CXCL10_CXCR3
this interaction is detected in following celltype pairs: ['B_cells_memory---NK_CD16_bright', 'B_cells_memory---TMRA_CD8+', 'B_cells_memory_activated---NK_CD16_bright', 'B_cells_memory_activated---TMRA_CD8+', 'B_cells_naive---NK_CD16_bright', 'B_cells_naive---TMRA_CD8+', 'B_cells_naive_activated---NK_CD16_bright', 'B_cells_naive_activated---TMRA_CD8+', 'MAIT_cells---NK_CD16_bright', 'MAIT_cells---TMRA_CD8+', 'Macrophages---NK_CD16_bright', 'Macrophages---TMRA_CD8+', 'Monocytes_classical---NK_CD16_bright', 'Monocytes_classical---TMRA_CD8+', 'Monocytes_intermediate---NK_CD16_bright', 'Monocytes_intermediate---TMRA_CD8+', 'Monocytes_non-classical---NK_CD16_bright', 'Monocytes_non-classical---TMRA_CD8+', 'NK_CD16_bright---TMRA_CD8+', 'NK_CD16_bright_activated---NK_CD16_bright', 'NK_CD16_bright_activated---TMRA_CD8+', 'NK_CD56_bright_activated---NK_CD16_bright', 'NK_CD56_bright_activated---TMRA_CD8+', 'Plasma_cells---NK_CD16_bright', 'Plasma_cells---TMRA_CD8+', 'T4_activated---

### Checking some stuff

In [29]:
# reading the user curated database starting files to see what interactions haven't made it here

path = '/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/'

# saving them into .tsv files now
interactions_curated = pd.read_csv(path + 'interactions_curated_subset_notLuz.tsv', sep='\t', index_col=0)
complexes_curated = pd.read_csv(path + 'complex_curated.tsv', sep='\t', index_col=0)

In [30]:
interactions_curated.columns

Index(['partner_a', 'partner_b', 'protein_name_a', 'protein_name_b',
       'annotation_strategy', 'source', 'is_ppi', 'reactome_complex',
       'reactome_reaction', 'reactome_pathway', 'complexPortal_complex',
       'curator', 'comments'],
      dtype='object')

In [31]:
np.unique(interactions_curated['curator'], return_counts=True)

(array(['JRodriguezUbreva', 'RVentoTormo'], dtype=object), array([   1, 1339]))

In [32]:
np.unique(interactions_curated['annotation_strategy'], return_counts=True)

(array(['curated'], dtype=object), array([1340]))

In [65]:
interactions_curated#[interactions_curated['partner_a'] == 'Q92478']

Unnamed: 0_level_0,partner_a,partner_b,protein_name_a,protein_name_b,annotation_strategy,source,is_ppi,reactome_complex,reactome_reaction,reactome_pathway,complexPortal_complex,curator,comments
id_cp_interaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
,Q9Y275,Q96RJ3,TN13B_HUMAN,TR13C_HUMAN,curated,uniprot;reactome,True,R-HSA-5676540,R-HSA-5676599,R-HSA-1280215,,JRodriguezUbreva,
CPI-CC0041E1D30,IL12,IL12_receptor,,,curated,uniprot,True,,,,,RVentoTormo,
CPI-CC0104F2A96,ACVR_1B2A_receptor,Activin_ligand_ab,,,curated,PMID:22710174;PMID:22991378,True,,,,,RVentoTormo,
CPI-CC045C36F28,ACVR_1A2A_receptor,Activin_ligand_ab,,,curated,less_common_binding;PMID:22710174;PMID:22991378_,True,,,,,RVentoTormo,
CPI-CC051643E98,IL23,IL23_receptor,,,curated,uniprot,True,,,,,RVentoTormo,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
,O14905,Q6FHJ7,WNT9B_HUMAN,SFRP4_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins
,O14905,Q8N474,WNT9B_HUMAN,SFRP1_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins
,O14905,Q92765,WNT9B_HUMAN,SFRP3_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins
,O14905,Q96HF1,WNT9B_HUMAN,SFRP2_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins


In [28]:
#for interaction in int_cpDB['interacting_pair']:
#    if 'IL' in interaction:
#        print(interaction)
#        print(int_cpDB[int_cpDB['interacting_pair'] == interaction])

In [68]:
interactions_curated.columns

Index(['partner_a', 'partner_b', 'protein_name_a', 'protein_name_b',
       'annotation_strategy', 'source', 'is_ppi', 'reactome_complex',
       'reactome_reaction', 'reactome_pathway', 'complexPortal_complex',
       'curator', 'comments'],
      dtype='object')

In [33]:
list(interactions_curated['partner_a'])[:10]

['Q9Y275',
 'IL12',
 'ACVR_1B2A_receptor',
 'ACVR_1A2A_receptor',
 'IL23',
 'ACVR_1B2B_receptor',
 'integrin_aMb2_complex',
 'ACVR_1C2A_receptor',
 'ACVR_1A2B_receptor',
 'IL27']

For example, IL12 and IL12_receptor interaction is in the initial table but is not in the final, going to see if it made it into the expr table

In [70]:
for compl in list(complexes_curated.index):
    if 'OSMR' in compl:
        print(compl)
        print(complexes_curated.loc[compl,:])

OSMR
uniprot_1                                                           Q99650
uniprot_2                                                           P40189
uniprot_3                                                              NaN
uniprot_4                                                              NaN
transmembrane                                                         True
peripheral                                                           False
secreted                                                             False
secreted_desc                                                          NaN
secreted_highlight                                                   False
receptor                                                              True
receptor_desc                                 Cytokine_receptor_IL6_family
integrin                                                             False
other                                                                False
other_desc          

In [24]:
# database generated from 1.3K odd interactions
database_file = '/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/database_20210218/cellphonedb_user_2021-02-18-14_26.db'

import sqlite3

def importdb(file_path):
    conn = sqlite3.connect(file_path)
    c = conn.cursor()
    c.execute("SELECT name FROM sqlite_master WHERE type='table';")
    for table in c.fetchall():
        yield list(c.execute('SELECT * from ?;', (table[0],)))

In [26]:
database = importdb(database_file)

In [30]:
database

AttributeError: 'generator' object has no attribute 'keys'