# Looking at the L/R interactinos enriched in particular pairs of cell types / microenvironments

CellphoneDB

Twins only

This code uses DEGs computed for each cluster to identify relevant L/R interactions between the cells in a microenviroment

Code from Luz rewritten in python

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

In [2]:
# Define cutoff variables
filter_int_user_curated = True # Use only user_curated interactions?
per_cutoff = 0.1 # min % of cells in the cluster required  with expression > 0 for the gene
pval_cutoff = 0.05 # max adjusted p-value requeired to consider a gene as DEG

# as of 18.03.2021, not using the logFC cutoff at all! --> 0 here
logFC_cutoff = 0 # min logFC to consider a gene as DEG

## Load cellphone database


In [3]:
# Gene names
genes_cpDB = pd.read_csv('/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/hsa_uniprot.txt', 
                         sep = '\t')

In [4]:
genes_cpDB

Unnamed: 0,uniprot,Entry,gene_name
0,P01611,KVD12_HUMAN,IGKV1D-12
1,P01615,KVD28_HUMAN,IGKV2D-28
2,Q15334,L2GL1_HUMAN,LLGL1
3,Q6ZP29,LAAT1_HUMAN,PQLC2
4,Q9GZZ8,LACRT_HUMAN,LACRT
...,...,...,...
20311,Q9H900,ZWILC_HUMAN,ZWILCH
20312,P98169,ZXDB_HUMAN,ZXDB
20313,Q2QGD7,ZXDC_HUMAN,ZXDC
20314,Q15942,ZYX_HUMAN,ZYX


In [5]:
# Complexes members
com_cpDB = pd.read_csv('/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/database_20210218/complex_generated.csv')
com_cpDB['complex_name'] = ['complex:' + complex_name for complex_name in com_cpDB['complex_name']]

In [6]:
com_cpDB

Unnamed: 0,complex_name,uniprot_1,uniprot_2,uniprot_3,uniprot_4,transmembrane,peripheral,secreted,secreted_desc,secreted_highlight,receptor,receptor_desc,integrin,other,other_desc,pdb_id,pdb_structure,stoichiometry,comments_complex
0,complex:contactin complex II,Q12860,Q92823,,,True,False,False,,False,False,,False,False,,,FALSE,,NRCAM bind in cis and in trans to contactin-1
1,complex:IL6 receptor,P08887,P40189,,,True,False,False,,False,True,Cytokine receptor IL6 family,False,False,,1p9m,binding,IL6;IL6;IL6R;IL6R;IL6ST;IL6ST,Signal activation necessitate an association w...
2,complex:AT8B4CC50B complex,Q8TF62,Q3MIR4,,,True,False,False,,False,False,,False,False,,,FALSE,,Interacts with beta subunits TMEM30A and TMEM30B
3,complex:KCNV1KCNB2 complex,Q6PIU1,Q92953,,,True,False,False,,False,False,,False,False,,,FALSE,,Has to be associated with another potassium ch...
4,complex:LRFN3LRFN5 complex,Q9BTN0,Q96NI6,,,True,False,False,,False,False,,False,False,,,FALSE,,"Can form heteromeric complexes with LRFN1, LRF..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
614,complex:FZD8_LRP6,O75581,Q9H461,,,True,False,False,,False,False,,False,False,,,False,,
615,complex:FZD9_LRP5,O75197,O00144,,,True,False,False,,False,False,,False,False,,,False,,
616,complex:FZD9_LRP6,O75581,O00144,,,True,False,False,,False,False,,False,False,,,False,,
617,complex:FZD10_LRP5,O75197,Q9ULW2,,,True,False,False,,False,False,,False,False,,,False,,


In [7]:
#'complex:FZD8_LRP6'[8:]

In [8]:
#com_cpDB[(com_cpDB['complex_name'] == 'complex:IL6 receptor')].loc[:, ['uniprot_1', 'uniprot_2', 'uniprot_3', 'uniprot_4']].values

In [9]:
#genes_cpDB[genes_cpDB['uniprot'].isin(['P08887','P40189'])]['gene_name']

In [10]:
# Generate complexes2gene symbol dictionary
Com2Gene = {}


for complex_name in np.unique(com_cpDB['complex_name']):
    #print(complex_name)
    
    # getting rid of 'complex:' in the beginning
    #complex_name = complex_name[8:]
    #print(complex_name)
    
    curr_complex_proteins = list(com_cpDB[(com_cpDB['complex_name'] == complex_name)].loc[:, ['uniprot_1', 'uniprot_2', 'uniprot_3', 'uniprot_4']].values.tolist())
    # flatten list 
    curr_complex_proteins = [item for sublist in curr_complex_proteins for item in sublist]
    #print(curr_complex_proteins)
    # remove nans
    curr_complex_proteins = [x for x in curr_complex_proteins if str(x) != 'nan']
    #print('after removing nans:', curr_complex_proteins)
    
    # getting corresponding gene names from the gene table
    Com2Gene[complex_name] = list(genes_cpDB[genes_cpDB['uniprot'].isin(curr_complex_proteins)]['gene_name'])
    

In [11]:
list(Com2Gene.items())[:10]

[('complex:12oxoLeukotrieneB4_byPTGR1', ['PTGR1']),
 ('complex:17aHydroxyprogesterone_byCYP17A1', ['CYP17A1']),
 ('complex:22Hydroxycholesterol_byCYP11A1', ['CYP11A1']),
 ('complex:22Hydroxycholesterol_byCYP3A4', ['CYP3A4']),
 ('complex:2arachidonoylglycerol_byDAGLA', ['DAGLA']),
 ('complex:2arachidonoylglycerol_byDAGLB', ['DAGLB']),
 ('complex:5-alpha-Dihydroprogesterone_byDHRS9', ['DHRS9']),
 ('complex:5HT3C5HT3A complex', ['HTR3A', 'HTR3C']),
 ('complex:5HT3C5HT3A_complex', ['HTR3A', 'HTR3C']),
 ('complex:5HT3D receptor', ['HTR3A', 'HTR3D'])]

In [12]:
# Load interactions from cellphoneDB/out/means.txt output file                   
int_cpDB = pd.read_csv('/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/reanalysis_10X_twin_data/cellphone_analysis_twins/out_20210322/means.txt',
                      sep='\t')

# disregarding pairwise average expression values
int_cpDB = int_cpDB.loc[:, list(int_cpDB.columns)[:11]]
int_cpDB

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,is_integrin
0,CPI-SS0A7B487D4,KLRG2_WNT11,simple:A4D1S0,simple:O96014,KLRG2,WNT11,True,True,False,InnateDB-All,False
1,CPI-CS0481C1F9A,FZD1_LRP5_WNT11,complex:FZD1_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
2,CPI-CS0F29C6285,FZD1_LRP6_WNT11,complex:FZD1_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
3,CPI-CS0372FC240,FZD2_LRP5_WNT11,complex:FZD2_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
4,CPI-CS031A2034E,FZD2_LRP6_WNT11,complex:FZD2_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
...,...,...,...,...,...,...,...,...,...,...,...
928,CPI-SC090068F7B,TSLP_TSLPR,simple:Q969D9,complex:TSLPR,TSLP,,True,False,True,user_curated,False
929,CPI-SC047CEF2DD,CRLF2_TSLPR,simple:Q9HC73,complex:TSLPR,CRLF2,,True,True,True,user_curated,False
930,CPI-SS04C672963,ESAM_ESAM,simple:Q96AP7,simple:Q96AP7,ESAM,ESAM,False,False,False,user_curated,False
931,CPI-SC001AFA16D,NRTN_RET receptor 2,simple:Q99748,complex:RET receptor 2,NRTN,,True,False,True,curated,False


In [13]:
np.unique(int_cpDB['annotation_strategy'], return_counts=True)

(array(['I2D', 'I2D,IMEx,InnateDB,InnateDB-All,IntAct,MINT',
        'I2D,IMEx,InnateDB-All,IntAct', 'I2D,IMEx,InnateDB-All,MINT',
        'I2D,InnateDB', 'I2D,InnateDB-All', 'I2D,IntAct',
        'IMEx,InnateDB-All,IntAct', 'IMEx,InnateDB-All,IntAct,MatrixDB',
        'IMEx,InnateDB-All,MINT', 'IMEx,InnateDB-All,UniProt',
        'IMEx,IntAct', 'IMEx,MINT', 'InnateDB', 'InnateDB-All', 'curated',
        'guidetopharmacology.org', 'user_curated'], dtype=object),
 array([ 27,   1,   1,   1,   1,  13,   2,   2,   1,   6,   1,  18,   2,
          2,  58, 203,  39, 555]))

In [14]:
# MANDATORY: remove "curated" because we have cleaned and renamed them (this is a long story, just do it)
# these interactions have either been renamed or excluded so best not to use them
int_cpDB = int_cpDB[int_cpDB['annotation_strategy'] != 'curated']

In [15]:
# OPTIONAL: Use only user_curated interactions?
if filter_int_user_curated:
    int_cpDB = int_cpDB[int_cpDB['annotation_strategy'] == 'user_curated']

In [16]:
int_cpDB

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,is_integrin
1,CPI-CS0481C1F9A,FZD1_LRP5_WNT11,complex:FZD1_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
2,CPI-CS0F29C6285,FZD1_LRP6_WNT11,complex:FZD1_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
3,CPI-CS0372FC240,FZD2_LRP5_WNT11,complex:FZD2_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
4,CPI-CS031A2034E,FZD2_LRP6_WNT11,complex:FZD2_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
5,CPI-CS02643715E,FZD3_LRP5_WNT11,complex:FZD3_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
...,...,...,...,...,...,...,...,...,...,...,...
925,CPI-SS025C8F785,CLEC2B_KLRF1,simple:Q92478,simple:Q9NZS2,CLEC2B,KLRF1,False,True,True,user_curated,False
928,CPI-SC090068F7B,TSLP_TSLPR,simple:Q969D9,complex:TSLPR,TSLP,,True,False,True,user_curated,False
929,CPI-SC047CEF2DD,CRLF2_TSLPR,simple:Q9HC73,complex:TSLPR,CRLF2,,True,True,True,user_curated,False
930,CPI-SS04C672963,ESAM_ESAM,simple:Q96AP7,simple:Q96AP7,ESAM,ESAM,False,False,False,user_curated,False


In [17]:
int_cpDB.loc[1,:]

id_cp_interaction        CPI-CS0481C1F9A
interacting_pair         FZD1_LRP5_WNT11
partner_a              complex:FZD1_LRP5
partner_b                  simple:O96014
gene_a                               NaN
gene_b                             WNT11
secreted                            True
receptor_a                         False
receptor_b                         False
annotation_strategy         user_curated
is_integrin                        False
Name: 1, dtype: object

In [18]:
'complex:FZD1_LRP5' in list(Com2Gene.keys())

True

In [19]:
list(Com2Gene.keys())[:5]

['complex:12oxoLeukotrieneB4_byPTGR1',
 'complex:17aHydroxyprogesterone_byCYP17A1',
 'complex:22Hydroxycholesterol_byCYP11A1',
 'complex:22Hydroxycholesterol_byCYP3A4',
 'complex:2arachidonoylglycerol_byDAGLA']

In [20]:
'complex:FZD1_LRP5' in list(com_cpDB['complex_name'])

True

In [21]:
# Generate Int2Gene dictionary
Int2Gene = {}

for i in int_cpDB.index:
    #print('row number:', i)
    curr_df_row = int_cpDB.loc[i,:]
    #print('row:', curr_df_row)
    
    # if partnerA is complex (aka np.isnan(table['gene_a']) == True), then retrieve members from dictionary
    # complex name will be in table['partner_a']
    if str(curr_df_row['gene_a']) == 'nan':
        partner_A = Com2Gene[curr_df_row['partner_a']]
    else:
        # if it's not a complex, then get the partner A from table['gene_a']
        partner_A = [curr_df_row['gene_a']]
        
    # if partnerB is complex, then retrieve members from dictionary
    # complex name will be in table['partner_b']
    if str(curr_df_row['gene_b']) == 'nan':
        partner_B = Com2Gene[curr_df_row['partner_b']]
    else:
        # if it's not a complex, then get the partner B from table['gene_b']
        partner_B = [curr_df_row['gene_b']]
        
    interaction_id = curr_df_row['interacting_pair']

    Int2Gene[interaction_id] = {'partner_a': partner_A,
                  'partner_b': partner_B}

In [22]:
list(Int2Gene.items())[:10]

[('FZD1_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD1'], 'partner_b': ['WNT11']}),
 ('FZD1_LRP6_WNT11', {'partner_a': ['FZD1', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD2_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD2'], 'partner_b': ['WNT11']}),
 ('FZD2_LRP6_WNT11', {'partner_a': ['FZD2', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD3_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD3'], 'partner_b': ['WNT11']}),
 ('FZD3_LRP6_WNT11', {'partner_a': ['FZD3', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD5_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD5'], 'partner_b': ['WNT11']}),
 ('FZD5_LRP6_WNT11', {'partner_a': ['FZD5', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD6_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD6'], 'partner_b': ['WNT11']}),
 ('FZD6_LRP6_WNT11', {'partner_a': ['FZD6', 'LRP6'], 'partner_b': ['WNT11']})]

## Load cluster's gene percentage expression

Prepared in S2 notebook

In [23]:
# Load percentage expression info
# Matrix of genes (rows) per celltypes (columns) containing the proportion [0-1] of cells 
# in a celltype expressing the gene
path_Exp = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/202102_twins_reanalysis/PercentExpressed_for_cellphone_20210322.csv'

# Load matrix
Per_df = pd.read_csv(path_Exp, index_col=0) 

# Dictionary of celltypes2expressed genes
genes_expr_per_cell_type = {} 

for ct in Per_df.columns:
    print(ct)
    curr_table = pd.DataFrame(Per_df.loc[:, ct])
    
    # only leave in genes expressed in this cell type according to a threshold declared in the beginnning of this notebook
    mask = (curr_table[ct] > per_cutoff)
    genes_expr_per_cell_type[ct] = list(curr_table[ct][mask].index)

B_cells_memory
B_cells_memory_activated
B_cells_naive
B_cells_naive_activated
B_cells_oligoclonal
MAIT_cells
MAIT_cells_activated
Myeloid_cells
NK_CD16_bright
NK_CD16_bright_activated
NK_CD56_bright
NK_CD56_bright_activated
T4_activated
T4_memory
T4_naive
T8_activated_1
T8_activated_2
T8_memory
T8_naive
T_gd
T_regs


In [24]:
len(genes_expr_per_cell_type['B_cells_memory'])

5577

## Load DE expression info

In [25]:
path_DE = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/202102_twins_reanalysis/joint_DEGs_list_all_cell_types_for_cellphone_20210322.csv'
DE_df_full = pd.read_csv(path_DE, #row.names = 0
                )

DE_df_full


Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,cluster
0,0,IGHM,0.946059,9.981746e-14,1.365403e-09,1.625146,0.679087,0.754902,0.383673,B_cells_memory
1,1,AC090498.1,0.781893,6.434098e-12,4.400601e-08,2.060740,1.278847,0.892157,0.693878,B_cells_memory
2,2,IGHD,0.252772,6.550533e-09,2.656846e-05,0.284881,0.032109,0.215686,0.032653,B_cells_memory
3,3,MT-ATP8,0.547031,7.769124e-09,2.656846e-05,3.347746,2.800715,1.000000,0.951020,B_cells_memory
4,4,POLD4,0.515404,4.388788e-08,8.830916e-05,1.173324,0.657919,0.784314,0.485714,B_cells_memory
...,...,...,...,...,...,...,...,...,...,...
287363,13608,TTC25,-0.000002,9.996478e-01,9.999088e-01,0.002725,0.002727,0.006944,0.004098,T_regs
287364,13609,SMOX,-0.000003,9.997161e-01,9.999088e-01,0.007821,0.007825,0.006944,0.008197,T_regs
287365,13610,POLR1E,-0.000014,9.997619e-01,9.999088e-01,0.234196,0.234210,0.256944,0.245902,T_regs
287366,13611,ZBTB11,-0.000005,9.998874e-01,9.999608e-01,0.123312,0.123318,0.131944,0.139344,T_regs


In [26]:
'PTPRC' in list(DE_df_full['Gene'])

True

In [27]:
DE_df_full

Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,cluster
0,0,IGHM,0.946059,9.981746e-14,1.365403e-09,1.625146,0.679087,0.754902,0.383673,B_cells_memory
1,1,AC090498.1,0.781893,6.434098e-12,4.400601e-08,2.060740,1.278847,0.892157,0.693878,B_cells_memory
2,2,IGHD,0.252772,6.550533e-09,2.656846e-05,0.284881,0.032109,0.215686,0.032653,B_cells_memory
3,3,MT-ATP8,0.547031,7.769124e-09,2.656846e-05,3.347746,2.800715,1.000000,0.951020,B_cells_memory
4,4,POLD4,0.515404,4.388788e-08,8.830916e-05,1.173324,0.657919,0.784314,0.485714,B_cells_memory
...,...,...,...,...,...,...,...,...,...,...
287363,13608,TTC25,-0.000002,9.996478e-01,9.999088e-01,0.002725,0.002727,0.006944,0.004098,T_regs
287364,13609,SMOX,-0.000003,9.997161e-01,9.999088e-01,0.007821,0.007825,0.006944,0.008197,T_regs
287365,13610,POLR1E,-0.000014,9.997619e-01,9.999088e-01,0.234196,0.234210,0.256944,0.245902,T_regs
287366,13611,ZBTB11,-0.000005,9.998874e-01,9.999608e-01,0.123312,0.123318,0.131944,0.139344,T_regs


In [28]:
# filter the DE table according to cutoffs declared in the beginning of the notebook
# separately for upreg and downreg genes

DE_df = DE_df_full[(abs(DE_df_full['logFC']) > logFC_cutoff)
             & (DE_df_full['adj.P.Val'] < pval_cutoff)
             & (DE_df_full['percentExpr_cluster'] > per_cutoff)]

DE_df_upreg = DE_df[DE_df['logFC'] > 0]
DE_df_downreg = DE_df[DE_df['logFC'] < 0]

In [29]:
DE_df.shape

(10919, 10)

In [30]:
DE_df_upreg.shape

(8613, 10)

In [31]:
DE_df_downreg.shape

(2306, 10)

In [32]:
np.unique(DE_df_downreg['cluster'])

array(['B_cells_memory', 'B_cells_memory_activated', 'B_cells_naive',
       'B_cells_naive_activated', 'B_cells_oligoclonal', 'MAIT_cells',
       'MAIT_cells_activated', 'Myeloid_cells', 'NK_CD16_bright',
       'NK_CD16_bright_activated', 'NK_CD56_bright',
       'NK_CD56_bright_activated', 'T4_activated', 'T4_memory',
       'T4_naive', 'T8_activated_1', 'T8_memory', 'T8_naive', 'T_gd',
       'T_regs'], dtype=object)

In [33]:
# Build dictionary clusters2DE_genes
# separately for upreg and downreg genes

clusters_upreg = list(np.unique(DE_df_upreg['cluster'])) # any clusters in which there are upreg genes?
clusters_downreg = list(np.unique(DE_df_downreg['cluster'])) # any clusters in which there are downreg genes?

is_DE_upreg = {}
is_DE_downreg = {}

for cluster in clusters_upreg:
    is_DE_upreg[cluster] = list(DE_df_upreg[DE_df_upreg['cluster'] == cluster]['Gene'])
    
for cluster in clusters_downreg:
    is_DE_downreg[cluster] = list(DE_df_downreg[DE_df_downreg['cluster'] == cluster]['Gene'])

In [34]:
list(is_DE_upreg.keys())

['B_cells_memory',
 'B_cells_memory_activated',
 'B_cells_naive',
 'B_cells_naive_activated',
 'B_cells_oligoclonal',
 'MAIT_cells',
 'MAIT_cells_activated',
 'Myeloid_cells',
 'NK_CD16_bright',
 'NK_CD16_bright_activated',
 'NK_CD56_bright',
 'NK_CD56_bright_activated',
 'T4_activated',
 'T4_memory',
 'T4_naive',
 'T8_activated_1',
 'T8_activated_2',
 'T8_memory',
 'T8_naive',
 'T_gd',
 'T_regs']

In [35]:
len(list(is_DE_upreg.keys()))

21

In [36]:
len(list(is_DE_downreg.keys()))

20

In [37]:
set(list(is_DE_upreg.keys())) - set(list(is_DE_downreg.keys()))

{'T8_activated_2'}

In [38]:
# indeed, no downteg genes here
DE_df[DE_df['cluster'] == 'T8_activated_2']

Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,cluster
219709,0,TRBV3-1,0.376654,8.645199e-07,0.011373,0.376654,0.0,0.129032,0.0,T8_activated_2
219710,1,RPL36AL,0.516651,2.07238e-06,0.013631,2.288892,1.772241,0.956989,0.841026,T8_activated_2
219711,2,RAB14,0.301769,1.259406e-05,0.049165,0.546634,0.244865,0.516129,0.230769,T8_activated_2
219712,3,STK24,0.248138,1.690054e-05,0.049165,0.3598,0.111662,0.344086,0.123077,T8_activated_2
219713,4,AC090498.1,0.469804,1.868687e-05,0.049165,1.205116,0.735311,0.666667,0.507692,T8_activated_2


In [39]:
len(is_DE_upreg['B_cells_memory'])

121

In [40]:
len(is_DE_downreg['B_cells_memory'])

2

In [41]:
for ct in list(is_DE_upreg.keys()):
    print(ct)
    print(len(is_DE_upreg[ct]), '\n')

B_cells_memory
121 

B_cells_memory_activated
4272 

B_cells_naive
721 

B_cells_naive_activated
719 

B_cells_oligoclonal
16 

MAIT_cells
27 

MAIT_cells_activated
16 

Myeloid_cells
396 

NK_CD16_bright
119 

NK_CD16_bright_activated
261 

NK_CD56_bright
30 

NK_CD56_bright_activated
23 

T4_activated
627 

T4_memory
442 

T4_naive
57 

T8_activated_1
119 

T8_activated_2
5 

T8_memory
101 

T8_naive
48 

T_gd
424 

T_regs
69 



In [42]:
for ct in list(is_DE_downreg.keys()):
    print(ct)
    print(len(is_DE_downreg[ct]), '\n')

B_cells_memory
2 

B_cells_memory_activated
133 

B_cells_naive
20 

B_cells_naive_activated
7 

B_cells_oligoclonal
1 

MAIT_cells
2 

MAIT_cells_activated
2 

Myeloid_cells
269 

NK_CD16_bright
7 

NK_CD16_bright_activated
31 

NK_CD56_bright
4 

NK_CD56_bright_activated
1 

T4_activated
1239 

T4_memory
30 

T4_naive
2 

T8_activated_1
41 

T8_memory
42 

T8_naive
4 

T_gd
418 

T_regs
51 



## Define cell pairs to test

In [43]:
len(list(genes_expr_per_cell_type.keys()))

21

In [44]:
list(genes_expr_per_cell_type.keys())

['B_cells_memory',
 'B_cells_memory_activated',
 'B_cells_naive',
 'B_cells_naive_activated',
 'B_cells_oligoclonal',
 'MAIT_cells',
 'MAIT_cells_activated',
 'Myeloid_cells',
 'NK_CD16_bright',
 'NK_CD16_bright_activated',
 'NK_CD56_bright',
 'NK_CD56_bright_activated',
 'T4_activated',
 'T4_memory',
 'T4_naive',
 'T8_activated_1',
 'T8_activated_2',
 'T8_memory',
 'T8_naive',
 'T_gd',
 'T_regs']

In [45]:
list(genes_expr_per_cell_type.keys())

['B_cells_memory',
 'B_cells_memory_activated',
 'B_cells_naive',
 'B_cells_naive_activated',
 'B_cells_oligoclonal',
 'MAIT_cells',
 'MAIT_cells_activated',
 'Myeloid_cells',
 'NK_CD16_bright',
 'NK_CD16_bright_activated',
 'NK_CD56_bright',
 'NK_CD56_bright_activated',
 'T4_activated',
 'T4_memory',
 'T4_naive',
 'T8_activated_1',
 'T8_activated_2',
 'T8_memory',
 'T8_naive',
 'T_gd',
 'T_regs']

In [46]:
# Get all pairwise cluster combinations: A--B, B--A
# Consider the reverse interaction too: A--B but B--A as well
pairwise_cluster_combinations = list(itertools.permutations(list(genes_expr_per_cell_type.keys()), 2))
len(pairwise_cluster_combinations)


420

In [47]:
pairwise_cluster_combinations[:5]

[('B_cells_memory', 'B_cells_memory_activated'),
 ('B_cells_memory', 'B_cells_naive'),
 ('B_cells_memory', 'B_cells_naive_activated'),
 ('B_cells_memory', 'B_cells_oligoclonal'),
 ('B_cells_memory', 'MAIT_cells')]

In [48]:
# add self interactions
self_inter_combinations = [(ct, ct) for ct in list(genes_expr_per_cell_type.keys())]
pairwise_cluster_combinations = pairwise_cluster_combinations + self_inter_combinations
len(pairwise_cluster_combinations)


441

In [49]:
len(list(is_DE_upreg.keys()))

21

In [50]:
len(list(is_DE_downreg.keys()))

20

In [52]:
list(is_DE_upreg.keys())

['B_cells_memory',
 'B_cells_memory_activated',
 'B_cells_naive',
 'B_cells_naive_activated',
 'B_cells_oligoclonal',
 'MAIT_cells',
 'MAIT_cells_activated',
 'Myeloid_cells',
 'NK_CD16_bright',
 'NK_CD16_bright_activated',
 'NK_CD56_bright',
 'NK_CD56_bright_activated',
 'T4_activated',
 'T4_memory',
 'T4_naive',
 'T8_activated_1',
 'T8_activated_2',
 'T8_memory',
 'T8_naive',
 'T_gd',
 'T_regs']

In [53]:
# We only want to test pairs including at least one celltype in the DE folder

pairwise_cluster_combinations_upreg = [elem for elem in pairwise_cluster_combinations if elem[0] in list(is_DE_upreg.keys()) or elem[1] in list(is_DE_upreg.keys())]
pairwise_cluster_combinations_downreg = [elem for elem in pairwise_cluster_combinations if elem[0] in list(is_DE_downreg.keys()) or elem[1] in list(is_DE_downreg.keys())]

In [54]:
len(pairwise_cluster_combinations_upreg)

441

In [55]:
len(pairwise_cluster_combinations_downreg)

440

In [56]:
# Make cluster pair labels: celltypeA--celltypeB             
cluster_combinations_labels_upreg = [comb[0] + '---' + comb[1] for comb in pairwise_cluster_combinations_upreg]
cluster_combinations_labels_downreg = [comb[0] + '---' + comb[1] for comb in pairwise_cluster_combinations_downreg]

In [57]:
len(cluster_combinations_labels_upreg)

441

In [58]:
len(cluster_combinations_labels_downreg)

440

# Retrieve CellphoneDB L/R interactions

A relevant interaction shoudl have

1. All their participants expressed in the corresponding celltypes
2. At least one participant is a DEG

In [59]:
len(Int2Gene.keys())

555

In [60]:
# Make scaffold matrix: L/R interactions (rows) x celltype pairs (columns)
df_Exrp_LR_in_celltype_pairs_upreg = pd.DataFrame(index = list(Int2Gene.keys()),
                                          columns = cluster_combinations_labels_upreg,
                                          data = np.zeros( (len(list(Int2Gene.keys())), 
                                                            len(cluster_combinations_labels_upreg)) )
                                         )

df_Exrp_LR_in_celltype_pairs_downreg = pd.DataFrame(index = list(Int2Gene.keys()),
                                          columns = cluster_combinations_labels_downreg,
                                          data = np.zeros( (len(list(Int2Gene.keys())), 
                                                            len(cluster_combinations_labels_downreg)) )
                                         )

In [61]:
df_Exrp_LR_in_celltype_pairs_upreg

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---B_cells_oligoclonal,B_cells_memory---MAIT_cells,B_cells_memory---MAIT_cells_activated,B_cells_memory---Myeloid_cells,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,B_cells_memory---NK_CD56_bright,...,NK_CD56_bright_activated---NK_CD56_bright_activated,T4_activated---T4_activated,T4_memory---T4_memory,T4_naive---T4_naive,T8_activated_1---T8_activated_1,T8_activated_2---T8_activated_2,T8_memory---T8_memory,T8_naive---T8_naive,T_gd---T_gd,T_regs---T_regs
FZD1_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD1_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD3_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CLEC2B_KLRF1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TSLP_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRLF2_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ESAM_ESAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [62]:
df_Exrp_LR_in_celltype_pairs_downreg

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---B_cells_oligoclonal,B_cells_memory---MAIT_cells,B_cells_memory---MAIT_cells_activated,B_cells_memory---Myeloid_cells,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,B_cells_memory---NK_CD56_bright,...,NK_CD56_bright---NK_CD56_bright,NK_CD56_bright_activated---NK_CD56_bright_activated,T4_activated---T4_activated,T4_memory---T4_memory,T4_naive---T4_naive,T8_activated_1---T8_activated_1,T8_memory---T8_memory,T8_naive---T8_naive,T_gd---T_gd,T_regs---T_regs
FZD1_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD1_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD3_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CLEC2B_KLRF1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TSLP_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRLF2_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ESAM_ESAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [63]:
%%time

# LR_pairs_celltype_pairs_df will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# So, fill 1 if all genes are expressed in all clusters

for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg.index):
    
    #if interaction == 'COL19A1_integrin_a1b1_complex':
        #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_upreg.index).index(interaction)+1, 
        #  'out of', len(list(df_Exrp_LR_in_celltype_pairs_upreg.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_upreg.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are all partner_A genes expressed in celltype_A and are all partner_B genes expressed in celltype_B?
        are_all_expressed = all(elem in genes_expr_per_cell_type[ct_A] for elem in partner_A_genes) & all(elem in genes_expr_per_cell_type[ct_B] for elem in partner_B_genes)
        
        #if interaction == 'COL19A1_integrin_a1b1_complex':
            #print('cell type pair:', ct_pair)
            #print('curr partner_A_genes', partner_A_genes)
            #print('curr partner_B_genes', partner_B_genes)
            #print('are_all_expressed?', are_all_expressed)
            
        
        if are_all_expressed:
            df_Exrp_LR_in_celltype_pairs_upreg.loc[interaction, ct_pair] = 1


CPU times: user 32 s, sys: 3.25 ms, total: 32 s
Wall time: 32 s


In [64]:
%%time

# LR_pairs_celltype_pairs_df will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# So, fill 1 if all genes are expressed in all clusters

for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_downreg.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_downreg.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_downreg.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are all partner_A genes expressed in celltype_A and are all partner_B genes expressed in celltype_B?
        are_all_expressed = all(elem in genes_expr_per_cell_type[ct_A] for elem in partner_A_genes) & all(elem in genes_expr_per_cell_type[ct_B] for elem in partner_B_genes)
        
        #if interaction == 'CD40LG_integrin_a5b1_complex' and ct_A == 'T4_activated' and ct_B == 'B_cells_oligoclonal':
            #print('cell type pair:', ct_pair)
            #print('interaction', interaction)
            #print('curr partner_A_genes', partner_A_genes)
            #print('curr partner_B_genes', partner_B_genes)
            #print('are all the partner_A_genes expressed?', all(elem in genes_expr_per_cell_type[ct_A] for elem in partner_A_genes))
            #print('are all the partner_B_genes expressed?', all(elem in genes_expr_per_cell_type[ct_B] for elem in partner_B_genes))
            #print('are_all_expressed?', are_all_expressed)
        
        
        if are_all_expressed:
            df_Exrp_LR_in_celltype_pairs_downreg.loc[interaction, ct_pair] = 1


CPU times: user 31.3 s, sys: 11.1 ms, total: 31.4 s
Wall time: 31.4 s


In [65]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.values, return_counts=True)

(array([0., 1.]), array([238243,   6512]))

In [66]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.values, return_counts=True)

(array([0., 1.]), array([237705,   6495]))

In [67]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.sum(axis=0))

array([ 2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14.,
       15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27.,
       28., 29., 30., 31., 33., 34., 37., 38., 39., 42., 43., 45.])

In [68]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.sum(axis=0))

array([ 2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14.,
       15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27.,
       28., 29., 30., 31., 33., 34., 37., 38., 39., 42., 43., 45.])

In [69]:
df_Exrp_LR_in_celltype_pairs_upreg.shape

(555, 441)

In [70]:
df_Exrp_LR_in_celltype_pairs_downreg.shape

(555, 440)

In [71]:
# keep celltype pairs with at least one expressed interaction

df_Exrp_LR_in_celltype_pairs_upreg = df_Exrp_LR_in_celltype_pairs_upreg.loc[:, (df_Exrp_LR_in_celltype_pairs_upreg != 0).any(axis=0)]
print(df_Exrp_LR_in_celltype_pairs_upreg.shape)

df_Exrp_LR_in_celltype_pairs_downreg = df_Exrp_LR_in_celltype_pairs_downreg.loc[:, (df_Exrp_LR_in_celltype_pairs_downreg != 0).any(axis=0)]
print(df_Exrp_LR_in_celltype_pairs_downreg.shape)

(555, 441)
(555, 440)


In [72]:
# keep interactions with at least one celltype_pair
df_Exrp_LR_in_celltype_pairs_upreg = df_Exrp_LR_in_celltype_pairs_upreg.loc[(df_Exrp_LR_in_celltype_pairs_upreg != 0).any(axis=1), :]
print(df_Exrp_LR_in_celltype_pairs_upreg.shape)

df_Exrp_LR_in_celltype_pairs_downreg = df_Exrp_LR_in_celltype_pairs_downreg.loc[(df_Exrp_LR_in_celltype_pairs_downreg != 0).any(axis=1), :]
print(df_Exrp_LR_in_celltype_pairs_downreg.shape)

(102, 441)
(102, 440)


In [73]:
df_Exrp_LR_in_celltype_pairs_downreg

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---B_cells_oligoclonal,B_cells_memory---MAIT_cells,B_cells_memory---MAIT_cells_activated,B_cells_memory---Myeloid_cells,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,B_cells_memory---NK_CD56_bright,...,NK_CD56_bright---NK_CD56_bright,NK_CD56_bright_activated---NK_CD56_bright_activated,T4_activated---T4_activated,T4_memory---T4_memory,T4_naive---T4_naive,T8_activated_1---T8_activated_1,T8_memory---T8_memory,T8_naive---T8_naive,T_gd---T_gd,T_regs---T_regs
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LTBR_LTB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [74]:
list(df_Exrp_LR_in_celltype_pairs_downreg.index) == list(df_Exrp_LR_in_celltype_pairs_upreg.index)

True

In [75]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.values, return_counts=True)

(array([0., 1.]), array([38470,  6512]))

In [76]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.values, return_counts=True)

(array([0., 1.]), array([38385,  6495]))

In [77]:
np.sum(df_Exrp_LR_in_celltype_pairs_upreg.values)

6512.0

In [78]:
np.sum(df_Exrp_LR_in_celltype_pairs_downreg.values)

6495.0

In [79]:
# Initialize DE matrix from LR_pairs_celltype_pairs_df and set all values to 0
# DE will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# and one is a DE in the celltypes of interests
# Make scaffold matrix: L/R interactions (rows) x celltype pairs (columns)
df_Exrp_LR_in_celltype_pairs_upreg_DE = pd.DataFrame(index = list(df_Exrp_LR_in_celltype_pairs_upreg.index),
                                          columns = list(df_Exrp_LR_in_celltype_pairs_upreg.columns),
                                            data = np.zeros( (len(list(df_Exrp_LR_in_celltype_pairs_upreg.index)), 
                                                            len(list(df_Exrp_LR_in_celltype_pairs_upreg.columns))))
                                            )
df_Exrp_LR_in_celltype_pairs_downreg_DE = pd.DataFrame(index = list(df_Exrp_LR_in_celltype_pairs_downreg.index),
                                          columns = list(df_Exrp_LR_in_celltype_pairs_downreg.columns),
                                            data = np.zeros( (len(list(df_Exrp_LR_in_celltype_pairs_downreg.index)), 
                                                            len(list(df_Exrp_LR_in_celltype_pairs_downreg.columns))))
                                            )

In [80]:
%%time
# So, fill 1 if at least one gene in the interaction is DE
for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
        are_any_DE = all(elem in is_DE_upreg[ct_A] for elem in partner_A_genes) | all(elem in is_DE_upreg[ct_B] for elem in partner_B_genes)  
        
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs_upreg.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction, ct_pair] = 1

CPU times: user 1.14 s, sys: 27 µs, total: 1.14 s
Wall time: 1.14 s


In [81]:
df_Exrp_LR_in_celltype_pairs_downreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---B_cells_oligoclonal,B_cells_memory---MAIT_cells,B_cells_memory---MAIT_cells_activated,B_cells_memory---Myeloid_cells,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,B_cells_memory---NK_CD56_bright,...,NK_CD56_bright---NK_CD56_bright,NK_CD56_bright_activated---NK_CD56_bright_activated,T4_activated---T4_activated,T4_memory---T4_memory,T4_naive---T4_naive,T8_activated_1---T8_activated_1,T8_memory---T8_memory,T8_naive---T8_naive,T_gd---T_gd,T_regs---T_regs
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LTBR_LTB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [85]:
%%time
# So, fill 1 if at least one gene in the interaction is DE
for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # so cell type T8_activated_2 will throw an error here because it doesn't have downreg DE genes, so needs a special if
        if ct_A == 'T8_activated_2':
            # if ct_A is T8_activated_2, we only care about if partner_B_genes are downreg in this case
            are_any_DE = all(elem in is_DE_downreg[ct_B] for elem in partner_B_genes)
        elif ct_B == 'T8_activated_2':
            # if ct_B is T8_activated_2, we only care about if partner_A_genes are downreg in this case
            are_any_DE = all(elem in is_DE_downreg[ct_A] for elem in partner_A_genes)
        else:
            # if it's not T8_activated_2, proceed as normal
            # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
            are_any_DE = all(elem in is_DE_downreg[ct_A] for elem in partner_A_genes) | all(elem in is_DE_downreg[ct_B] for elem in partner_B_genes)
        
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs_downreg.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction, ct_pair] = 1

CPU times: user 662 ms, sys: 0 ns, total: 662 ms
Wall time: 666 ms


In [86]:
df_Exrp_LR_in_celltype_pairs_upreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---B_cells_oligoclonal,B_cells_memory---MAIT_cells,B_cells_memory---MAIT_cells_activated,B_cells_memory---Myeloid_cells,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,B_cells_memory---NK_CD56_bright,...,NK_CD56_bright_activated---NK_CD56_bright_activated,T4_activated---T4_activated,T4_memory---T4_memory,T4_naive---T4_naive,T8_activated_1---T8_activated_1,T8_activated_2---T8_activated_2,T8_memory---T8_memory,T8_naive---T8_naive,T_gd---T_gd,T_regs---T_regs
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LTBR_LTB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [87]:
df_Exrp_LR_in_celltype_pairs_downreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---B_cells_oligoclonal,B_cells_memory---MAIT_cells,B_cells_memory---MAIT_cells_activated,B_cells_memory---Myeloid_cells,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,B_cells_memory---NK_CD56_bright,...,NK_CD56_bright---NK_CD56_bright,NK_CD56_bright_activated---NK_CD56_bright_activated,T4_activated---T4_activated,T4_memory---T4_memory,T4_naive---T4_naive,T8_activated_1---T8_activated_1,T8_memory---T8_memory,T8_naive---T8_naive,T_gd---T_gd,T_regs---T_regs
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LTBR_LTB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [88]:
# UPREG interactions

# keep interactions expressed in at least one celltype pair
df_Exrp_LR_in_celltype_pairs_upreg_DE = df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[:, (df_Exrp_LR_in_celltype_pairs_upreg_DE != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_upreg_DE.shape, '\n')

# keep celltypepairs with at least one interaction

df_Exrp_LR_in_celltype_pairs_upreg_DE = df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[(df_Exrp_LR_in_celltype_pairs_upreg_DE != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_upreg_DE.shape, '\n')

shape after filtering interactions
(102, 320) 

shape after filtering cell type pairs
(77, 320) 



In [89]:
# DOWNREG interactions

# keep interactions expressed in at least one celltype pair
df_Exrp_LR_in_celltype_pairs_downreg_DE = df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[:, (df_Exrp_LR_in_celltype_pairs_downreg_DE != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_downreg_DE.shape, '\n')

# keep celltypepairs with at least one interaction

df_Exrp_LR_in_celltype_pairs_downreg_DE = df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[(df_Exrp_LR_in_celltype_pairs_downreg_DE != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_downreg_DE.shape, '\n')

shape after filtering interactions
(102, 198) 

shape after filtering cell type pairs
(39, 198) 



In [90]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg_DE.values, return_counts=True)

(array([0., 1.]), array([23332,  1308]))

In [91]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg_DE.values, return_counts=True)

(array([0., 1.]), array([7003,  719]))

In [92]:
df_Exrp_LR_in_celltype_pairs_upreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---MAIT_cells_activated,B_cells_memory---Myeloid_cells,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,B_cells_memory---NK_CD56_bright,B_cells_memory---NK_CD56_bright_activated,B_cells_memory---T4_activated,...,B_cells_memory---B_cells_memory,B_cells_memory_activated---B_cells_memory_activated,B_cells_naive_activated---B_cells_naive_activated,Myeloid_cells---Myeloid_cells,NK_CD16_bright---NK_CD16_bright,NK_CD16_bright_activated---NK_CD16_bright_activated,T4_activated---T4_activated,T4_memory---T4_memory,T8_memory---T8_memory,T_gd---T_gd
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SIRPA_CD47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
LGALS9_HAVCR2,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CD52_SIGLEC10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LTBR_LTB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [93]:
df_Exrp_LR_in_celltype_pairs_downreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---T4_activated,B_cells_memory_activated---B_cells_memory,B_cells_memory_activated---B_cells_naive,B_cells_memory_activated---B_cells_naive_activated,B_cells_memory_activated---B_cells_oligoclonal,B_cells_memory_activated---MAIT_cells,B_cells_memory_activated---MAIT_cells_activated,B_cells_memory_activated---Myeloid_cells,B_cells_memory_activated---NK_CD16_bright,...,T_regs---T8_naive,T_regs---T_gd,B_cells_memory_activated---B_cells_memory_activated,NK_CD16_bright_activated---NK_CD16_bright_activated,T4_activated---T4_activated,T4_memory---T4_memory,T8_activated_1---T8_activated_1,T8_memory---T8_memory,T_gd---T_gd,T_regs---T_regs
CD40LG_integrin_a5b1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FCER2_integrin_aMb2_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ICAM1_integrin_aMb2_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FCER2_integrin_aXb2_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ICAM1_integrin_aXb2_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FCER2_CR2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CXCR3_CXCL9,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DPP4_CXCL9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD94:NKG2A_HLA-E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ICAM1_SPN,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Save results

In [94]:
save_path = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/202102_twins_reanalysis/'

In [95]:
# So DE is our output matrix
# Filter it accordingly for visualization
df_Exrp_LR_in_celltype_pairs_upreg_DE.to_csv(save_path + '20210416_cellphone_interactions_upreg_in_CVID_twin_no_logFC_cutoff.csv')
df_Exrp_LR_in_celltype_pairs_downreg_DE.to_csv(save_path + '20210416_cellphone_interactions_downreg_in_CVID_twin_no_logFC_cutoff.csv')

In [96]:
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/202102_twins_reanalysis/'

## Save results in a more readable format

Gene by gene breakdown with added DEG stats

### Upreg interactions

In [97]:
faulty_index_count = 0

vec2_append_upreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_upreg_DE.index:
    
    print(interaction, list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index).index(interaction) + 1, 
          'out of', len(df_Exrp_LR_in_celltype_pairs_upreg_DE.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        # row by row
        vec2_append_upreg[str(curr_count)] = {}
        
        vec2_append_upreg[str(curr_count)]['interaction'] = interaction
        
        #print(celltype_pair)
        
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
        
        #print('curr partner A genes', curr_partner_A_genes, 'len:', len(curr_partner_A_genes))
        #print('curr partner B genes', curr_partner_B_genes, 'len:', len(curr_partner_B_genes))
        
        vec2_append_upreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_upreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
        
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        vec2_append_upreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_upreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_df_upreg[DE_df_upreg['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_df_upreg[DE_df_upreg['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])

        
        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_cluster'])
            
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
            
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_cluster'])
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1
    
    

PVR_CD96 1 out of 77
PVR_TIGIT 2 out of 77
NOTCH1_DLL3 3 out of 77
SIRPA_CD47 4 out of 77
LGALS9_HAVCR2 5 out of 77
COL19A1_integrin_a1b1_complex 6 out of 77
PLAUR_integrin_a4b1_complex 7 out of 77
TGFB1_TGFBR3 8 out of 77
C3_integrin_aMb2_complex 9 out of 77
FCER2_integrin_aXb2_complex 10 out of 77
ICAM1_integrin_aXb2_complex 11 out of 77
FCER2_CR2 12 out of 77
CXCR3_CXCL9 13 out of 77
CD8_receptor_LCK 14 out of 77
CD94:NKG2A_HLA-E 15 out of 77
TNFRSF13B_TNFSF13B 16 out of 77
TNFRSF17_TNFSF13B 17 out of 77
TNFRSF13C_TNFSF13B 18 out of 77
CD74_APP 19 out of 77
ICAM1_ITGAL 20 out of 77
ICAM1_integrin_aLb2_complex 21 out of 77
ICAM2_integrin_aLb2_complex 22 out of 77
ICAM3_integrin_aLb2_complex 23 out of 77
FAS_FASLG 24 out of 77
HLA-F_LILRB2 25 out of 77
HLA-G_LILRB2 26 out of 77
HLA-F_LILRB1 27 out of 77
HLA-G_LILRB1 28 out of 77
CCL3_CCR5 29 out of 77
CCL5_CCR5 30 out of 77
KLRB1_CLEC2D 31 out of 77
TNF_TNFRSF1A 32 out of 77
LTA_TNFRSF1A 33 out of 77
TNF_TNFRSF1B 34 out of 77
LTA_TNFR

In [98]:
# outlining the final table format
df_output_upreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                              index = list(vec2_append_upreg.keys())
                            )
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1303,,,,,,,,,,,,,
1304,,,,,,,,,,,,,
1305,,,,,,,,,,,,,
1306,,,,,,,,,,,,,


In [99]:
list(df_output_upreg.columns) == list(vec2_append_upreg['0'].keys())

True

In [100]:
len(vec2_append_upreg.keys())

1308

In [101]:
vec2_append_upreg['0'].keys()

dict_keys(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A', 'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A', 'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B', 'adj_pval_gene_B', 'percent_expr_gene_B'])

In [102]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1303,,,,,,,,,,,,,
1304,,,,,,,,,,,,,
1305,,,,,,,,,,,,,
1306,,,,,,,,,,,,,


In [103]:
%%time

for i in list(vec2_append_upreg.keys()):
    #print(i)
    curr_keys = list(vec2_append_upreg[i].keys())
    for col in curr_keys:
        df_output_upreg.loc[i,col] = vec2_append_upreg[i][col]

CPU times: user 973 ms, sys: 84 µs, total: 973 ms
Wall time: 975 ms


In [104]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,PVR_CD96,[PVR],[CD96],Myeloid_cells,B_cells_memory_activated,False,,,[0.1017543859649122],True,[0.0430018529061859],[2.78149258992389e-07],[0.18287]
1,PVR_CD96,[PVR],[CD96],Myeloid_cells,T4_activated,False,,,[0.1017543859649122],True,[0.117721809647336],[1.2630769083619e-07],[0.265455]
2,PVR_CD96,[PVR],[CD96],Myeloid_cells,T_gd,False,,,[0.1017543859649122],True,[0.2227618919960119],[0.023121295879651],[0.489971]
3,PVR_TIGIT,[PVR],[TIGIT],Myeloid_cells,T4_activated,False,,,[0.1017543859649122],True,[0.12805564040011],[4.3144925507570896e-05],[0.261818]
4,PVR_TIGIT,[PVR],[TIGIT],Myeloid_cells,T4_memory,False,,,[0.1017543859649122],True,[0.207892137944771],[1.2808793567639798e-06],[0.227229]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1303,CLEC2B_KLRF1,[CLEC2B],[KLRF1],T4_naive,NK_CD16_bright_activated,False,,,[0.2266666666666666],True,[0.18635361631676],[0.036152375098562],[0.297048]
1304,CLEC2B_KLRF1,[CLEC2B],[KLRF1],T8_memory,NK_CD16_bright_activated,False,,,[0.4068100358422939],True,[0.18635361631676],[0.036152375098562],[0.297048]
1305,CLEC2B_KLRF1,[CLEC2B],[KLRF1],T8_naive,NK_CD16_bright_activated,False,,,[0.2328431372549019],True,[0.18635361631676],[0.036152375098562],[0.297048]
1306,CLEC2B_KLRF1,[CLEC2B],[KLRF1],T_gd,NK_CD16_bright_activated,False,,,[0.3802008608321377],True,[0.18635361631676],[0.036152375098562],[0.297048]


In [105]:
# getting rid of the square parentheses [] in all the values

cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_upreg.index):
    #print('row', row)
    for col in cols2correct:
        #print('column', col)
        curr_value = df_output_upreg.loc[row, col] # with []
        #print(curr_value)
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 - complex genes that is
        #if not isinstance(curr_value, list) & (curr_value != 'NA'): # ignoring NAs and lists of length > 1 - complex genes that is
            df_output_upreg.loc[row, col] = curr_value[0] # this just get the element - string if a gene, numerical value if it's a stat
            

### Splitting tables into 2 tables: simple interactions and complex interactions, latter being deconvoluted into pseudo-interactions for each subunit of a complex

In [106]:
# getting indices of complex interactions
complex_interaction_rows_upreg = []

for n_row in list(df_output_upreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) or isinstance(curr_partner_B_genes, list): # if partner A or B is a complex
        complex_interaction_rows_upreg.append(n_row)
        

In [107]:
len(complex_interaction_rows_upreg)

235

In [108]:
# splitting simple and complex interactions into 2 separate tables
df_output_upreg_complex = df_output_upreg.loc[complex_interaction_rows_upreg,:]
df_output_upreg_simple = df_output_upreg.drop(complex_interaction_rows_upreg, axis=0)

In [109]:
df_output_upreg_simple.shape

(1073, 13)

In [110]:
df_output_upreg_complex.shape

(235, 13)

In [111]:
df_output_upreg.shape

(1308, 13)

In [112]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,PVR_CD96,PVR,CD96,Myeloid_cells,B_cells_memory_activated,False,,,0.101754,True,0.043002,0.0,0.18287
1,PVR_CD96,PVR,CD96,Myeloid_cells,T4_activated,False,,,0.101754,True,0.117722,0.0,0.265455
2,PVR_CD96,PVR,CD96,Myeloid_cells,T_gd,False,,,0.101754,True,0.222762,0.023121,0.489971
3,PVR_TIGIT,PVR,TIGIT,Myeloid_cells,T4_activated,False,,,0.101754,True,0.128056,0.000043,0.261818
4,PVR_TIGIT,PVR,TIGIT,Myeloid_cells,T4_memory,False,,,0.101754,True,0.207892,0.000001,0.227229
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1303,CLEC2B_KLRF1,CLEC2B,KLRF1,T4_naive,NK_CD16_bright_activated,False,,,0.226667,True,0.186354,0.036152,0.297048
1304,CLEC2B_KLRF1,CLEC2B,KLRF1,T8_memory,NK_CD16_bright_activated,False,,,0.40681,True,0.186354,0.036152,0.297048
1305,CLEC2B_KLRF1,CLEC2B,KLRF1,T8_naive,NK_CD16_bright_activated,False,,,0.232843,True,0.186354,0.036152,0.297048
1306,CLEC2B_KLRF1,CLEC2B,KLRF1,T_gd,NK_CD16_bright_activated,False,,,0.380201,True,0.186354,0.036152,0.297048


In [113]:
df_output_upreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
57,COL19A1_integrin_a1b1_complex,COL19A1,"[ITGA1, ITGB1]",B_cells_memory_activated,MAIT_cells_activated,True,0.060116,0.000017,0.233796,False,,,"[0.1050228310502283, 0.2146118721461187]"
58,COL19A1_integrin_a1b1_complex,COL19A1,"[ITGA1, ITGB1]",B_cells_memory_activated,NK_CD56_bright_activated,True,0.060116,0.000017,0.233796,False,,,"[0.125, 0.205]"
59,COL19A1_integrin_a1b1_complex,COL19A1,"[ITGA1, ITGB1]",B_cells_naive_activated,MAIT_cells_activated,True,0.138726,0.005849,0.187643,False,,,"[0.1050228310502283, 0.2146118721461187]"
60,COL19A1_integrin_a1b1_complex,COL19A1,"[ITGA1, ITGB1]",B_cells_naive_activated,NK_CD56_bright_activated,True,0.138726,0.005849,0.187643,False,,,"[0.125, 0.205]"
61,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Myeloid_cells,B_cells_memory,True,0.448549,0.002648,0.596154,False,,,"[0.3256484149855908, 0.3285302593659942]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
898,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",T4_activated,B_cells_memory,True,0.220505,0.000079,0.254545,False,,,"[0.2161383285302593, 0.2651296829971181]"
899,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",T4_activated,B_cells_memory_activated,True,0.220505,0.000079,0.254545,False,,,"[0.2777777777777778, 0.1645622895622895]"
900,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",T4_activated,B_cells_naive,True,0.220505,0.000079,0.254545,False,,,"[0.1751188589540412, 0.187797147385103]"
901,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",T4_activated,B_cells_oligoclonal,True,0.220505,0.000079,0.254545,False,,,"[0.231578947368421, 0.1684210526315789]"


In [114]:
# checking whether any complexes interact with other complexes and how many subunits each complex contains here

n_subunits_upreg = []

for n_row in list(df_output_upreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) and isinstance(curr_partner_B_genes, list): # if partner A AND B is a complex
        print('row', n_row)
        print('both are complexes')
        
    if isinstance(curr_partner_A_genes, list): # if partner A is a complex
        #print('partner A is a complex, len is:', len(curr_partner_A_genes))
        n_subunits_upreg.append(len(curr_partner_A_genes))
        
    if isinstance(curr_partner_B_genes, list): # if partner B is a complex
        #print('partner B is a complex, len is:', len(curr_partner_B_genes))
        n_subunits_upreg.append(len(curr_partner_B_genes))

np.unique(n_subunits_upreg, return_counts=True)

(array([2]), array([235]))

#### So 1 more scenario to ignore: there are no interactions of a complex with a complex
#### And max complex size is 2 subunits here

In [115]:
df_output_upreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
57,COL19A1_integrin_a1b1_complex,COL19A1,"[ITGA1, ITGB1]",B_cells_memory_activated,MAIT_cells_activated,True,0.060116,0.000017,0.233796,False,,,"[0.1050228310502283, 0.2146118721461187]"
58,COL19A1_integrin_a1b1_complex,COL19A1,"[ITGA1, ITGB1]",B_cells_memory_activated,NK_CD56_bright_activated,True,0.060116,0.000017,0.233796,False,,,"[0.125, 0.205]"
59,COL19A1_integrin_a1b1_complex,COL19A1,"[ITGA1, ITGB1]",B_cells_naive_activated,MAIT_cells_activated,True,0.138726,0.005849,0.187643,False,,,"[0.1050228310502283, 0.2146118721461187]"
60,COL19A1_integrin_a1b1_complex,COL19A1,"[ITGA1, ITGB1]",B_cells_naive_activated,NK_CD56_bright_activated,True,0.138726,0.005849,0.187643,False,,,"[0.125, 0.205]"
61,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Myeloid_cells,B_cells_memory,True,0.448549,0.002648,0.596154,False,,,"[0.3256484149855908, 0.3285302593659942]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
898,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",T4_activated,B_cells_memory,True,0.220505,0.000079,0.254545,False,,,"[0.2161383285302593, 0.2651296829971181]"
899,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",T4_activated,B_cells_memory_activated,True,0.220505,0.000079,0.254545,False,,,"[0.2777777777777778, 0.1645622895622895]"
900,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",T4_activated,B_cells_naive,True,0.220505,0.000079,0.254545,False,,,"[0.1751188589540412, 0.187797147385103]"
901,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",T4_activated,B_cells_oligoclonal,True,0.220505,0.000079,0.254545,False,,,"[0.231578947368421, 0.1684210526315789]"


In [116]:
# Duplicating the table and then choosing only 0th or 1st values for the complexes
df_output_upreg_complex_member_1 = df_output_upreg_complex.copy()
df_output_upreg_complex_member_2 = df_output_upreg_complex.copy()

In [117]:
# splitting complex interaction entries by subunits / members
# for any values in these tables, if it's a list, save the 0th for df_output_upreg_complex_member_1 and 1st for df_output_upreg_complex_member_2
for n_row in list(df_output_upreg_complex.index):
    #print('row', n_row)
    
    for col in df_output_upreg_complex.columns:
        #print('col', col)
        if isinstance(df_output_upreg_complex.loc[n_row, col], list):
            df_output_upreg_complex_member_1.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][0]
            df_output_upreg_complex_member_2.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][1]
    

In [118]:
df_output_upreg_complex_member_2.head(10)

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
57,COL19A1_integrin_a1b1_complex,COL19A1,ITGB1,B_cells_memory_activated,MAIT_cells_activated,True,0.060116,1.7e-05,0.233796,False,,,0.214612
58,COL19A1_integrin_a1b1_complex,COL19A1,ITGB1,B_cells_memory_activated,NK_CD56_bright_activated,True,0.060116,1.7e-05,0.233796,False,,,0.205
59,COL19A1_integrin_a1b1_complex,COL19A1,ITGB1,B_cells_naive_activated,MAIT_cells_activated,True,0.138726,0.005849,0.187643,False,,,0.214612
60,COL19A1_integrin_a1b1_complex,COL19A1,ITGB1,B_cells_naive_activated,NK_CD56_bright_activated,True,0.138726,0.005849,0.187643,False,,,0.205
61,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Myeloid_cells,B_cells_memory,True,0.448549,0.002648,0.596154,False,,,0.32853
62,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Myeloid_cells,B_cells_memory_activated,True,0.448549,0.002648,0.596154,False,,,0.337963
63,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Myeloid_cells,MAIT_cells_activated,True,0.448549,0.002648,0.596154,False,,,0.324201
64,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Myeloid_cells,NK_CD16_bright,True,0.448549,0.002648,0.596154,False,,,0.21363
65,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Myeloid_cells,NK_CD16_bright_activated,True,0.448549,0.002648,0.596154,False,,,0.131174
66,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Myeloid_cells,NK_CD56_bright,True,0.448549,0.002648,0.596154,False,,,0.231092


In [119]:
# making indices uniques for concatenantion later
df_output_upreg_complex_member_1.index = [idx + '_member_1' for idx in df_output_upreg_complex_member_1.index]
df_output_upreg_complex_member_2.index = [idx + '_member_2' for idx in df_output_upreg_complex_member_2.index]

# combined order of indices in the concatenated table
inx_concat = [[i,j] for i,j in zip(list(df_output_upreg_complex_member_1.index),
                                   list(df_output_upreg_complex_member_2.index)) ]
inx_concat = [item for sublist in inx_concat for item in sublist]

In [120]:
df_output_upreg_complex_deconv = pd.concat([df_output_upreg_complex_member_1,df_output_upreg_complex_member_2])

In [121]:
df_output_upreg_complex_deconv.head(10)

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
57_member_1,COL19A1_integrin_a1b1_complex,COL19A1,ITGA1,B_cells_memory_activated,MAIT_cells_activated,True,0.060116,1.7e-05,0.233796,False,,,0.105023
58_member_1,COL19A1_integrin_a1b1_complex,COL19A1,ITGA1,B_cells_memory_activated,NK_CD56_bright_activated,True,0.060116,1.7e-05,0.233796,False,,,0.125
59_member_1,COL19A1_integrin_a1b1_complex,COL19A1,ITGA1,B_cells_naive_activated,MAIT_cells_activated,True,0.138726,0.005849,0.187643,False,,,0.105023
60_member_1,COL19A1_integrin_a1b1_complex,COL19A1,ITGA1,B_cells_naive_activated,NK_CD56_bright_activated,True,0.138726,0.005849,0.187643,False,,,0.125
61_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Myeloid_cells,B_cells_memory,True,0.448549,0.002648,0.596154,False,,,0.325648
62_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Myeloid_cells,B_cells_memory_activated,True,0.448549,0.002648,0.596154,False,,,0.176347
63_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Myeloid_cells,MAIT_cells_activated,True,0.448549,0.002648,0.596154,False,,,0.214612
64_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Myeloid_cells,NK_CD16_bright,True,0.448549,0.002648,0.596154,False,,,0.166448
65_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Myeloid_cells,NK_CD16_bright_activated,True,0.448549,0.002648,0.596154,False,,,0.129555
66_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Myeloid_cells,NK_CD56_bright,True,0.448549,0.002648,0.596154,False,,,0.252101


In [122]:
# organising entries so that member 1 entry is followed by member 2 entry
df_output_upreg_complex_deconv = df_output_upreg_complex_deconv.loc[inx_concat,:]

In [123]:
df_output_upreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
57_member_1,COL19A1_integrin_a1b1_complex,COL19A1,ITGA1,B_cells_memory_activated,MAIT_cells_activated,True,0.060116,0.000017,0.233796,False,,,0.105023
57_member_2,COL19A1_integrin_a1b1_complex,COL19A1,ITGB1,B_cells_memory_activated,MAIT_cells_activated,True,0.060116,0.000017,0.233796,False,,,0.214612
58_member_1,COL19A1_integrin_a1b1_complex,COL19A1,ITGA1,B_cells_memory_activated,NK_CD56_bright_activated,True,0.060116,0.000017,0.233796,False,,,0.125
58_member_2,COL19A1_integrin_a1b1_complex,COL19A1,ITGB1,B_cells_memory_activated,NK_CD56_bright_activated,True,0.060116,0.000017,0.233796,False,,,0.205
59_member_1,COL19A1_integrin_a1b1_complex,COL19A1,ITGA1,B_cells_naive_activated,MAIT_cells_activated,True,0.138726,0.005849,0.187643,False,,,0.105023
...,...,...,...,...,...,...,...,...,...,...,...,...,...
900_member_2,IFNG_Type_II_IFNR,IFNG,IFNGR2,T4_activated,B_cells_naive,True,0.220505,0.000079,0.254545,False,,,0.187797
901_member_1,IFNG_Type_II_IFNR,IFNG,IFNGR1,T4_activated,B_cells_oligoclonal,True,0.220505,0.000079,0.254545,False,,,0.231579
901_member_2,IFNG_Type_II_IFNR,IFNG,IFNGR2,T4_activated,B_cells_oligoclonal,True,0.220505,0.000079,0.254545,False,,,0.168421
902_member_1,IFNG_Type_II_IFNR,IFNG,IFNGR1,T4_activated,Myeloid_cells,True,0.220505,0.000079,0.254545,False,,,0.45614


In [124]:
# saving these deconvoluted complex interactions
df_output_upreg_complex_deconv.to_csv(save_path + '20210416_cellphone_interactions_table_with_gene_stats_upreg_in_CVID_twin_no_logFC_cutoff_complexes_deconv_into_pseduinteractions.csv')

In [125]:
# saving the simple interactions table
df_output_upreg_simple.to_csv(save_path + '20210416_cellphone_interactions_table_with_gene_stats_upreg_in_CVID_twin_no_logFC_cutoff_simple_interactions.csv')

In [126]:
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/202102_twins_reanalysis/'

In [127]:
#df_output_upreg.to_csv(save_path + '20210322_cellphone_interactions_table_with_gene_stats_upreg_in_CVID_twin_no_logFC_cutoff.csv')

### Downreg interactions

In [128]:
faulty_index_count = 0

vec2_append_downreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_downreg_DE.index:
    
    print(interaction, list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index).index(interaction) + 1, 
          'out of', len(df_Exrp_LR_in_celltype_pairs_downreg_DE.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        #print(curr_count)
        
        # row by row
        vec2_append_downreg[str(curr_count)] = {}
        
        vec2_append_downreg[str(curr_count)]['interaction'] = interaction
        
        #print(celltype_pair)
        
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
        
        #print('curr partner A genes', curr_partner_A_genes, 'len:', len(curr_partner_A_genes))
        #print('curr partner B genes', curr_partner_B_genes, 'len:', len(curr_partner_B_genes))
        
        vec2_append_downreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_downreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
        
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        vec2_append_downreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_downreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_df_downreg[DE_df_downreg['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_df_downreg[DE_df_downreg['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])

        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            vec2_append_downreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_downreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_cluster'])
            
        else: # if not DE, add 'NA'
            vec2_append_downreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_downreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            vec2_append_downreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_downreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_cluster'])
        else: # if not DE, add 'NA'
            vec2_append_downreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_downreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1
    
    

CD40LG_integrin_a5b1_complex 1 out of 39
FCER2_integrin_aMb2_complex 2 out of 39
ICAM1_integrin_aMb2_complex 3 out of 39
FCER2_integrin_aXb2_complex 4 out of 39
ICAM1_integrin_aXb2_complex 5 out of 39
FCER2_CR2 6 out of 39
CXCR3_CXCL9 7 out of 39
DPP4_CXCL9 8 out of 39
CD94:NKG2A_HLA-E 9 out of 39
ICAM1_SPN 10 out of 39
ICAM1_ITGAL 11 out of 39
ICAM1_integrin_aLb2_complex 12 out of 39
FAS_FASLG 13 out of 39
CCL4_CCR5 14 out of 39
CCL3_CCR5 15 out of 39
KLRB1_CLEC2D 16 out of 39
TNF_TNFRSF1A 17 out of 39
LTA_TNFRSF1A 18 out of 39
TNF_TNFRSF1B 19 out of 39
LTA_TNFRSF1B 20 out of 39
CD27_CD70 21 out of 39
CD40_CD40LG 22 out of 39
IL2_receptor_HA_IL2 23 out of 39
IL2_receptor_I_IL2 24 out of 39
CCL3_CCR1 25 out of 39
CCL22_CCR4 26 out of 39
CCL22_DPP4 27 out of 39
CXCL10_DPP4 28 out of 39
CXCL10_CXCR3 29 out of 39
TNFSF14_LTBR 30 out of 39
LTA_TNFRSF14 31 out of 39
TNFSF14_TNFRSF14 32 out of 39
CD55_ADGRE5 33 out of 39
CD28_CD86 34 out of 39
CTLA4_CD86 35 out of 39
HLA-E_KLRC1 36 out of 39

In [129]:
# outlining the final table format
df_output_downreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                              index = list(vec2_append_downreg.keys())
                            )
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,,,,,,,,,,,,,
715,,,,,,,,,,,,,
716,,,,,,,,,,,,,
717,,,,,,,,,,,,,


In [130]:
list(df_output_downreg.columns) == list(vec2_append_downreg['0'].keys())

True

In [131]:
len(vec2_append_downreg.keys())

719

In [132]:
vec2_append_downreg['0'].keys()

dict_keys(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A', 'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A', 'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B', 'adj_pval_gene_B', 'percent_expr_gene_B'])

In [133]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,,,,,,,,,,,,,
715,,,,,,,,,,,,,
716,,,,,,,,,,,,,
717,,,,,,,,,,,,,


In [134]:
%%time

for i in list(vec2_append_downreg.keys()):
    #print(i)
    curr_keys = list(vec2_append_downreg[i].keys())
    for col in curr_keys:
        df_output_downreg.loc[i,col] = vec2_append_downreg[i][col]

CPU times: user 549 ms, sys: 3.82 ms, total: 553 ms
Wall time: 552 ms


In [135]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,CD40LG_integrin_a5b1_complex,[CD40LG],"[ITGA5, ITGB1]",T4_activated,Myeloid_cells,True,[-0.18247463247318],[5.900370567720141e-05],[0.450909],False,,,"[0.3368421052631579, 0.5368421052631579]"
1,CD40LG_integrin_a5b1_complex,[CD40LG],"[ITGA5, ITGB1]",T4_activated,NK_CD16_bright,True,[-0.18247463247318],[5.900370567720141e-05],[0.450909],False,,,"[0.1887287024901703, 0.1664482306684141]"
2,CD40LG_integrin_a5b1_complex,[CD40LG],"[ITGA5, ITGB1]",T4_activated,NK_CD16_bright_activated,True,[-0.18247463247318],[5.900370567720141e-05],[0.450909],False,,,"[0.1028340080971659, 0.1295546558704453]"
3,CD40LG_integrin_a5b1_complex,[CD40LG],"[ITGA5, ITGB1]",T4_activated,NK_CD56_bright,True,[-0.18247463247318],[5.900370567720141e-05],[0.450909],False,,,"[0.1260504201680672, 0.2521008403361344]"
4,CD40LG_integrin_a5b1_complex,[CD40LG],"[ITGA5, ITGB1]",T4_activated,T8_memory,True,[-0.18247463247318],[5.900370567720141e-05],[0.450909],False,,,"[0.1344086021505376, 0.2813620071684587]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,LTBR_LTB,[LTBR],[LTB],Myeloid_cells,T_gd,False,,,[0.2982456140350877],True,[-0.369451337403812],[0.0005790742968125],[0.2464179999999999]
715,CCR4_CCL17,[CCR4],[CCL17],MAIT_cells_activated,B_cells_memory_activated,False,,,[0.1735159817351598],True,[-0.162645061357469],[4.15134895682146e-05],[0.1111109999999999]
716,CCR4_CCL17,[CCR4],[CCL17],T4_activated,B_cells_memory_activated,False,,,[0.3621455415100786],True,[-0.162645061357469],[4.15134895682146e-05],[0.1111109999999999]
717,CCR4_CCL17,[CCR4],[CCL17],T8_activated_1,B_cells_memory_activated,False,,,[0.1096385542168674],True,[-0.162645061357469],[4.15134895682146e-05],[0.1111109999999999]


In [136]:
cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_downreg.index):
    #print('row', row)
    for col in cols2correct:
        #print('column', col)
        curr_value = df_output_downreg.loc[row, col] # with []
        #print(curr_value)
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 - complex genes that is
        #if not isinstance(curr_value, list) & (curr_value != 'NA'): # ignoring NAs and lists of length > 1 - complex genes that is
            df_output_downreg.loc[row, col] = curr_value[0] # this just get the element - string if a gene, numerical value if it's a stat
            

### Splitting tables into 2 tables: simple interactions and complex interactions, latter being deconvoluted into pseudo-interactions for each subunit of a complex

In [137]:
# getting indices of complex interactions
complex_interaction_rows_downreg = []

for n_row in list(df_output_downreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_downreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_downreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) or isinstance(curr_partner_B_genes, list): # if partner A or B is a complex
        complex_interaction_rows_downreg.append(n_row)
        

In [138]:
len(complex_interaction_rows_downreg)

83

In [139]:
# splitting simple and complex interactions into 2 separate tables
df_output_downreg_complex = df_output_downreg.loc[complex_interaction_rows_downreg,:]
df_output_downreg_simple = df_output_downreg.drop(complex_interaction_rows_downreg, axis=0)

In [140]:
df_output_downreg_simple.shape

(636, 13)

In [141]:
df_output_downreg_complex.shape

(83, 13)

In [142]:
df_output_downreg.shape

(719, 13)

In [143]:
df_output_downreg_simple

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
11,FCER2_CR2,FCER2,CR2,B_cells_memory_activated,B_cells_memory_activated,True,-0.333493,0.0,0.290509,False,,,0.129209
12,CXCR3_CXCL9,CXCR3,CXCL9,B_cells_memory,B_cells_memory_activated,False,,,0.123919,True,-0.739335,0.0,0.273148
13,CXCR3_CXCL9,CXCR3,CXCL9,MAIT_cells,B_cells_memory_activated,False,,,0.464883,True,-0.739335,0.0,0.273148
14,CXCR3_CXCL9,CXCR3,CXCL9,MAIT_cells_activated,B_cells_memory_activated,False,,,0.269406,True,-0.739335,0.0,0.273148
15,CXCR3_CXCL9,CXCR3,CXCL9,NK_CD16_bright,B_cells_memory_activated,False,,,0.31848,True,-0.739335,0.0,0.273148
...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,LTBR_LTB,LTBR,LTB,Myeloid_cells,T_gd,False,,,0.298246,True,-0.369451,0.000579,0.246418
715,CCR4_CCL17,CCR4,CCL17,MAIT_cells_activated,B_cells_memory_activated,False,,,0.173516,True,-0.162645,0.000042,0.111111
716,CCR4_CCL17,CCR4,CCL17,T4_activated,B_cells_memory_activated,False,,,0.362146,True,-0.162645,0.000042,0.111111
717,CCR4_CCL17,CCR4,CCL17,T8_activated_1,B_cells_memory_activated,False,,,0.109639,True,-0.162645,0.000042,0.111111


In [144]:
df_output_downreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,CD40LG_integrin_a5b1_complex,CD40LG,"[ITGA5, ITGB1]",T4_activated,Myeloid_cells,True,-0.182475,0.000059,0.450909,False,,,"[0.3368421052631579, 0.5368421052631579]"
1,CD40LG_integrin_a5b1_complex,CD40LG,"[ITGA5, ITGB1]",T4_activated,NK_CD16_bright,True,-0.182475,0.000059,0.450909,False,,,"[0.1887287024901703, 0.1664482306684141]"
2,CD40LG_integrin_a5b1_complex,CD40LG,"[ITGA5, ITGB1]",T4_activated,NK_CD16_bright_activated,True,-0.182475,0.000059,0.450909,False,,,"[0.1028340080971659, 0.1295546558704453]"
3,CD40LG_integrin_a5b1_complex,CD40LG,"[ITGA5, ITGB1]",T4_activated,NK_CD56_bright,True,-0.182475,0.000059,0.450909,False,,,"[0.1260504201680672, 0.2521008403361344]"
4,CD40LG_integrin_a5b1_complex,CD40LG,"[ITGA5, ITGB1]",T4_activated,T8_memory,True,-0.182475,0.000059,0.450909,False,,,"[0.1344086021505376, 0.2813620071684587]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
461,IL2_receptor_I_IL2,"[IL2RG, IL2RB]",IL2,T8_memory,T4_activated,False,,,"[0.5842293906810035, 0.460573476702509]",True,-0.22707,0.000176,0.170909
462,IL2_receptor_I_IL2,"[IL2RG, IL2RB]",IL2,T8_naive,T4_activated,False,,,"[0.5147058823529411, 0.159313725490196]",True,-0.22707,0.000176,0.170909
463,IL2_receptor_I_IL2,"[IL2RG, IL2RB]",IL2,T_gd,T4_activated,False,,,"[0.6327116212338594, 0.4734576757532281]",True,-0.22707,0.000176,0.170909
464,IL2_receptor_I_IL2,"[IL2RG, IL2RB]",IL2,T_regs,T4_activated,False,,,"[0.7757731958762887, 0.5902061855670103]",True,-0.22707,0.000176,0.170909


In [145]:
# checking whether any complexes interact with other complexes and how many subunits each complex contains here

n_subunits_downreg = []

for n_row in list(df_output_downreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_downreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_downreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) and isinstance(curr_partner_B_genes, list): # if partner A AND B is a complex
        print('row', n_row)
        print('both are complexes')
        
    if isinstance(curr_partner_A_genes, list): # if partner A is a complex
        print('row', n_row)
        print('curr_partner_A_genes',curr_partner_A_genes)
        print('partner A is a complex, len is:', len(curr_partner_A_genes))
        n_subunits_downreg.append(len(curr_partner_A_genes))
        
    if isinstance(curr_partner_B_genes, list): # if partner B is a complex
        print('row', n_row)
        print('curr_partner_B_genes',curr_partner_B_genes)
        print('partner B is a complex, len is:', len(curr_partner_B_genes))
        n_subunits_downreg.append(len(curr_partner_B_genes))

np.unique(n_subunits_downreg, return_counts=True)

row 0
curr_partner_B_genes ['ITGA5', 'ITGB1']
partner B is a complex, len is: 2
row 1
curr_partner_B_genes ['ITGA5', 'ITGB1']
partner B is a complex, len is: 2
row 2
curr_partner_B_genes ['ITGA5', 'ITGB1']
partner B is a complex, len is: 2
row 3
curr_partner_B_genes ['ITGA5', 'ITGB1']
partner B is a complex, len is: 2
row 4
curr_partner_B_genes ['ITGA5', 'ITGB1']
partner B is a complex, len is: 2
row 5
curr_partner_B_genes ['ITGB2', 'ITGAM']
partner B is a complex, len is: 2
row 6
curr_partner_B_genes ['ITGB2', 'ITGAM']
partner B is a complex, len is: 2
row 7
curr_partner_B_genes ['ITGB2', 'ITGAM']
partner B is a complex, len is: 2
row 8
curr_partner_B_genes ['ITGB2', 'ITGAX']
partner B is a complex, len is: 2
row 9
curr_partner_B_genes ['ITGB2', 'ITGAX']
partner B is a complex, len is: 2
row 10
curr_partner_B_genes ['ITGB2', 'ITGAX']
partner B is a complex, len is: 2
row 30
curr_partner_A_genes ['KLRC1', 'KLRD1']
partner A is a complex, len is: 2
row 31
curr_partner_A_genes ['KLRC1', 

(array([2, 3]), array([71, 12]))

#### So 1 more scenario to ignore: there are no interactions of a complex with a complex
#### And max complex size is 3 subunits - in case of IL2R (indeed, there are subunits: alpha, beta and gamma)

In [146]:
df_output_downreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,CD40LG_integrin_a5b1_complex,CD40LG,"[ITGA5, ITGB1]",T4_activated,Myeloid_cells,True,-0.182475,0.000059,0.450909,False,,,"[0.3368421052631579, 0.5368421052631579]"
1,CD40LG_integrin_a5b1_complex,CD40LG,"[ITGA5, ITGB1]",T4_activated,NK_CD16_bright,True,-0.182475,0.000059,0.450909,False,,,"[0.1887287024901703, 0.1664482306684141]"
2,CD40LG_integrin_a5b1_complex,CD40LG,"[ITGA5, ITGB1]",T4_activated,NK_CD16_bright_activated,True,-0.182475,0.000059,0.450909,False,,,"[0.1028340080971659, 0.1295546558704453]"
3,CD40LG_integrin_a5b1_complex,CD40LG,"[ITGA5, ITGB1]",T4_activated,NK_CD56_bright,True,-0.182475,0.000059,0.450909,False,,,"[0.1260504201680672, 0.2521008403361344]"
4,CD40LG_integrin_a5b1_complex,CD40LG,"[ITGA5, ITGB1]",T4_activated,T8_memory,True,-0.182475,0.000059,0.450909,False,,,"[0.1344086021505376, 0.2813620071684587]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
461,IL2_receptor_I_IL2,"[IL2RG, IL2RB]",IL2,T8_memory,T4_activated,False,,,"[0.5842293906810035, 0.460573476702509]",True,-0.22707,0.000176,0.170909
462,IL2_receptor_I_IL2,"[IL2RG, IL2RB]",IL2,T8_naive,T4_activated,False,,,"[0.5147058823529411, 0.159313725490196]",True,-0.22707,0.000176,0.170909
463,IL2_receptor_I_IL2,"[IL2RG, IL2RB]",IL2,T_gd,T4_activated,False,,,"[0.6327116212338594, 0.4734576757532281]",True,-0.22707,0.000176,0.170909
464,IL2_receptor_I_IL2,"[IL2RG, IL2RB]",IL2,T_regs,T4_activated,False,,,"[0.7757731958762887, 0.5902061855670103]",True,-0.22707,0.000176,0.170909


In [147]:
# Duplicating the table and then choosing only 0th or 1st or 2nd values for the complexes
df_output_downreg_complex_member_1 = df_output_downreg_complex.copy()
df_output_downreg_complex_member_2 = df_output_downreg_complex.copy()
df_output_downreg_complex_member_3 = df_output_downreg_complex.copy()

In [148]:
# splitting complex interaction entries by subunits / members
# for any values in these tables, if it's a list, save the 0th for df_output_upreg_complex_member_1 and 1st for df_output_upreg_complex_member_2

# which rows contain interaction with a 3-subunit complex? to then subset df_output_downreg_complex_member_3
subunit_3_rows = []

for n_row in list(df_output_downreg_complex.index):
    #print('outside for loop, row', n_row)
    
    for col in df_output_downreg_complex.columns:
        #print('col', col)
        if isinstance(df_output_downreg_complex.loc[n_row, col], list):
            df_output_downreg_complex_member_1.loc[n_row, col] = df_output_downreg_complex.loc[n_row, col][0]
            df_output_downreg_complex_member_2.loc[n_row, col] = df_output_downreg_complex.loc[n_row, col][1]
            
            # additionally, if there are 3 subunits, separate into 3 entries
            if len(df_output_downreg_complex.loc[n_row, col]) == 3:
                df_output_downreg_complex_member_3.loc[n_row, col] = df_output_downreg_complex.loc[n_row, col][2]
                #print('3-subunit complex')
                #print('row', n_row)
                #print('adding to subunit_3_rows')
                subunit_3_rows.append(n_row)


In [149]:
np.unique(subunit_3_rows, return_counts=True)

(array(['437', '438', '439', '440', '441', '442', '443', '444', '445',
        '446', '447', '448'], dtype='<U3'),
 array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]))

In [150]:
subunit_3_rows = list(set(subunit_3_rows))

In [151]:
np.unique(subunit_3_rows, return_counts=True)

(array(['437', '438', '439', '440', '441', '442', '443', '444', '445',
        '446', '447', '448'], dtype='<U3'),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]))

In [153]:
df_output_downreg_complex.loc['437',:]

interaction                                          IL2_receptor_HA_IL2
partner_A_genes                                    [IL2RA, IL2RG, IL2RB]
partner_B_genes                                                      IL2
celltype_A                                                B_cells_memory
celltype_B                                                  T4_activated
is_partner_A_DE                                                    False
logFC_gene_A                                                          NA
adj_pval_gene_A                                                       NA
percent_expr_gene_A    [0.2680115273775216, 0.659942363112392, 0.1642...
is_partner_B_DE                                                     True
logFC_gene_B                                                    -0.22707
adj_pval_gene_B                                                 0.000176
percent_expr_gene_B                                             0.170909
Name: 437, dtype: object

In [154]:
df_output_downreg_complex.loc['440',:]

interaction                                          IL2_receptor_HA_IL2
partner_A_genes                                    [IL2RA, IL2RG, IL2RB]
partner_B_genes                                                      IL2
celltype_A                                                 Myeloid_cells
celltype_B                                                  T4_activated
is_partner_A_DE                                                    False
logFC_gene_A                                                          NA
adj_pval_gene_A                                                       NA
percent_expr_gene_A    [0.119298245614035, 0.4350877192982456, 0.1017...
is_partner_B_DE                                                     True
logFC_gene_B                                                    -0.22707
adj_pval_gene_B                                                 0.000176
percent_expr_gene_B                                             0.170909
Name: 440, dtype: object

In [155]:
len(subunit_3_rows)

12

In [156]:
subunit_3_rows

['439',
 '444',
 '445',
 '437',
 '442',
 '443',
 '446',
 '440',
 '438',
 '441',
 '448',
 '447']

In [157]:
# in df_output_downreg_complex_member_3 entries of 3rd members are correct
# removing other entries of interactions with less than 3 subunit complexes
df_output_downreg_complex_member_3 = df_output_downreg_complex_member_3.loc[subunit_3_rows,:]

In [158]:
df_output_downreg_complex_member_3

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
439,IL2_receptor_HA_IL2,IL2RB,IL2,MAIT_cells_activated,T4_activated,False,,,0.561644,True,-0.22707,0.000176,0.170909
444,IL2_receptor_HA_IL2,IL2RB,IL2,T8_activated_2,T4_activated,False,,,0.395833,True,-0.22707,0.000176,0.170909
445,IL2_receptor_HA_IL2,IL2RB,IL2,T8_memory,T4_activated,False,,,0.460573,True,-0.22707,0.000176,0.170909
437,IL2_receptor_HA_IL2,IL2RB,IL2,B_cells_memory,T4_activated,False,,,0.164265,True,-0.22707,0.000176,0.170909
442,IL2_receptor_HA_IL2,IL2RB,IL2,NK_CD56_bright_activated,T4_activated,False,,,0.595,True,-0.22707,0.000176,0.170909
443,IL2_receptor_HA_IL2,IL2RB,IL2,T8_activated_1,T4_activated,False,,,0.63253,True,-0.22707,0.000176,0.170909
446,IL2_receptor_HA_IL2,IL2RB,IL2,T_gd,T4_activated,False,,,0.473458,True,-0.22707,0.000176,0.170909
440,IL2_receptor_HA_IL2,IL2RB,IL2,Myeloid_cells,T4_activated,False,,,0.101754,True,-0.22707,0.000176,0.170909
438,IL2_receptor_HA_IL2,IL2RB,IL2,B_cells_memory_activated,T4_activated,False,,,0.108165,True,-0.22707,0.000176,0.170909
441,IL2_receptor_HA_IL2,IL2RB,IL2,NK_CD16_bright_activated,T4_activated,False,,,0.533603,True,-0.22707,0.000176,0.170909


In [159]:
# making indices uniques for concatenantion later
df_output_downreg_complex_member_1.index = [idx + '_member_1' for idx in df_output_downreg_complex_member_1.index]
df_output_downreg_complex_member_2.index = [idx + '_member_2' for idx in df_output_downreg_complex_member_2.index]
df_output_downreg_complex_member_3.index = [idx + '_member_3' for idx in df_output_downreg_complex_member_3.index]

In [160]:
# getting all indices
idx_concat = list(df_output_downreg_complex_member_1.index) + list(df_output_downreg_complex_member_2.index) + list(df_output_downreg_complex_member_3.index)

# sorting by original index number, so that the order is: member 1, member 2 and (where applicable) member 3
idx_concat.sort(key = lambda x: x.split('_')[0])
idx_concat

['0_member_1',
 '0_member_2',
 '1_member_1',
 '1_member_2',
 '10_member_1',
 '10_member_2',
 '105_member_1',
 '105_member_2',
 '106_member_1',
 '106_member_2',
 '107_member_1',
 '107_member_2',
 '108_member_1',
 '108_member_2',
 '109_member_1',
 '109_member_2',
 '110_member_1',
 '110_member_2',
 '111_member_1',
 '111_member_2',
 '112_member_1',
 '112_member_2',
 '113_member_1',
 '113_member_2',
 '114_member_1',
 '114_member_2',
 '115_member_1',
 '115_member_2',
 '116_member_1',
 '116_member_2',
 '117_member_1',
 '117_member_2',
 '118_member_1',
 '118_member_2',
 '119_member_1',
 '119_member_2',
 '120_member_1',
 '120_member_2',
 '121_member_1',
 '121_member_2',
 '122_member_1',
 '122_member_2',
 '123_member_1',
 '123_member_2',
 '124_member_1',
 '124_member_2',
 '125_member_1',
 '125_member_2',
 '126_member_1',
 '126_member_2',
 '127_member_1',
 '127_member_2',
 '128_member_1',
 '128_member_2',
 '129_member_1',
 '129_member_2',
 '130_member_1',
 '130_member_2',
 '131_member_1',
 '131_m

In [161]:
df_output_downreg_complex_deconv = pd.concat([df_output_downreg_complex_member_1, df_output_downreg_complex_member_2, df_output_downreg_complex_member_3])

In [162]:
df_output_downreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0_member_1,CD40LG_integrin_a5b1_complex,CD40LG,ITGA5,T4_activated,Myeloid_cells,True,-0.182475,0.000059,0.450909,False,,,0.336842
1_member_1,CD40LG_integrin_a5b1_complex,CD40LG,ITGA5,T4_activated,NK_CD16_bright,True,-0.182475,0.000059,0.450909,False,,,0.188729
2_member_1,CD40LG_integrin_a5b1_complex,CD40LG,ITGA5,T4_activated,NK_CD16_bright_activated,True,-0.182475,0.000059,0.450909,False,,,0.102834
3_member_1,CD40LG_integrin_a5b1_complex,CD40LG,ITGA5,T4_activated,NK_CD56_bright,True,-0.182475,0.000059,0.450909,False,,,0.12605
4_member_1,CD40LG_integrin_a5b1_complex,CD40LG,ITGA5,T4_activated,T8_memory,True,-0.182475,0.000059,0.450909,False,,,0.134409
...,...,...,...,...,...,...,...,...,...,...,...,...,...
440_member_3,IL2_receptor_HA_IL2,IL2RB,IL2,Myeloid_cells,T4_activated,False,,,0.101754,True,-0.22707,0.000176,0.170909
438_member_3,IL2_receptor_HA_IL2,IL2RB,IL2,B_cells_memory_activated,T4_activated,False,,,0.108165,True,-0.22707,0.000176,0.170909
441_member_3,IL2_receptor_HA_IL2,IL2RB,IL2,NK_CD16_bright_activated,T4_activated,False,,,0.533603,True,-0.22707,0.000176,0.170909
448_member_3,IL2_receptor_HA_IL2,IL2RB,IL2,T4_activated,T4_activated,False,,,0.546976,True,-0.22707,0.000176,0.170909


In [163]:
# organising entries so that member 1 entry is followed by member 2 entry and then member 3 entry if applicable
df_output_downreg_complex_deconv = df_output_downreg_complex_deconv.loc[idx_concat,:]

In [164]:
df_output_downreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0_member_1,CD40LG_integrin_a5b1_complex,CD40LG,ITGA5,T4_activated,Myeloid_cells,True,-0.182475,0.000059,0.450909,False,,,0.336842
0_member_2,CD40LG_integrin_a5b1_complex,CD40LG,ITGB1,T4_activated,Myeloid_cells,True,-0.182475,0.000059,0.450909,False,,,0.536842
1_member_1,CD40LG_integrin_a5b1_complex,CD40LG,ITGA5,T4_activated,NK_CD16_bright,True,-0.182475,0.000059,0.450909,False,,,0.188729
1_member_2,CD40LG_integrin_a5b1_complex,CD40LG,ITGB1,T4_activated,NK_CD16_bright,True,-0.182475,0.000059,0.450909,False,,,0.166448
10_member_1,ICAM1_integrin_aXb2_complex,ICAM1,ITGB2,B_cells_naive_activated,Myeloid_cells,True,-0.180361,0.045116,0.196796,False,,,0.673684
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7_member_2,ICAM1_integrin_aMb2_complex,ICAM1,ITGAM,B_cells_naive_activated,Myeloid_cells,True,-0.180361,0.045116,0.196796,False,,,0.140351
8_member_1,FCER2_integrin_aXb2_complex,FCER2,ITGB2,B_cells_memory_activated,Myeloid_cells,True,-0.333493,0.0,0.290509,False,,,0.673684
8_member_2,FCER2_integrin_aXb2_complex,FCER2,ITGAX,B_cells_memory_activated,Myeloid_cells,True,-0.333493,0.0,0.290509,False,,,0.4
9_member_1,ICAM1_integrin_aXb2_complex,ICAM1,ITGB2,B_cells_memory_activated,Myeloid_cells,True,-0.251722,0.0,0.561343,False,,,0.673684


In [165]:
# saving these deconvoluted complex interactions
df_output_downreg_complex_deconv.to_csv(save_path + '20210416_cellphone_interactions_table_with_gene_stats_downreg_in_CVID_twin_no_logFC_cutoff_complexes_deconv_into_pseduinteractions.csv')

In [166]:
# saving the simple interactions table
df_output_downreg_simple.to_csv(save_path + '20210416_cellphone_interactions_table_with_gene_stats_downreg_in_CVID_twin_no_logFC_cutoff_simple_interactions.csv')

In [167]:
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/202102_twins_reanalysis/'

In [91]:
save_path = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/202102_twins_reanalysis/'

In [92]:
df_output_downreg = pd.read_csv(save_path + '20210318_cellphone_interactions_table_with_gene_stats_downreg_in_CVID_twin_no_logFC_cutoff.csv',
                               index_col=0)

In [93]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,CD40LG_integrin_a5b1_complex,CD40LG,"['ITGA5', 'ITGB1']",T4_activated,B_cells_memory,True,-0.182475,0.000059,0.450909,False,,,"[0.0, 0.3256484149855908]"
1,CD40LG_integrin_a5b1_complex,CD40LG,"['ITGA5', 'ITGB1']",T4_activated,B_cells_memory_activated,True,-0.182475,0.000059,0.450909,False,,,"[0.0029461279461279, 0.1763468013468013]"
2,CD40LG_integrin_a5b1_complex,CD40LG,"['ITGA5', 'ITGB1']",T4_activated,B_cells_naive,True,-0.182475,0.000059,0.450909,False,,,"[0.0071105365223012, 0.0711053652230122]"
3,CD40LG_integrin_a5b1_complex,CD40LG,"['ITGA5', 'ITGB1']",T4_activated,B_cells_naive_activated,True,-0.182475,0.000059,0.450909,False,,,"[0.0030349013657056, 0.0819423368740516]"
4,CD40LG_integrin_a5b1_complex,CD40LG,"['ITGA5', 'ITGB1']",T4_activated,B_cells_oligoclonal,True,-0.182475,0.000059,0.450909,False,,,"[0.0, 0.1]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1434,CCR4_CCL17,CCR4,CCL17,T8_memory,B_cells_memory_activated,False,,,0.0555555555555555,True,-0.162645,0.000042,0.111111
1435,CCR4_CCL17,CCR4,CCL17,T8_naive,B_cells_memory_activated,False,,,0.017156862745098,True,-0.162645,0.000042,0.111111
1436,CCR4_CCL17,CCR4,CCL17,T_gd,B_cells_memory_activated,False,,,0.078909612625538,True,-0.162645,0.000042,0.111111
1437,CCR4_CCL17,CCR4,CCL17,T_regs,B_cells_memory_activated,False,,,0.2396907216494845,True,-0.162645,0.000042,0.111111


In [96]:
'ITGA5' in genes_expr_per_cell_type['B_cells_memory']

False

In [97]:
'ITGB1' in genes_expr_per_cell_type['B_cells_memory']

True

In [94]:
Per_df.loc[:,'B_cells_memory']

Unnamed: 0_level_0,B_cells_memory,B_cells_memory_activated,B_cells_naive,B_cells_naive_activated,B_cells_oligoclonal,MAIT_cells,MAIT_cells_activated,Myeloid_cells,NK_CD16_bright,NK_CD16_bright_activated,...,NK_CD56_bright_activated,T4_activated,T4_memory,T4_naive,T8_activated_1,T8_activated_2,T8_memory,T8_naive,T_gd,T_regs
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
RP11-34P13.7,0.000000,0.001263,0.000646,0.000000,0.000000,0.000000,0.000000,0.007018,0.001311,0.000000,...,0.000,0.002733,0.000564,0.000000,0.001205,0.000000,0.000000,0.000000,0.000000,0.000000
FO538757.3,0.002882,0.003367,0.001939,0.001517,0.000000,0.006689,0.004566,0.007018,0.000000,0.000000,...,0.000,0.001025,0.001129,0.000000,0.003614,0.000000,0.000000,0.000000,0.000000,0.002577
FO538757.2,0.129683,0.103114,0.076277,0.053111,0.047368,0.113712,0.068493,0.217544,0.058978,0.039676,...,0.055,0.104202,0.082957,0.038519,0.116867,0.079861,0.086022,0.071078,0.074605,0.087629
AP006222.2,0.037464,0.038721,0.020685,0.010622,0.026316,0.016722,0.036530,0.070175,0.018349,0.007287,...,0.020,0.066963,0.016366,0.020741,0.063855,0.024306,0.016129,0.019608,0.012912,0.028351
RP5-857K21.4,0.002882,0.003367,0.000000,0.000000,0.000000,0.000000,0.009132,0.007018,0.000000,0.000000,...,0.000,0.009566,0.001129,0.001481,0.022892,0.003472,0.000000,0.000000,0.001435,0.007732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
RP4-681N20.5,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000,0.002733,0.000000,0.000000,0.002410,0.003472,0.000000,0.000000,0.000000,0.000000
LINC00659,0.000000,0.000421,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.002621,0.000810,...,0.000,0.001708,0.000000,0.002963,0.002410,0.000000,0.000000,0.007353,0.000000,0.002577
PON3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000,0.000342,0.000000,0.000000,0.001205,0.000000,0.000000,0.000000,0.000000,0.000000
USP43,0.002882,0.001684,0.000000,0.000000,0.000000,0.000000,0.013699,0.000000,0.000000,0.000810,...,0.000,0.000342,0.000000,0.000000,0.000000,0.000000,0.001792,0.000000,0.002869,0.000000


In [81]:
# manually checking some hits from previous version of analysis of twins
for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index):
    if 'TNF' in interaction:
        print(interaction)
        curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction])
        curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset[interaction] > 0].index)
        #print('this interaction is detected in following celltype pairs:', curr_subset_nonzero_interacting_celltype_pairs)

TNFRSF13B_TNFSF13B
TNFRSF17_TNFSF13B
TNFRSF13C_TNFSF13B
TNF_TNFRSF1A
LTA_TNFRSF1A
TNF_TNFRSF1B
LTA_TNFRSF1B
LTA_TNFRSF14
TNFSF14_TNFRSF14


In [144]:
# manually checking some hits from previous version of analysis of twins
for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index):
    if 'CD' in interaction:
        print(interaction)
        curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction])
        curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset[interaction] > 0].index)
        #print('this interaction is detected in following celltype pairs:', curr_subset_nonzero_interacting_celltype_pairs)

CD40_CD40LG
CD55_ADGRE5


### Checking some stuff

In [29]:
# reading the user curated database starting files to see what interactions haven't made it here

path = '/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/'

# saving them into .tsv files now
interactions_curated = pd.read_csv(path + 'interactions_curated_subset_notLuz.tsv', sep='\t', index_col=0)
complexes_curated = pd.read_csv(path + 'complex_curated.tsv', sep='\t', index_col=0)

In [30]:
interactions_curated.columns

Index(['partner_a', 'partner_b', 'protein_name_a', 'protein_name_b',
       'annotation_strategy', 'source', 'is_ppi', 'reactome_complex',
       'reactome_reaction', 'reactome_pathway', 'complexPortal_complex',
       'curator', 'comments'],
      dtype='object')

In [31]:
np.unique(interactions_curated['curator'], return_counts=True)

(array(['JRodriguezUbreva', 'RVentoTormo'], dtype=object), array([   1, 1339]))

In [32]:
np.unique(interactions_curated['annotation_strategy'], return_counts=True)

(array(['curated'], dtype=object), array([1340]))

In [65]:
interactions_curated#[interactions_curated['partner_a'] == 'Q92478']

Unnamed: 0_level_0,partner_a,partner_b,protein_name_a,protein_name_b,annotation_strategy,source,is_ppi,reactome_complex,reactome_reaction,reactome_pathway,complexPortal_complex,curator,comments
id_cp_interaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
,Q9Y275,Q96RJ3,TN13B_HUMAN,TR13C_HUMAN,curated,uniprot;reactome,True,R-HSA-5676540,R-HSA-5676599,R-HSA-1280215,,JRodriguezUbreva,
CPI-CC0041E1D30,IL12,IL12_receptor,,,curated,uniprot,True,,,,,RVentoTormo,
CPI-CC0104F2A96,ACVR_1B2A_receptor,Activin_ligand_ab,,,curated,PMID:22710174;PMID:22991378,True,,,,,RVentoTormo,
CPI-CC045C36F28,ACVR_1A2A_receptor,Activin_ligand_ab,,,curated,less_common_binding;PMID:22710174;PMID:22991378_,True,,,,,RVentoTormo,
CPI-CC051643E98,IL23,IL23_receptor,,,curated,uniprot,True,,,,,RVentoTormo,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
,O14905,Q6FHJ7,WNT9B_HUMAN,SFRP4_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins
,O14905,Q8N474,WNT9B_HUMAN,SFRP1_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins
,O14905,Q92765,WNT9B_HUMAN,SFRP3_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins
,O14905,Q96HF1,WNT9B_HUMAN,SFRP2_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins


In [28]:
#for interaction in int_cpDB['interacting_pair']:
#    if 'IL' in interaction:
#        print(interaction)
#        print(int_cpDB[int_cpDB['interacting_pair'] == interaction])

In [68]:
interactions_curated.columns

Index(['partner_a', 'partner_b', 'protein_name_a', 'protein_name_b',
       'annotation_strategy', 'source', 'is_ppi', 'reactome_complex',
       'reactome_reaction', 'reactome_pathway', 'complexPortal_complex',
       'curator', 'comments'],
      dtype='object')

In [33]:
list(interactions_curated['partner_a'])[:10]

['Q9Y275',
 'IL12',
 'ACVR_1B2A_receptor',
 'ACVR_1A2A_receptor',
 'IL23',
 'ACVR_1B2B_receptor',
 'integrin_aMb2_complex',
 'ACVR_1C2A_receptor',
 'ACVR_1A2B_receptor',
 'IL27']

For example, IL12 and IL12_receptor interaction is in the initial table but is not in the final, going to see if it made it into the expr table

In [70]:
for compl in list(complexes_curated.index):
    if 'OSMR' in compl:
        print(compl)
        print(complexes_curated.loc[compl,:])

OSMR
uniprot_1                                                           Q99650
uniprot_2                                                           P40189
uniprot_3                                                              NaN
uniprot_4                                                              NaN
transmembrane                                                         True
peripheral                                                           False
secreted                                                             False
secreted_desc                                                          NaN
secreted_highlight                                                   False
receptor                                                              True
receptor_desc                                 Cytokine_receptor_IL6_family
integrin                                                             False
other                                                                False
other_desc          

In [24]:
# database generated from 1.3K odd interactions
database_file = '/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/database_20210218/cellphonedb_user_2021-02-18-14_26.db'

import sqlite3

def importdb(file_path):
    conn = sqlite3.connect(file_path)
    c = conn.cursor()
    c.execute("SELECT name FROM sqlite_master WHERE type='table';")
    for table in c.fetchall():
        yield list(c.execute('SELECT * from ?;', (table[0],)))

In [26]:
database = importdb(database_file)

In [30]:
database

AttributeError: 'generator' object has no attribute 'keys'