# Looking at the L/R interactinos enriched in particular pairs of cell types

CellphoneDB

Validation cohort
18.02.2021

This code uses DEGs computed for each cluster to identify relevant L/R interactions between the cells in a microenviroment

Code from Luz rewritten in python

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

In [2]:
# Define cutoff variables
filter_int_user_curated = True # Use only user_curated interactions?
per_cutoff = 0.1 # min % of cells in the cluster required  with expression > 0 for the gene
pval_cutoff = 0.05 # max adjusted p-value requeired to consider a gene as DEG

# as of 18.03.2021, not using the logFC cutoff at all! --> 0 here
logFC_cutoff = 0 # min logFC to consider a gene as DEG

## Load cellphone database


In [3]:
# Gene names
genes_cpDB = pd.read_csv('/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/hsa_uniprot.txt', 
                         sep = '\t')

In [4]:
genes_cpDB

Unnamed: 0,uniprot,Entry,gene_name
0,P01611,KVD12_HUMAN,IGKV1D-12
1,P01615,KVD28_HUMAN,IGKV2D-28
2,Q15334,L2GL1_HUMAN,LLGL1
3,Q6ZP29,LAAT1_HUMAN,PQLC2
4,Q9GZZ8,LACRT_HUMAN,LACRT
...,...,...,...
20311,Q9H900,ZWILC_HUMAN,ZWILCH
20312,P98169,ZXDB_HUMAN,ZXDB
20313,Q2QGD7,ZXDC_HUMAN,ZXDC
20314,Q15942,ZYX_HUMAN,ZYX


In [5]:
# Complexes members
com_cpDB = pd.read_csv('/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/database_20210218/complex_generated.csv')
com_cpDB['complex_name'] = ['complex:' + complex_name for complex_name in com_cpDB['complex_name']]

In [6]:
com_cpDB

Unnamed: 0,complex_name,uniprot_1,uniprot_2,uniprot_3,uniprot_4,transmembrane,peripheral,secreted,secreted_desc,secreted_highlight,receptor,receptor_desc,integrin,other,other_desc,pdb_id,pdb_structure,stoichiometry,comments_complex
0,complex:contactin complex II,Q12860,Q92823,,,True,False,False,,False,False,,False,False,,,FALSE,,NRCAM bind in cis and in trans to contactin-1
1,complex:IL6 receptor,P08887,P40189,,,True,False,False,,False,True,Cytokine receptor IL6 family,False,False,,1p9m,binding,IL6;IL6;IL6R;IL6R;IL6ST;IL6ST,Signal activation necessitate an association w...
2,complex:AT8B4CC50B complex,Q8TF62,Q3MIR4,,,True,False,False,,False,False,,False,False,,,FALSE,,Interacts with beta subunits TMEM30A and TMEM30B
3,complex:KCNV1KCNB2 complex,Q6PIU1,Q92953,,,True,False,False,,False,False,,False,False,,,FALSE,,Has to be associated with another potassium ch...
4,complex:LRFN3LRFN5 complex,Q9BTN0,Q96NI6,,,True,False,False,,False,False,,False,False,,,FALSE,,"Can form heteromeric complexes with LRFN1, LRF..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
614,complex:FZD8_LRP6,O75581,Q9H461,,,True,False,False,,False,False,,False,False,,,False,,
615,complex:FZD9_LRP5,O75197,O00144,,,True,False,False,,False,False,,False,False,,,False,,
616,complex:FZD9_LRP6,O75581,O00144,,,True,False,False,,False,False,,False,False,,,False,,
617,complex:FZD10_LRP5,O75197,Q9ULW2,,,True,False,False,,False,False,,False,False,,,False,,


In [7]:
#'complex:FZD8_LRP6'[8:]

In [8]:
#com_cpDB[(com_cpDB['complex_name'] == 'complex:IL6 receptor')].loc[:, ['uniprot_1', 'uniprot_2', 'uniprot_3', 'uniprot_4']].values

In [9]:
#genes_cpDB[genes_cpDB['uniprot'].isin(['P08887','P40189'])]['gene_name']

In [10]:
# Generate complexes2gene symbol dictionary
Com2Gene = {}


for complex_name in np.unique(com_cpDB['complex_name']):
    #print(complex_name)
    
    # getting rid of 'complex:' in the beginning
    #complex_name = complex_name[8:]
    #print(complex_name)
    
    curr_complex_proteins = list(com_cpDB[(com_cpDB['complex_name'] == complex_name)].loc[:, ['uniprot_1', 'uniprot_2', 'uniprot_3', 'uniprot_4']].values.tolist())
    # flatten list 
    curr_complex_proteins = [item for sublist in curr_complex_proteins for item in sublist]
    #print(curr_complex_proteins)
    # remove nans
    curr_complex_proteins = [x for x in curr_complex_proteins if str(x) != 'nan']
    #print('after removing nans:', curr_complex_proteins)
    
    # getting corresponding gene names from the gene table
    Com2Gene[complex_name] = list(genes_cpDB[genes_cpDB['uniprot'].isin(curr_complex_proteins)]['gene_name'])
    

In [11]:
list(Com2Gene.items())[:10]

[('complex:12oxoLeukotrieneB4_byPTGR1', ['PTGR1']),
 ('complex:17aHydroxyprogesterone_byCYP17A1', ['CYP17A1']),
 ('complex:22Hydroxycholesterol_byCYP11A1', ['CYP11A1']),
 ('complex:22Hydroxycholesterol_byCYP3A4', ['CYP3A4']),
 ('complex:2arachidonoylglycerol_byDAGLA', ['DAGLA']),
 ('complex:2arachidonoylglycerol_byDAGLB', ['DAGLB']),
 ('complex:5-alpha-Dihydroprogesterone_byDHRS9', ['DHRS9']),
 ('complex:5HT3C5HT3A complex', ['HTR3A', 'HTR3C']),
 ('complex:5HT3C5HT3A_complex', ['HTR3A', 'HTR3C']),
 ('complex:5HT3D receptor', ['HTR3A', 'HTR3D'])]

In [12]:
# Load interactions from cellphoneDB/out/means.txt output file                   
int_cpDB = pd.read_csv('/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/out/means.txt',
                      sep='\t')

# disregarding pairwise average expression values
int_cpDB = int_cpDB.loc[:, list(int_cpDB.columns)[:11]]
int_cpDB

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,is_integrin
0,CPI-SS0A7B487D4,KLRG2_WNT11,simple:A4D1S0,simple:O96014,KLRG2,WNT11,True,True,False,InnateDB-All,False
1,CPI-CS0481C1F9A,FZD1_LRP5_WNT11,complex:FZD1_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
2,CPI-CS0F29C6285,FZD1_LRP6_WNT11,complex:FZD1_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
3,CPI-CS0372FC240,FZD2_LRP5_WNT11,complex:FZD2_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
4,CPI-CS031A2034E,FZD2_LRP6_WNT11,complex:FZD2_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
...,...,...,...,...,...,...,...,...,...,...,...
1351,CPI-SC047CEF2DD,CRLF2_TSLPR,simple:Q9HC73,complex:TSLPR,CRLF2,,True,True,True,user_curated,False
1352,CPI-SS04C672963,ESAM_ESAM,simple:Q96AP7,simple:Q96AP7,ESAM,ESAM,False,False,False,user_curated,False
1353,CPI-SC001AFA16D,NRTN_RET receptor 2,simple:Q99748,complex:RET receptor 2,NRTN,,True,False,True,curated,False
1354,CPI-SC060C69786,NRTN_RET_receptor_2,simple:Q99748,complex:RET_receptor_2,NRTN,,True,False,True,user_curated,False


In [13]:
np.unique(int_cpDB['annotation_strategy'], return_counts=True)

(array(['I2D', 'I2D,IMEx,InnateDB,InnateDB-All,IntAct,MINT',
        'I2D,IMEx,InnateDB,IntAct', 'I2D,IMEx,InnateDB-All,IntAct',
        'I2D,IMEx,InnateDB-All,IntAct,MINT', 'I2D,IMEx,InnateDB-All,MINT',
        'I2D,InnateDB', 'I2D,InnateDB-All', 'I2D,InnateDB-All,IntAct',
        'I2D,IntAct', 'IMEx', 'IMEx,InnateDB-All,IntAct',
        'IMEx,InnateDB-All,IntAct,MatrixDB', 'IMEx,InnateDB-All,MINT',
        'IMEx,InnateDB-All,UniProt', 'IMEx,IntAct', 'IMEx,MINT',
        'InnateDB', 'InnateDB-All', 'InnateDB-All,MINT', 'curated',
        'guidetopharmacology.org', 'user_curated'], dtype=object),
 array([ 42,   1,   1,   4,   1,   1,   2,  21,   1,   2,   2,   4,   1,
          6,   1,  26,   2,   2,  67,   2, 279,  63, 825]))

In [14]:
# MANDATORY: remove "curated" because we have cleaned and renamed them (this is a long story, just do it)
# these interactions have either been renamed or excluded so best not to use them
int_cpDB = int_cpDB[int_cpDB['annotation_strategy'] != 'curated']

In [15]:
# OPTIONAL: Use only user_curated interactions?
if filter_int_user_curated:
    int_cpDB = int_cpDB[int_cpDB['annotation_strategy'] == 'user_curated']

In [16]:
int_cpDB

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,is_integrin
1,CPI-CS0481C1F9A,FZD1_LRP5_WNT11,complex:FZD1_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
2,CPI-CS0F29C6285,FZD1_LRP6_WNT11,complex:FZD1_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
3,CPI-CS0372FC240,FZD2_LRP5_WNT11,complex:FZD2_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
4,CPI-CS031A2034E,FZD2_LRP6_WNT11,complex:FZD2_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
5,CPI-CS02643715E,FZD3_LRP5_WNT11,complex:FZD3_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
...,...,...,...,...,...,...,...,...,...,...,...
1350,CPI-SC090068F7B,TSLP_TSLPR,simple:Q969D9,complex:TSLPR,TSLP,,True,False,True,user_curated,False
1351,CPI-SC047CEF2DD,CRLF2_TSLPR,simple:Q9HC73,complex:TSLPR,CRLF2,,True,True,True,user_curated,False
1352,CPI-SS04C672963,ESAM_ESAM,simple:Q96AP7,simple:Q96AP7,ESAM,ESAM,False,False,False,user_curated,False
1354,CPI-SC060C69786,NRTN_RET_receptor_2,simple:Q99748,complex:RET_receptor_2,NRTN,,True,False,True,user_curated,False


In [17]:
int_cpDB.loc[1,:]

id_cp_interaction        CPI-CS0481C1F9A
interacting_pair         FZD1_LRP5_WNT11
partner_a              complex:FZD1_LRP5
partner_b                  simple:O96014
gene_a                               NaN
gene_b                             WNT11
secreted                            True
receptor_a                         False
receptor_b                         False
annotation_strategy         user_curated
is_integrin                        False
Name: 1, dtype: object

In [18]:
'complex:FZD1_LRP5' in list(Com2Gene.keys())

True

In [19]:
list(Com2Gene.keys())[:5]

['complex:12oxoLeukotrieneB4_byPTGR1',
 'complex:17aHydroxyprogesterone_byCYP17A1',
 'complex:22Hydroxycholesterol_byCYP11A1',
 'complex:22Hydroxycholesterol_byCYP3A4',
 'complex:2arachidonoylglycerol_byDAGLA']

In [20]:
'complex:FZD1_LRP5' in list(com_cpDB['complex_name'])

True

In [21]:
# Generate Int2Gene dictionary
Int2Gene = {}

for i in int_cpDB.index:
    #print('row number:', i)
    curr_df_row = int_cpDB.loc[i,:]
    #print('row:', curr_df_row)
    
    # if partnerA is complex (aka np.isnan(table['gene_a']) == True), then retrieve members from dictionary
    # complex name will be in table['partner_a']
    if str(curr_df_row['gene_a']) == 'nan':
        partner_A = Com2Gene[curr_df_row['partner_a']]
    else:
        # if it's not a complex, then get the partner A from table['gene_a']
        partner_A = [curr_df_row['gene_a']]
        
    # if partnerB is complex, then retrieve members from dictionary
    # complex name will be in table['partner_b']
    if str(curr_df_row['gene_b']) == 'nan':
        partner_B = Com2Gene[curr_df_row['partner_b']]
    else:
        # if it's not a complex, then get the partner B from table['gene_b']
        partner_B = [curr_df_row['gene_b']]
        
    interaction_id = curr_df_row['interacting_pair']

    Int2Gene[interaction_id] = {'partner_a': partner_A,
                  'partner_b': partner_B}

In [22]:
list(Int2Gene.items())[:10]

[('FZD1_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD1'], 'partner_b': ['WNT11']}),
 ('FZD1_LRP6_WNT11', {'partner_a': ['FZD1', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD2_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD2'], 'partner_b': ['WNT11']}),
 ('FZD2_LRP6_WNT11', {'partner_a': ['FZD2', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD3_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD3'], 'partner_b': ['WNT11']}),
 ('FZD3_LRP6_WNT11', {'partner_a': ['FZD3', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD4_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD4'], 'partner_b': ['WNT11']}),
 ('FZD4_LRP6_WNT11', {'partner_a': ['FZD4', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD5_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD5'], 'partner_b': ['WNT11']}),
 ('FZD5_LRP6_WNT11', {'partner_a': ['FZD5', 'LRP6'], 'partner_b': ['WNT11']})]

## Load cluster's gene percentage expression

Prepared in S2 notebook

In [23]:
# Load percentage expression info
# Matrix of genes (rows) per celltypes (columns) containing the proportion [0-1] of cells 
# in a celltype expressing the gene
path_Exp = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/PercentExpressed_for_cellphone_20210218.csv'

# Load matrix
Per_df = pd.read_csv(path_Exp, index_col=0) 

# Dictionary of celltypes2expressed genes
genes_expr_per_cell_type = {} 

for ct in Per_df.columns:
    print(ct)
    curr_table = pd.DataFrame(Per_df.loc[:, ct])
    
    # only leave in genes expressed in this cell type according to a threshold declared in the beginnning of this notebook
    mask = (curr_table[ct] > per_cutoff)
    genes_expr_per_cell_type[ct] = list(curr_table[ct][mask].index)

B_cells_memory
B_cells_memory_activated
B_cells_naive
B_cells_naive_activated
MAIT_cells
Macrophages
Monocytes_classical
Monocytes_intermediate
Monocytes_non-classical
NK_CD16_bright
NK_CD16_bright_activated
NK_CD56_bright
NK_CD56_bright_activated
Plasma_cells
Precursor_cells
T4_activated
T4_memory
T4_naive
T8_activated
T8_naive
TCM_CD8+
TEM_CD8+
TMRA_CD8+
T_gd
T_regs
cDC1
cDC2
iNKT_cells
pDC


In [24]:
Per_df

Unnamed: 0_level_0,B_cells_memory,B_cells_memory_activated,B_cells_naive,B_cells_naive_activated,MAIT_cells,Macrophages,Monocytes_classical,Monocytes_intermediate,Monocytes_non-classical,NK_CD16_bright,...,T8_naive,TCM_CD8+,TEM_CD8+,TMRA_CD8+,T_gd,T_regs,cDC1,cDC2,iNKT_cells,pDC
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
RP11-34P13.7,0.000000,0.000378,0.000590,0.000000,0.000000,0.000526,0.000183,0.000238,0.000000,0.000000,...,0.000321,0.000305,0.000000,0.000000,0.000721,0.000139,0.000847,0.000000,0.000177,0.000000
FO538757.2,0.118336,0.242155,0.086777,0.072884,0.071611,0.256835,0.220497,0.168689,0.205003,0.065168,...,0.068678,0.102259,0.056984,0.076870,0.075703,0.166805,0.197018,0.099831,0.091022,0.067669
AP006222.2,0.001951,0.003403,0.000590,0.000784,0.000853,0.006046,0.004202,0.004759,0.002441,0.000378,...,0.000642,0.001221,0.000485,0.000693,0.000721,0.001249,0.002710,0.000000,0.000532,0.000000
RP4-669L17.10,0.001951,0.002836,0.002361,0.000784,0.000853,0.001577,0.000365,0.000714,0.002441,0.000189,...,0.000963,0.000305,0.000485,0.002078,0.002163,0.001804,0.001355,0.000000,0.001065,0.000000
RP5-857K21.4,0.000650,0.000000,0.000000,0.000000,0.000000,0.000000,0.000548,0.000000,0.000000,0.000000,...,0.000321,0.000305,0.000000,0.000000,0.000000,0.000278,0.000339,0.000000,0.000177,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTD-2541M15.3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
THEGL,0.000000,0.000000,0.000000,0.000000,0.000000,0.000263,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000169,0.000000,0.000000,0.000000
KIAA1644,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000610,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000508,0.006768,0.000000,0.000000
RP11-132A1.3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000139,0.000000,0.000000,0.000355,0.000000


In [25]:
len(genes_expr_per_cell_type['B_cells_memory'])

4038

## Load DE expression info

In [26]:
path_DE = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/joint_DEGs_list_all_cell_types_for_cellphone_20210218.csv'
DE_df = pd.read_csv(path_DE, #row.names = 0
                )

DE_df


Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,cluster
0,0,CD73(Ecto-5'-nucleotidase),-0.972167,1.004392e-34,1.603813e-30,1.075824,2.047991,0.532946,0.735178,B_cells_memory
1,1,IgD,0.892972,2.104098e-32,1.679912e-28,2.001557,1.108585,0.783915,0.565217,B_cells_memory
2,2,CD32,0.858412,1.258348e-22,6.697768e-19,4.160845,3.302433,0.936047,0.849802,B_cells_memory
3,3,IGHG3,0.405874,3.555500e-22,1.419356e-18,0.599779,0.193905,0.371124,0.128458,B_cells_memory
4,4,IGHM,0.439509,9.379230e-17,2.995351e-13,1.090960,0.651451,0.611434,0.371542,B_cells_memory
...,...,...,...,...,...,...,...,...,...,...
465692,11554,ABCF3,-0.000026,9.993354e-01,9.996097e-01,0.024162,0.024187,0.014085,0.032258,pDC
465693,11555,DDX31,0.000032,9.993502e-01,9.996097e-01,0.038317,0.038285,0.028169,0.032258,pDC
465694,11556,CACUL1,0.000019,9.997470e-01,9.998807e-01,0.087572,0.087553,0.056338,0.080645,pDC
465695,11557,NUCB1,-0.000024,9.998530e-01,9.998807e-01,0.490545,0.490569,0.323944,0.338710,pDC


In [28]:
'PTPRC' in list(DE_df['Gene'])

True

In [29]:
logFC_cutoff

0

In [30]:
pval_cutoff

0.05

In [31]:
per_cutoff

0.1

In [32]:
# filter the DE table according to cutoffs declared in the beginning of the notebook
# separately for upreg and downreg genes

DE_df = DE_df[(abs(DE_df['logFC']) > logFC_cutoff)
             & (DE_df['adj.P.Val'] < pval_cutoff)
             & (DE_df['percentExpr_cluster'] > per_cutoff)]

DE_df_upreg = DE_df[DE_df['logFC'] > 0]
DE_df_downreg = DE_df[DE_df['logFC'] < 0]

In [33]:
# without logFC filtering at all
print(DE_df.shape)
print(DE_df_upreg.shape)
print(DE_df_downreg.shape)

(11027, 10)
(8299, 10)
(2728, 10)


In [34]:
np.unique(DE_df_upreg['cluster'])

array(['B_cells_memory', 'B_cells_memory_activated', 'B_cells_naive',
       'B_cells_naive_activated', 'MAIT_cells', 'Macrophages',
       'Monocytes_classical', 'Monocytes_intermediate',
       'Monocytes_non-classical', 'NK_CD16_bright',
       'NK_CD16_bright_activated', 'NK_CD56_bright_activated',
       'Plasma_cells', 'Precursor_cells', 'T4_activated', 'T4_memory',
       'T4_naive', 'T8_activated', 'T8_naive', 'TCM_CD8+', 'TEM_CD8+',
       'TMRA_CD8+', 'T_gd', 'T_regs', 'cDC1', 'cDC2', 'iNKT_cells'],
      dtype=object)

In [35]:
np.unique(DE_df_downreg['cluster'])

array(['B_cells_memory', 'B_cells_memory_activated', 'B_cells_naive',
       'B_cells_naive_activated', 'MAIT_cells', 'Macrophages',
       'Monocytes_classical', 'Monocytes_intermediate',
       'Monocytes_non-classical', 'NK_CD16_bright',
       'NK_CD16_bright_activated', 'NK_CD56_bright_activated',
       'Plasma_cells', 'T4_activated', 'T4_memory', 'T4_naive',
       'T8_activated', 'T8_naive', 'TCM_CD8+', 'TEM_CD8+', 'TMRA_CD8+',
       'T_gd', 'T_regs', 'cDC1', 'iNKT_cells'], dtype=object)

In [36]:
# Build dictionary clusters2DE_genes
# separately for upreg and downreg genes

clusters_upreg = list(np.unique(DE_df_upreg['cluster']))
clusters_downreg = list(np.unique(DE_df_downreg['cluster']))

is_DE_upreg = {}
is_DE_downreg = {}

for cluster in clusters_upreg:
    is_DE_upreg[cluster] = list(DE_df_upreg[DE_df_upreg['cluster'] == cluster]['Gene'])
    
for cluster in clusters_downreg:
    is_DE_downreg[cluster] = list(DE_df_downreg[DE_df_downreg['cluster'] == cluster]['Gene'])

In [37]:
len(is_DE_upreg['B_cells_memory_activated'])

193

In [38]:
'CD40' in is_DE_downreg['B_cells_memory_activated']

True

In [39]:
len(is_DE_downreg['B_cells_memory'])

15

In [40]:
for ct in list(is_DE_upreg.keys()):
    print(ct)
    print(len(is_DE_upreg[ct]), '\n')

B_cells_memory
77 

B_cells_memory_activated
193 

B_cells_naive
22 

B_cells_naive_activated
9 

MAIT_cells
6 

Macrophages
435 

Monocytes_classical
399 

Monocytes_intermediate
126 

Monocytes_non-classical
93 

NK_CD16_bright
89 

NK_CD16_bright_activated
21 

NK_CD56_bright_activated
1 

Plasma_cells
10 

Precursor_cells
1 

T4_activated
1144 

T4_memory
227 

T4_naive
67 

T8_activated
430 

T8_naive
28 

TCM_CD8+
226 

TEM_CD8+
57 

TMRA_CD8+
49 

T_gd
11 

T_regs
1226 

cDC1
280 

cDC2
3 

iNKT_cells
3069 



In [41]:
for ct in list(is_DE_downreg.keys()):
    print(ct)
    print(len(is_DE_downreg[ct]), '\n')

B_cells_memory
15 

B_cells_memory_activated
529 

B_cells_naive
8 

B_cells_naive_activated
8 

MAIT_cells
7 

Macrophages
131 

Monocytes_classical
113 

Monocytes_intermediate
140 

Monocytes_non-classical
85 

NK_CD16_bright
157 

NK_CD16_bright_activated
9 

NK_CD56_bright_activated
1 

Plasma_cells
5 

T4_activated
162 

T4_memory
19 

T4_naive
17 

T8_activated
867 

T8_naive
7 

TCM_CD8+
15 

TEM_CD8+
16 

TMRA_CD8+
6 

T_gd
9 

T_regs
34 

cDC1
227 

iNKT_cells
141 



In [42]:
is_DE_upreg['B_cells_naive_activated']

['CD32',
 'MT-ND6',
 'IgD',
 'RPS4Y1',
 'CD82-1',
 'MT-CO3',
 'CD45RA',
 'CD1c',
 'CD19-1']

In [43]:
is_DE_downreg['B_cells_naive_activated']

["CD73(Ecto-5'-nucleotidase)",
 'HLA-DQB1',
 'RPS10',
 'ARID5B',
 'CD38-1',
 'HLA-B',
 'HLA-DQA1',
 'DUSP2']

## Define cell pairs to test

In [44]:
len(list(genes_expr_per_cell_type.keys()))

29

In [45]:
list(genes_expr_per_cell_type.keys())

['B_cells_memory',
 'B_cells_memory_activated',
 'B_cells_naive',
 'B_cells_naive_activated',
 'MAIT_cells',
 'Macrophages',
 'Monocytes_classical',
 'Monocytes_intermediate',
 'Monocytes_non-classical',
 'NK_CD16_bright',
 'NK_CD16_bright_activated',
 'NK_CD56_bright',
 'NK_CD56_bright_activated',
 'Plasma_cells',
 'Precursor_cells',
 'T4_activated',
 'T4_memory',
 'T4_naive',
 'T8_activated',
 'T8_naive',
 'TCM_CD8+',
 'TEM_CD8+',
 'TMRA_CD8+',
 'T_gd',
 'T_regs',
 'cDC1',
 'cDC2',
 'iNKT_cells',
 'pDC']

In [46]:
# Get all pairwise cluster combinations: A--B, B--A
# Consider the reverse interaction too: A--B but B--A as well
pairwise_cluster_combinations = list(itertools.permutations(list(genes_expr_per_cell_type.keys()), 2))
len(pairwise_cluster_combinations)


812

In [47]:
pairwise_cluster_combinations[:5]

[('B_cells_memory', 'B_cells_memory_activated'),
 ('B_cells_memory', 'B_cells_naive'),
 ('B_cells_memory', 'B_cells_naive_activated'),
 ('B_cells_memory', 'MAIT_cells'),
 ('B_cells_memory', 'Macrophages')]

In [48]:
# add self interactions
self_inter_combinations = [(ct, ct) for ct in list(genes_expr_per_cell_type.keys())]
pairwise_cluster_combinations = pairwise_cluster_combinations + self_inter_combinations
len(pairwise_cluster_combinations)


841

In [49]:
len(is_DE_upreg.keys())

27

In [50]:
len(is_DE_downreg.keys())

25

In [51]:
# We only want to test pairs including at least one celltype in the DE folder

pairwise_cluster_combinations_upreg = [elem for elem in pairwise_cluster_combinations if elem[0] in list(is_DE_upreg.keys()) and elem[1] in list(is_DE_upreg.keys())]
pairwise_cluster_combinations_downreg = [elem for elem in pairwise_cluster_combinations if elem[0] in list(is_DE_downreg.keys()) and elem[1] in list(is_DE_downreg.keys())]

In [52]:
len(pairwise_cluster_combinations_upreg)

729

In [53]:
len(pairwise_cluster_combinations_downreg)

625

In [54]:
# Make cluster pair labels: celltypeA--celltypeB             
cluster_combinations_labels_upreg = [comb[0] + '---' + comb[1] for comb in pairwise_cluster_combinations_upreg]
cluster_combinations_labels_downreg = [comb[0] + '---' + comb[1] for comb in pairwise_cluster_combinations_downreg]

In [55]:
len(cluster_combinations_labels_upreg)

729

In [56]:
len(cluster_combinations_labels_downreg)

625

# Retrieve CellphoneDB L/R interactions

A relevant interaction shoudl have

1. All their participants expressed in the corresponding celltypes
2. At least one participant is a DEG

In [57]:
len(Int2Gene.keys())

825

In [58]:
# Make scaffold matrix: L/R interactions (rows) x celltype pairs (columns)
df_Exrp_LR_in_celltype_pairs_upreg = pd.DataFrame(index = list(Int2Gene.keys()),
                                          columns = cluster_combinations_labels_upreg,
                                          data = np.zeros( (len(list(Int2Gene.keys())), 
                                                            len(cluster_combinations_labels_upreg)) )
                                         )

df_Exrp_LR_in_celltype_pairs_downreg = pd.DataFrame(index = list(Int2Gene.keys()),
                                          columns = cluster_combinations_labels_downreg,
                                          data = np.zeros( (len(list(Int2Gene.keys())), 
                                                            len(cluster_combinations_labels_downreg)) )
                                         )

In [59]:
df_Exrp_LR_in_celltype_pairs_upreg

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,...,T8_activated---T8_activated,T8_naive---T8_naive,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,cDC2---cDC2,iNKT_cells---iNKT_cells
FZD1_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD1_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD3_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TSLP_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRLF2_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ESAM_ESAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NRTN_RET_receptor_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [60]:
df_Exrp_LR_in_celltype_pairs_downreg

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,...,T4_naive---T4_naive,T8_activated---T8_activated,T8_naive---T8_naive,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,iNKT_cells---iNKT_cells
FZD1_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD1_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD3_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TSLP_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRLF2_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ESAM_ESAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NRTN_RET_receptor_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [61]:
%%time

# LR_pairs_celltype_pairs_df will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# So, fill 1 if all genes are expressed in all clusters

for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_upreg.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_upreg.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_upreg.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are all partner_A genes expressed in celltype_A and are all partner_B genes expressed in celltype_B?
        are_all_expressed = all(elem in genes_expr_per_cell_type[ct_A] for elem in partner_A_genes) & all(elem in genes_expr_per_cell_type[ct_B] for elem in partner_B_genes)
        
        if are_all_expressed:
            df_Exrp_LR_in_celltype_pairs_upreg.loc[interaction, ct_pair] = 1


CPU times: user 1min 11s, sys: 0 ns, total: 1min 11s
Wall time: 1min 11s


In [62]:
%%time

# LR_pairs_celltype_pairs_df will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# So, fill 1 if all genes are expressed in all clusters

for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_downreg.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_downreg.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_downreg.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are all partner_A genes expressed in celltype_A and are all partner_B genes expressed in celltype_B?
        are_all_expressed = all(elem in genes_expr_per_cell_type[ct_A] for elem in partner_A_genes) & all(elem in genes_expr_per_cell_type[ct_B] for elem in partner_B_genes)
        
        if are_all_expressed:
            df_Exrp_LR_in_celltype_pairs_downreg.loc[interaction, ct_pair] = 1


CPU times: user 1min, sys: 0 ns, total: 1min
Wall time: 1min


In [63]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.values, return_counts=True)

(array([0., 1.]), array([590186,  11239]))

In [64]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.values, return_counts=True)

(array([0., 1.]), array([506043,   9582]))

In [65]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.sum(axis=0))

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39.,
       40.])

In [66]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.sum(axis=0))

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39.,
       40.])

In [67]:
df_Exrp_LR_in_celltype_pairs_upreg.shape

(825, 729)

In [68]:
df_Exrp_LR_in_celltype_pairs_downreg.shape

(825, 625)

In [69]:
# keep celltype pairs with at least one expressed interaction

df_Exrp_LR_in_celltype_pairs_upreg = df_Exrp_LR_in_celltype_pairs_upreg.loc[:, (df_Exrp_LR_in_celltype_pairs_upreg != 0).any(axis=0)]
print(df_Exrp_LR_in_celltype_pairs_upreg.shape)

df_Exrp_LR_in_celltype_pairs_downreg = df_Exrp_LR_in_celltype_pairs_downreg.loc[:, (df_Exrp_LR_in_celltype_pairs_downreg != 0).any(axis=0)]
print(df_Exrp_LR_in_celltype_pairs_downreg.shape)

(825, 729)
(825, 625)


In [70]:
# keep interactions with at least one celltype_pair
df_Exrp_LR_in_celltype_pairs_upreg = df_Exrp_LR_in_celltype_pairs_upreg.loc[(df_Exrp_LR_in_celltype_pairs_upreg != 0).any(axis=1), :]
print(df_Exrp_LR_in_celltype_pairs_upreg.shape)

df_Exrp_LR_in_celltype_pairs_downreg = df_Exrp_LR_in_celltype_pairs_downreg.loc[(df_Exrp_LR_in_celltype_pairs_downreg != 0).any(axis=1), :]
print(df_Exrp_LR_in_celltype_pairs_downreg.shape)

(125, 729)
(113, 625)


In [71]:
df_Exrp_LR_in_celltype_pairs_downreg.columns

Index(['B_cells_memory---B_cells_memory_activated',
       'B_cells_memory---B_cells_naive',
       'B_cells_memory---B_cells_naive_activated',
       'B_cells_memory---MAIT_cells', 'B_cells_memory---Macrophages',
       'B_cells_memory---Monocytes_classical',
       'B_cells_memory---Monocytes_intermediate',
       'B_cells_memory---Monocytes_non-classical',
       'B_cells_memory---NK_CD16_bright',
       'B_cells_memory---NK_CD16_bright_activated',
       ...
       'T4_naive---T4_naive', 'T8_activated---T8_activated',
       'T8_naive---T8_naive', 'TCM_CD8+---TCM_CD8+', 'TEM_CD8+---TEM_CD8+',
       'TMRA_CD8+---TMRA_CD8+', 'T_gd---T_gd', 'T_regs---T_regs',
       'cDC1---cDC1', 'iNKT_cells---iNKT_cells'],
      dtype='object', length=625)

In [72]:
df_Exrp_LR_in_celltype_pairs_downreg.loc[:,'B_cells_memory---B_cells_naive_activated']

NOTCH1_DLL3           0.0
NOTCH2_DLL3           0.0
SIRPA_CD47            0.0
LGALS9_HAVCR2         0.0
CADM1_CADM1           0.0
                     ... 
LTBR_LTB              0.0
CCR4_CCL17            0.0
CD47_SIRB1_complex    0.0
LAIR1_LILRB4          0.0
CLEC2B_KLRF1          0.0
Name: B_cells_memory---B_cells_naive_activated, Length: 113, dtype: float64

In [73]:
df_Exrp_LR_in_celltype_pairs_downreg

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,...,T4_naive---T4_naive,T8_activated---T8_activated,T8_naive---T8_naive,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,iNKT_cells---iNKT_cells
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SIRPA_CD47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
LGALS9_HAVCR2,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
CADM1_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LTBR_LTB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [74]:
list(df_Exrp_LR_in_celltype_pairs_downreg.index) == list(df_Exrp_LR_in_celltype_pairs_upreg.index)

False

In [75]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.values, return_counts=True)

(array([0., 1.]), array([79886, 11239]))

In [76]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.values, return_counts=True)

(array([0., 1.]), array([61043,  9582]))

In [77]:
np.sum(df_Exrp_LR_in_celltype_pairs_upreg.values)

11239.0

In [78]:
np.sum(df_Exrp_LR_in_celltype_pairs_downreg.values)

9582.0

In [79]:
# Initialize DE matrix from LR_pairs_celltype_pairs_df and set all values to 0
# DE will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# and one is a DE in the celltypes of interests
# Make scaffold matrix: L/R interactions (rows) x celltype pairs (columns)
df_Exrp_LR_in_celltype_pairs_upreg_DE = pd.DataFrame(index = list(df_Exrp_LR_in_celltype_pairs_upreg.index),
                                          columns = list(df_Exrp_LR_in_celltype_pairs_upreg.columns),
                                            data = np.zeros( (len(list(df_Exrp_LR_in_celltype_pairs_upreg.index)), 
                                                            len(list(df_Exrp_LR_in_celltype_pairs_upreg.columns))))
                                            )
df_Exrp_LR_in_celltype_pairs_downreg_DE = pd.DataFrame(index = list(df_Exrp_LR_in_celltype_pairs_downreg.index),
                                          columns = list(df_Exrp_LR_in_celltype_pairs_downreg.columns),
                                            data = np.zeros( (len(list(df_Exrp_LR_in_celltype_pairs_downreg.index)), 
                                                            len(list(df_Exrp_LR_in_celltype_pairs_downreg.columns))))
                                            )

In [80]:
%%time
# So, fill 1 if at least one gene in the interaction is DE
for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
        are_any_DE = all(elem in is_DE_upreg[ct_A] for elem in partner_A_genes) | all(elem in is_DE_upreg[ct_B] for elem in partner_B_genes)  
        
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs_upreg.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction, ct_pair] = 1

CPU times: user 2.62 s, sys: 0 ns, total: 2.62 s
Wall time: 2.62 s


In [81]:
%%time
# So, fill 1 if at least one gene in the interaction is DE
for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
        are_any_DE = all(elem in is_DE_downreg[ct_A] for elem in partner_A_genes) | all(elem in is_DE_downreg[ct_B] for elem in partner_B_genes)  
        
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs_downreg.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction, ct_pair] = 1

CPU times: user 1.49 s, sys: 0 ns, total: 1.49 s
Wall time: 1.49 s


In [82]:
df_Exrp_LR_in_celltype_pairs_upreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,...,T8_activated---T8_activated,T8_naive---T8_naive,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,cDC2---cDC2,iNKT_cells---iNKT_cells
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CLEC2B_KLRF1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [83]:
df_Exrp_LR_in_celltype_pairs_downreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---MAIT_cells,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,...,T4_naive---T4_naive,T8_activated---T8_activated,T8_naive---T8_naive,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,TMRA_CD8+---TMRA_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,iNKT_cells---iNKT_cells
NOTCH1_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SIRPA_CD47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
LGALS9_HAVCR2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CADM1_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LTBR_LTB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [84]:
# UPREG interactions

# keep interactions expressed in at least one celltype pair
df_Exrp_LR_in_celltype_pairs_upreg_DE = df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[:, (df_Exrp_LR_in_celltype_pairs_upreg_DE != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_upreg_DE.shape, '\n')

# keep celltypepairs with at least one interaction

df_Exrp_LR_in_celltype_pairs_upreg_DE = df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[(df_Exrp_LR_in_celltype_pairs_upreg_DE != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_upreg_DE.shape, '\n')

shape after filtering interactions
(125, 531) 

shape after filtering cell type pairs
(96, 531) 



In [85]:
# DOWNREG interactions

# keep interactions expressed in at least one celltype pair
df_Exrp_LR_in_celltype_pairs_downreg_DE = df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[:, (df_Exrp_LR_in_celltype_pairs_downreg_DE != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_downreg_DE.shape, '\n')

# keep celltypepairs with at least one interaction

df_Exrp_LR_in_celltype_pairs_downreg_DE = df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[(df_Exrp_LR_in_celltype_pairs_downreg_DE != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_downreg_DE.shape, '\n')

shape after filtering interactions
(113, 328) 

shape after filtering cell type pairs
(59, 328) 



In [86]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg_DE.values, return_counts=True)

(array([0., 1.]), array([48483,  2493]))

In [87]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg_DE.values, return_counts=True)

(array([0., 1.]), array([18398,   954]))

In [88]:
df_Exrp_LR_in_celltype_pairs_upreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive,B_cells_memory---B_cells_naive_activated,B_cells_memory---Macrophages,B_cells_memory---Monocytes_classical,B_cells_memory---Monocytes_intermediate,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---NK_CD16_bright_activated,B_cells_memory---NK_CD56_bright_activated,...,NK_CD16_bright---NK_CD16_bright,T4_activated---T4_activated,T4_memory---T4_memory,T8_activated---T8_activated,TCM_CD8+---TCM_CD8+,TEM_CD8+---TEM_CD8+,T_gd---T_gd,T_regs---T_regs,cDC1---cDC1,iNKT_cells---iNKT_cells
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NOTCH2_DLL3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LGALS9_HAVCR2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CD52_SIGLEC10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
LTBR_LTB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CCR4_CCL17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [89]:
df_Exrp_LR_in_celltype_pairs_downreg_DE

Unnamed: 0,B_cells_memory---B_cells_memory_activated,B_cells_memory---B_cells_naive_activated,B_cells_memory---Macrophages,B_cells_memory---Monocytes_non-classical,B_cells_memory---NK_CD16_bright,B_cells_memory---Plasma_cells,B_cells_memory---T4_activated,B_cells_memory---T8_activated,B_cells_memory---T_regs,B_cells_memory---cDC1,...,MAIT_cells---MAIT_cells,Macrophages---Macrophages,Monocytes_classical---Monocytes_classical,Monocytes_intermediate---Monocytes_intermediate,Monocytes_non-classical---Monocytes_non-classical,NK_CD16_bright---NK_CD16_bright,T4_activated---T4_activated,T8_activated---T8_activated,cDC1---cDC1,iNKT_cells---iNKT_cells
SIRPA_CD47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
PLAUR_integrin_a4b1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
CD40LG_integrin_a5b1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TGFB1_TGFBR3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ICAM1_integrin_aMb2_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
C3_integrin_aMb2_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ICAM1_integrin_aXb2_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CXCR3_CXCL9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DPP4_CXCL9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD8_receptor_LCK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Save results

In [90]:
save_path = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

In [91]:
# So DE is our output matrix
# Filter it accordingly for visualization
df_Exrp_LR_in_celltype_pairs_upreg_DE.to_csv(save_path + '20210324_cellphone_interactions_upreg_in_CVID_validation_cohort_no_logFC_cutoff.csv')
df_Exrp_LR_in_celltype_pairs_downreg_DE.to_csv(save_path + '20210324_cellphone_interactions_downreg_in_CVID_validation_cohort_no_logFC_cutoff.csv')

## Save results in a more readable format

Gene by gene breakdown with added DEG stats

### Upreg interactions

In [92]:
faulty_index_count = 0

vec2_append_upreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_upreg_DE.index:
    
    print(interaction, list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index).index(interaction) + 1, 
          'out of', len(df_Exrp_LR_in_celltype_pairs_upreg_DE.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        # row by row
        vec2_append_upreg[str(curr_count)] = {}
        
        vec2_append_upreg[str(curr_count)]['interaction'] = interaction
        
        #print(celltype_pair)
        
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
        
        #print('curr partner A genes', curr_partner_A_genes, 'len:', len(curr_partner_A_genes))
        #print('curr partner B genes', curr_partner_B_genes, 'len:', len(curr_partner_B_genes))
        
        vec2_append_upreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_upreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
        
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        vec2_append_upreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_upreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_df_upreg[DE_df_upreg['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_df_upreg[DE_df_upreg['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])

        
        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_cluster'])
            
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
            
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_cluster'])
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1
    
    

PVR_CD96 1 out of 96
PVR_CD226 2 out of 96
PVR_TIGIT 3 out of 96
NOTCH2_DLL3 4 out of 96
LGALS9_HAVCR2 5 out of 96
FN1_integrin_a4b1_complex 6 out of 96
SPP1_integrin_a4b1_complex 7 out of 96
PLAUR_integrin_a4b1_complex 8 out of 96
FN1_integrin_a4b7_complex 9 out of 96
FN1_integrin_a5b1_complex 10 out of 96
CD40LG_integrin_a5b1_complex 11 out of 96
FN1_integrin_aVb1_complex 12 out of 96
TGFB1_TGFBR3 13 out of 96
ICAM1_integrin_aMb2_complex 14 out of 96
ICAM1_integrin_aXb2_complex 15 out of 96
CXCR3_CXCL9 16 out of 96
DPP4_CXCL9 17 out of 96
CD8_receptor_LCK 18 out of 96
CD94:NKG2A_HLA-E 19 out of 96
CD94:NKG2E_HLA-E 20 out of 96
TNFRSF13B_TNFSF13B 21 out of 96
TNFRSF17_TNFSF13B 22 out of 96
TNFRSF13C_TNFSF13B 23 out of 96
CD74_APP 24 out of 96
ICAM1_SPN 25 out of 96
ICAM1_ITGAL 26 out of 96
ICAM1_integrin_aLb2_complex 27 out of 96
ICAM2_integrin_aLb2_complex 28 out of 96
ICAM3_integrin_aLb2_complex 29 out of 96
F11R_integrin_aLb2_complex 30 out of 96
FAS_FASLG 31 out of 96
NRP2_SEMA3C 

In [93]:
# outlining the final table format
df_output_upreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                              index = list(vec2_append_upreg.keys())
                            )
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2488,,,,,,,,,,,,,
2489,,,,,,,,,,,,,
2490,,,,,,,,,,,,,
2491,,,,,,,,,,,,,


In [94]:
list(df_output_upreg.columns) == list(vec2_append_upreg['0'].keys())

True

In [95]:
len(vec2_append_upreg.keys())

2493

In [96]:
vec2_append_upreg['0'].keys()

dict_keys(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A', 'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A', 'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B', 'adj_pval_gene_B', 'percent_expr_gene_B'])

In [97]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2488,,,,,,,,,,,,,
2489,,,,,,,,,,,,,
2490,,,,,,,,,,,,,
2491,,,,,,,,,,,,,


In [98]:
%%time

for i in list(vec2_append_upreg.keys()):
    #print(i)
    curr_keys = list(vec2_append_upreg[i].keys())
    for col in curr_keys:
        df_output_upreg.loc[i,col] = vec2_append_upreg[i][col]

CPU times: user 2.72 s, sys: 0 ns, total: 2.72 s
Wall time: 2.73 s


In [99]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,PVR_CD96,[PVR],[CD96],cDC2,B_cells_memory_activated,False,,,[0.13367174280879865],True,[0.06231598938035701],[7.66358976792082e-09],[0.167733]
1,PVR_CD96,[PVR],[CD96],cDC2,T8_activated,False,,,[0.13367174280879865],True,[0.0792221910327131],[0.00022060087912304302],[0.228421]
2,PVR_CD226,[PVR],[CD226],cDC2,T4_activated,False,,,[0.13367174280879865],True,[0.0408807841632831],[3.33550702477858e-05],[0.18992]
3,PVR_CD226,[PVR],[CD226],cDC2,T8_activated,False,,,[0.13367174280879865],True,[0.064849917007213],[0.000191475106318186],[0.199869]
4,PVR_CD226,[PVR],[CD226],cDC2,iNKT_cells,False,,,[0.13367174280879865],True,[0.068239211621473],[0.00266465900736355],[0.146573]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2488,CLEC2B_KLRF1,[CLEC2B],[KLRF1],B_cells_memory,NK_CD16_bright,True,[0.128420381461993],[0.00157140448624008],[0.195736],False,,,[0.2780506233471855]
2489,CLEC2B_KLRF1,[CLEC2B],[KLRF1],B_cells_memory_activated,NK_CD16_bright,True,[0.0628291342881411],[9.512104486131921e-05],[0.345571],False,,,[0.2780506233471855]
2490,CLEC2B_KLRF1,[CLEC2B],[KLRF1],Macrophages,NK_CD16_bright,True,[0.0541176629755394],[0.0424214084646784],[0.423061],False,,,[0.2780506233471855]
2491,CLEC2B_KLRF1,[CLEC2B],[KLRF1],Monocytes_non-classical,NK_CD16_bright,True,[0.11336603310709],[0.00792548425245136],[0.388298],False,,,[0.2780506233471855]


In [100]:
# getting rid of the square parentheses [] in all the values

cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_upreg.index):
    #print('row', row)
    for col in cols2correct:
        #print('column', col)
        curr_value = df_output_upreg.loc[row, col] # with []
        #print(curr_value)
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 - complex genes that is
        #if not isinstance(curr_value, list) & (curr_value != 'NA'): # ignoring NAs and lists of length > 1 - complex genes that is
            df_output_upreg.loc[row, col] = curr_value[0] # this just get the element - string if a gene, numerical value if it's a stat
            

In [101]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,PVR_CD96,PVR,CD96,cDC2,B_cells_memory_activated,False,,,0.133672,True,0.062316,7.66359e-09,0.167733
1,PVR_CD96,PVR,CD96,cDC2,T8_activated,False,,,0.133672,True,0.0792222,0.000220601,0.228421
2,PVR_CD226,PVR,CD226,cDC2,T4_activated,False,,,0.133672,True,0.0408808,3.33551e-05,0.18992
3,PVR_CD226,PVR,CD226,cDC2,T8_activated,False,,,0.133672,True,0.0648499,0.000191475,0.199869
4,PVR_CD226,PVR,CD226,cDC2,iNKT_cells,False,,,0.133672,True,0.0682392,0.00266466,0.146573
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2488,CLEC2B_KLRF1,CLEC2B,KLRF1,B_cells_memory,NK_CD16_bright,True,0.12842,0.0015714,0.195736,False,,,0.278051
2489,CLEC2B_KLRF1,CLEC2B,KLRF1,B_cells_memory_activated,NK_CD16_bright,True,0.0628291,9.5121e-05,0.345571,False,,,0.278051
2490,CLEC2B_KLRF1,CLEC2B,KLRF1,Macrophages,NK_CD16_bright,True,0.0541177,0.0424214,0.423061,False,,,0.278051
2491,CLEC2B_KLRF1,CLEC2B,KLRF1,Monocytes_non-classical,NK_CD16_bright,True,0.113366,0.00792548,0.388298,False,,,0.278051


In [102]:
df_output_upreg.columns

Index(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A',
       'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B'],
      dtype='object')

### Splitting tables into 2 tables: simple interactions and complex interactions, latter being deconvoluted into pseudo-interactions for each subunit of a complex

In [103]:
# getting indices of complex interactions
complex_interaction_rows_upreg = []

for n_row in list(df_output_upreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) or isinstance(curr_partner_B_genes, list): # if partner A or B is a complex
        complex_interaction_rows_upreg.append(n_row)
        

In [104]:
len(complex_interaction_rows_upreg)

405

In [105]:
np.unique(complex_interaction_rows_upreg, return_counts=True)

(array(['100', '101', '102', '103', '104', '105', '106', '107', '108',
        '109', '120', '121', '122', '123', '124', '125', '126', '127',
        '1271', '1272', '1273', '1274', '1275', '1276', '1277', '1278',
        '1279', '128', '1280', '1281', '1282', '1283', '1284', '1285',
        '1286', '1287', '1288', '1289', '1290', '1291', '1292', '1293',
        '1294', '1295', '1296', '1297', '1298', '1299', '1300', '1301',
        '1302', '1303', '1304', '1305', '1306', '1307', '1308', '1309',
        '1310', '1311', '1312', '1313', '1314', '1315', '1316', '1317',
        '1318', '1319', '1320', '1321', '1322', '1323', '1324', '1325',
        '1326', '1327', '1328', '1329', '1330', '1331', '1332', '1333',
        '174', '175', '176', '177', '178', '179', '180', '181', '182',
        '183', '184', '1841', '1842', '1843', '1844', '1845', '1846',
        '1847', '1848', '1849', '185', '1850', '1851', '1852', '1853',
        '1854', '1855', '1856', '1857', '1858', '1859', '186', '1860',


In [106]:
# splitting simple and complex interactions into 2 separate tables
df_output_upreg_complex = df_output_upreg.loc[complex_interaction_rows_upreg,:]
df_output_upreg_simple = df_output_upreg.drop(complex_interaction_rows_upreg, axis=0)

In [107]:
df_output_upreg_simple.shape

(2088, 13)

In [108]:
df_output_upreg_complex.shape

(405, 13)

In [109]:
df_output_upreg.shape

(2493, 13)

In [110]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,PVR_CD96,PVR,CD96,cDC2,B_cells_memory_activated,False,,,0.133672,True,0.062316,7.66359e-09,0.167733
1,PVR_CD96,PVR,CD96,cDC2,T8_activated,False,,,0.133672,True,0.0792222,0.000220601,0.228421
2,PVR_CD226,PVR,CD226,cDC2,T4_activated,False,,,0.133672,True,0.0408808,3.33551e-05,0.18992
3,PVR_CD226,PVR,CD226,cDC2,T8_activated,False,,,0.133672,True,0.0648499,0.000191475,0.199869
4,PVR_CD226,PVR,CD226,cDC2,iNKT_cells,False,,,0.133672,True,0.0682392,0.00266466,0.146573
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2488,CLEC2B_KLRF1,CLEC2B,KLRF1,B_cells_memory,NK_CD16_bright,True,0.12842,0.0015714,0.195736,False,,,0.278051
2489,CLEC2B_KLRF1,CLEC2B,KLRF1,B_cells_memory_activated,NK_CD16_bright,True,0.0628291,9.5121e-05,0.345571,False,,,0.278051
2490,CLEC2B_KLRF1,CLEC2B,KLRF1,Macrophages,NK_CD16_bright,True,0.0541177,0.0424214,0.423061,False,,,0.278051
2491,CLEC2B_KLRF1,CLEC2B,KLRF1,Monocytes_non-classical,NK_CD16_bright,True,0.113366,0.00792548,0.388298,False,,,0.278051


In [111]:
df_output_upreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
36,FN1_integrin_a4b1_complex,FN1,"[ITGB1, ITGA4]",Macrophages,T8_activated,False,,,0.133807,True,"[0.0859402254234798, 0.0947612832088071]","[2.11382087510341e-06, 4.96771168591477e-05]","[0.215622, 0.351493]"
37,FN1_integrin_a4b1_complex,FN1,"[ITGB1, ITGA4]",Macrophages,T_regs,False,,,0.133807,True,"[0.0463857472576402, 0.13518526418308002]","[0.0115822540066519, 1.11954652107609e-14]","[0.307562, 0.461464]"
38,FN1_integrin_a4b1_complex,FN1,"[ITGB1, ITGA4]",Macrophages,iNKT_cells,False,,,0.133807,True,"[0.0706479330863757, 0.135036011748152]","[0.00781835399692852, 8.104892316104151e-05]","[0.173387, 0.350403]"
39,FN1_integrin_a4b1_complex,FN1,"[ITGB1, ITGA4]",Monocytes_classical,T8_activated,False,,,0.13555,True,"[0.0859402254234798, 0.0947612832088071]","[2.11382087510341e-06, 4.96771168591477e-05]","[0.215622, 0.351493]"
40,FN1_integrin_a4b1_complex,FN1,"[ITGB1, ITGA4]",Monocytes_classical,T_regs,False,,,0.13555,True,"[0.0463857472576402, 0.13518526418308002]","[0.0115822540066519, 1.11954652107609e-14]","[0.307562, 0.461464]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1902,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",iNKT_cells,Plasma_cells,True,1.65079,6.16403e-173,0.838105,False,,,"[0.3511450381679389, 0.5057251908396947]"
1903,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",iNKT_cells,Precursor_cells,True,1.65079,6.16403e-173,0.838105,False,,,"[0.12135922330097088, 0.25485436893203883]"
1904,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",iNKT_cells,cDC1,True,1.65079,6.16403e-173,0.838105,False,,,"[0.5653057767236999, 0.6173132305607318]"
1905,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",iNKT_cells,cDC2,True,1.65079,6.16403e-173,0.838105,False,,,"[0.19627749576988154, 0.4263959390862944]"


In [114]:
# checking whether any complexes interact with other complexes and how many subunits each complex contains here

n_subunits_upreg = []

for n_row in list(df_output_upreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) and isinstance(curr_partner_B_genes, list): # if partner A AND B is a complex
        print('row', n_row)
        print('both are complexes')
        
    if isinstance(curr_partner_A_genes, list): # if partner A is a complex
        print('row', n_row)
        print('curr_partner_A_genes',curr_partner_A_genes)
        print('partner A is a complex, len is:', len(curr_partner_A_genes))
        n_subunits_upreg.append(len(curr_partner_A_genes))
        
    if isinstance(curr_partner_B_genes, list): # if partner B is a complex
        print('row', n_row)
        print('curr_partner_B_genes',curr_partner_B_genes)
        print('partner B is a complex, len is:', len(curr_partner_B_genes))
        n_subunits_upreg.append(len(curr_partner_B_genes))

row 36
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 37
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 38
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 39
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 40
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 41
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 42
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 43
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 44
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 45
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 46
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 47
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 48
curr_partner_B_genes 

In [115]:
np.unique(n_subunits_upreg, return_counts=True)

(array([2, 3]), array([377,  28]))

#### So 1 more scenario to ignore: there are no interactions of a complex with a complex
#### And max complex size is 3 subunits - in case of IL2R (indeed, there are subunits: alpha, beta and gamma)

In [116]:
df_output_upreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
36,FN1_integrin_a4b1_complex,FN1,"[ITGB1, ITGA4]",Macrophages,T8_activated,False,,,0.133807,True,"[0.0859402254234798, 0.0947612832088071]","[2.11382087510341e-06, 4.96771168591477e-05]","[0.215622, 0.351493]"
37,FN1_integrin_a4b1_complex,FN1,"[ITGB1, ITGA4]",Macrophages,T_regs,False,,,0.133807,True,"[0.0463857472576402, 0.13518526418308002]","[0.0115822540066519, 1.11954652107609e-14]","[0.307562, 0.461464]"
38,FN1_integrin_a4b1_complex,FN1,"[ITGB1, ITGA4]",Macrophages,iNKT_cells,False,,,0.133807,True,"[0.0706479330863757, 0.135036011748152]","[0.00781835399692852, 8.104892316104151e-05]","[0.173387, 0.350403]"
39,FN1_integrin_a4b1_complex,FN1,"[ITGB1, ITGA4]",Monocytes_classical,T8_activated,False,,,0.13555,True,"[0.0859402254234798, 0.0947612832088071]","[2.11382087510341e-06, 4.96771168591477e-05]","[0.215622, 0.351493]"
40,FN1_integrin_a4b1_complex,FN1,"[ITGB1, ITGA4]",Monocytes_classical,T_regs,False,,,0.13555,True,"[0.0463857472576402, 0.13518526418308002]","[0.0115822540066519, 1.11954652107609e-14]","[0.307562, 0.461464]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1902,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",iNKT_cells,Plasma_cells,True,1.65079,6.16403e-173,0.838105,False,,,"[0.3511450381679389, 0.5057251908396947]"
1903,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",iNKT_cells,Precursor_cells,True,1.65079,6.16403e-173,0.838105,False,,,"[0.12135922330097088, 0.25485436893203883]"
1904,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",iNKT_cells,cDC1,True,1.65079,6.16403e-173,0.838105,False,,,"[0.5653057767236999, 0.6173132305607318]"
1905,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",iNKT_cells,cDC2,True,1.65079,6.16403e-173,0.838105,False,,,"[0.19627749576988154, 0.4263959390862944]"


In [117]:
# Duplicating the table and then choosing only 0th or 1st or 2nd values for the complexes
df_output_upreg_complex_member_1 = df_output_upreg_complex.copy()
df_output_upreg_complex_member_2 = df_output_upreg_complex.copy()
df_output_upreg_complex_member_3 = df_output_upreg_complex.copy()

In [118]:
# splitting complex interaction entries by subunits / members
# for any values in these tables, if it's a list, save the 0th for df_output_upreg_complex_member_1 and 1st for df_output_upreg_complex_member_2

# which rows contain interaction with a 3-subunit complex? to then subset df_output_downreg_complex_member_3
subunit_3_rows = []

for n_row in list(df_output_upreg_complex.index):
    #print('outside for loop, row', n_row)
    
    for col in df_output_upreg_complex.columns:
        #print('col', col)
        if isinstance(df_output_upreg_complex.loc[n_row, col], list):
            df_output_upreg_complex_member_1.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][0]
            df_output_upreg_complex_member_2.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][1]
            
            # additionally, if there are 3 subunits, separate into 3 entries
            if len(df_output_upreg_complex.loc[n_row, col]) == 3:
                df_output_upreg_complex_member_3.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][2]
                #print('3-subunit complex')
                #print('row', n_row)
                #print('adding to subunit_3_rows')
                subunit_3_rows.append(n_row)


In [119]:
# not sure why but it gets duplicated
np.unique(subunit_3_rows, return_counts=True)

(array(['1271', '1272', '1273', '1274', '1275', '1276', '1277', '1278',
        '1279', '1280', '1281', '1282', '1283', '1284', '1285', '1286',
        '1287', '1288', '1289', '1290', '1291', '1292', '1293', '1294',
        '1295', '1296', '1297', '1298'], dtype='<U4'),
 array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2]))

In [120]:
# getting rid of duplicates
subunit_3_rows = list(set(subunit_3_rows))

In [121]:
np.unique(subunit_3_rows, return_counts=True)

(array(['1271', '1272', '1273', '1274', '1275', '1276', '1277', '1278',
        '1279', '1280', '1281', '1282', '1283', '1284', '1285', '1286',
        '1287', '1288', '1289', '1290', '1291', '1292', '1293', '1294',
        '1295', '1296', '1297', '1298'], dtype='<U4'),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1]))

In [122]:
len(subunit_3_rows)

28

In [123]:
subunit_3_rows

['1275',
 '1292',
 '1294',
 '1295',
 '1293',
 '1288',
 '1290',
 '1284',
 '1298',
 '1289',
 '1273',
 '1277',
 '1274',
 '1282',
 '1291',
 '1280',
 '1297',
 '1287',
 '1285',
 '1276',
 '1279',
 '1278',
 '1272',
 '1283',
 '1271',
 '1296',
 '1281',
 '1286']

In [124]:
# in df_output_downreg_complex_member_3 entries of 3rd members are correct
# removing other entries of interactions with less than 3 subunit complexes
df_output_upreg_complex_member_3 = df_output_upreg_complex_member_3.loc[subunit_3_rows,:]

In [125]:
df_output_upreg_complex_member_3

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
1275,IL2_receptor_HA_IL2,IL2RB,IL2,MAIT_cells,T4_activated,False,,,0.177323,True,0.0742786,0.000436377,0.176771
1292,IL2_receptor_HA_IL2,IL2RB,IL2,T_gd,T4_activated,False,,,0.211247,True,0.0742786,0.000436377,0.176771
1294,IL2_receptor_HA_IL2,IL2RB,IL2,T_regs,T4_activated,False,,,0.426173,True,0.0742786,0.000436377,0.176771
1295,IL2_receptor_HA_IL2,IL2RB,IL2,T_regs,iNKT_cells,False,,,0.426173,True,0.114141,0.000206659,0.23246
1293,IL2_receptor_HA_IL2,IL2RB,IL2,T_gd,iNKT_cells,False,,,0.211247,True,0.114141,0.000206659,0.23246
1288,IL2_receptor_HA_IL2,IL2RB,IL2,T8_activated,T4_activated,False,,,0.338487,True,0.0742786,0.000436377,0.176771
1290,IL2_receptor_HA_IL2,IL2RB,IL2,TCM_CD8+,T4_activated,False,,,0.226496,True,0.0742786,0.000436377,0.176771
1284,IL2_receptor_HA_IL2,IL2RB,IL2,Precursor_cells,iNKT_cells,False,,,0.106796,True,0.114141,0.000206659,0.23246
1298,IL2_receptor_HA_IL2,IL2RB,IL2,iNKT_cells,iNKT_cells,False,,,0.313165,True,0.114141,0.000206659,0.23246
1289,IL2_receptor_HA_IL2,IL2RB,IL2,T8_activated,iNKT_cells,False,,,0.338487,True,0.114141,0.000206659,0.23246


In [126]:
# making indices uniques for concatenantion later
df_output_upreg_complex_member_1.index = [idx + '_member_1' for idx in df_output_upreg_complex_member_1.index]
df_output_upreg_complex_member_2.index = [idx + '_member_2' for idx in df_output_upreg_complex_member_2.index]
df_output_upreg_complex_member_3.index = [idx + '_member_3' for idx in df_output_upreg_complex_member_3.index]

In [127]:
# getting all indices
idx_concat = list(df_output_upreg_complex_member_1.index) + list(df_output_upreg_complex_member_2.index) + list(df_output_upreg_complex_member_3.index)

# sorting by original index number, so that the order is: member 1, member 2 and (where applicable) member 3
idx_concat.sort(key = lambda x: x.split('_')[0])
idx_concat

['100_member_1',
 '100_member_2',
 '101_member_1',
 '101_member_2',
 '102_member_1',
 '102_member_2',
 '103_member_1',
 '103_member_2',
 '104_member_1',
 '104_member_2',
 '105_member_1',
 '105_member_2',
 '106_member_1',
 '106_member_2',
 '107_member_1',
 '107_member_2',
 '108_member_1',
 '108_member_2',
 '109_member_1',
 '109_member_2',
 '120_member_1',
 '120_member_2',
 '121_member_1',
 '121_member_2',
 '122_member_1',
 '122_member_2',
 '123_member_1',
 '123_member_2',
 '124_member_1',
 '124_member_2',
 '125_member_1',
 '125_member_2',
 '126_member_1',
 '126_member_2',
 '127_member_1',
 '127_member_2',
 '1271_member_1',
 '1271_member_2',
 '1271_member_3',
 '1272_member_1',
 '1272_member_2',
 '1272_member_3',
 '1273_member_1',
 '1273_member_2',
 '1273_member_3',
 '1274_member_1',
 '1274_member_2',
 '1274_member_3',
 '1275_member_1',
 '1275_member_2',
 '1275_member_3',
 '1276_member_1',
 '1276_member_2',
 '1276_member_3',
 '1277_member_1',
 '1277_member_2',
 '1277_member_3',
 '1278_mem

In [128]:
df_output_upreg_complex_deconv = pd.concat([df_output_upreg_complex_member_1, df_output_upreg_complex_member_2, df_output_upreg_complex_member_3])

In [129]:
df_output_upreg_complex_member_1

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
36_member_1,FN1_integrin_a4b1_complex,FN1,ITGB1,Macrophages,T8_activated,False,,,0.133807,True,0.0859402,2.11382e-06,0.215622
37_member_1,FN1_integrin_a4b1_complex,FN1,ITGB1,Macrophages,T_regs,False,,,0.133807,True,0.0463857,0.0115823,0.307562
38_member_1,FN1_integrin_a4b1_complex,FN1,ITGB1,Macrophages,iNKT_cells,False,,,0.133807,True,0.0706479,0.00781835,0.173387
39_member_1,FN1_integrin_a4b1_complex,FN1,ITGB1,Monocytes_classical,T8_activated,False,,,0.13555,True,0.0859402,2.11382e-06,0.215622
40_member_1,FN1_integrin_a4b1_complex,FN1,ITGB1,Monocytes_classical,T_regs,False,,,0.13555,True,0.0463857,0.0115823,0.307562
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1902_member_1,IFNG_Type_II_IFNR,IFNG,IFNGR1,iNKT_cells,Plasma_cells,True,1.65079,6.16403e-173,0.838105,False,,,0.351145
1903_member_1,IFNG_Type_II_IFNR,IFNG,IFNGR1,iNKT_cells,Precursor_cells,True,1.65079,6.16403e-173,0.838105,False,,,0.121359
1904_member_1,IFNG_Type_II_IFNR,IFNG,IFNGR1,iNKT_cells,cDC1,True,1.65079,6.16403e-173,0.838105,False,,,0.565306
1905_member_1,IFNG_Type_II_IFNR,IFNG,IFNGR1,iNKT_cells,cDC2,True,1.65079,6.16403e-173,0.838105,False,,,0.196277


In [131]:
# organising entries so that member 1 entry is followed by member 2 entry and then member 3 entry if applicable
df_output_upreg_complex_deconv = df_output_upreg_complex_deconv.loc[idx_concat,:]

In [132]:
df_output_upreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
100_member_1,FN1_integrin_a5b1_complex,FN1,ITGA5,Monocytes_intermediate,Macrophages,True,0.0592074,0.0209018,0.153815,False,,,0.188486
100_member_2,FN1_integrin_a5b1_complex,FN1,ITGB1,Monocytes_intermediate,Macrophages,True,0.0592074,0.0209018,0.153815,False,,,0.506835
101_member_1,FN1_integrin_a5b1_complex,FN1,ITGA5,Monocytes_intermediate,Monocytes_classical,True,0.0592074,0.0209018,0.153815,False,,,0.138473
101_member_2,FN1_integrin_a5b1_complex,FN1,ITGB1,Monocytes_intermediate,Monocytes_classical,True,0.0592074,0.0209018,0.153815,False,,,0.351662
102_member_1,FN1_integrin_a5b1_complex,FN1,ITGA5,Monocytes_intermediate,cDC1,True,0.0592074,0.0209018,0.153815,False,,,0.134508
...,...,...,...,...,...,...,...,...,...,...,...,...,...
97_member_2,FN1_integrin_a4b7_complex,FN1,ITGA4,Monocytes_intermediate,T8_activated,True,0.0592074,0.0209018,0.153815,True,0.0947613,4.96771e-05,0.351493
98_member_1,FN1_integrin_a4b7_complex,FN1,ITGB7,Monocytes_intermediate,TCM_CD8+,True,0.0592074,0.0209018,0.153815,False,,,0.102259
98_member_2,FN1_integrin_a4b7_complex,FN1,ITGA4,Monocytes_intermediate,TCM_CD8+,True,0.0592074,0.0209018,0.153815,False,,,0.337607
99_member_1,FN1_integrin_a4b7_complex,FN1,ITGB7,Monocytes_intermediate,T_regs,True,0.0592074,0.0209018,0.153815,False,,,0.159173


In [133]:
# saving these deconvoluted complex interactions
df_output_upreg_complex_deconv.to_csv(save_path + '20210324_cellphone_interactions_table_with_gene_stats_upreg_in_CVID_all_validation_cohort_no_logFC_cutoff_complexes_deconv_into_pseduinteractions.csv')

In [182]:
# saving the simple interactions table
df_output_upreg_simple.to_csv(save_path + '20210324_cellphone_interactions_table_with_gene_stats_upreg_in_CVID_all_validation_cohort_no_logFC_cutoff_simple_interactions.csv')

In [135]:
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

In [103]:
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

In [104]:
#df_output_upreg.to_csv(save_path + '20210318_cellphone_interactions_table_with_gene_stats_upreg_in_CVID_validation_cohort_no_logFC_cutoff.csv')

### Downreg interactions

In [136]:
faulty_index_count = 0

vec2_append_downreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_downreg_DE.index:
    
    print(interaction, list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index).index(interaction) + 1, 
          'out of', len(df_Exrp_LR_in_celltype_pairs_downreg_DE.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        #print('row', curr_count)
        
        #print('celltype pair', celltype_pair)
        
        # row by row
        vec2_append_downreg[str(curr_count)] = {}
        
        vec2_append_downreg[str(curr_count)]['interaction'] = interaction
        
        #print(celltype_pair)
        
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
        
        #print('curr partner A genes', curr_partner_A_genes, 'len:', len(curr_partner_A_genes))
        #print('curr partner B genes', curr_partner_B_genes, 'len:', len(curr_partner_B_genes))
        
        vec2_append_downreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_downreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
        
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        #print('curr partner A cell type', curr_celltype_A)
        #print('curr partner B cell type', curr_celltype_B)
        
        vec2_append_downreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_downreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_df_downreg[DE_df_downreg['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_df_downreg[DE_df_downreg['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])

        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            #print('curr partner A is DE')
            vec2_append_downreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_downreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_cluster'])
            
        else: # if not DE, add 'NA'
            #print('curr partner A is NOT DE')
            vec2_append_downreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_downreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            #print('curr partner B is DE')
            vec2_append_downreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_downreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_cluster'])
        else: # if not DE, add 'NA'
            #print('curr partner B is NOT DE')
            vec2_append_downreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_downreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1
    
    

SIRPA_CD47 1 out of 59
PLAUR_integrin_a4b1_complex 2 out of 59
CD40LG_integrin_a5b1_complex 3 out of 59
TGFB1_TGFBR3 4 out of 59
ICAM1_integrin_aMb2_complex 5 out of 59
C3_integrin_aMb2_complex 6 out of 59
ICAM1_integrin_aXb2_complex 7 out of 59
CXCR3_CXCL9 8 out of 59
DPP4_CXCL9 9 out of 59
CD8_receptor_LCK 10 out of 59
CD94:NKG2A_HLA-E 11 out of 59
CD94:NKG2E_HLA-E 12 out of 59
CD74_APP 13 out of 59
ICAM1_SPN 14 out of 59
ICAM1_ITGAL 15 out of 59
ICAM1_integrin_aLb2_complex 16 out of 59
NRP1_VEGFB 17 out of 59
GMCSFR_CSF2 18 out of 59
HLA-A_KIR3DL1 19 out of 59
HLA-F_KIR3DL1 20 out of 59
HLA-F_KIR3DL2 21 out of 59
HLA-B_KIR3DL2 22 out of 59
HLA-F_LILRB2 23 out of 59
HLA-F_LILRB1 24 out of 59
CCL4_CCR5 25 out of 59
CCL5_CCR5 26 out of 59
KLRB1_CLEC2D 27 out of 59
TNF_TNFRSF1A 28 out of 59
LTA_TNFRSF1A 29 out of 59
TNF_TNFRSF1B 30 out of 59
LTA_TNFRSF1B 31 out of 59
CD27_CD70 32 out of 59
CD40_CD40LG 33 out of 59
IL21_receptor_IL21 34 out of 59
CCL5_CCR1 35 out of 59
CCL5_CCR4 36 out o

In [137]:
# outlining the final table format
df_output_downreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                              index = list(vec2_append_downreg.keys())
                            )
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
949,,,,,,,,,,,,,
950,,,,,,,,,,,,,
951,,,,,,,,,,,,,
952,,,,,,,,,,,,,


In [138]:
list(df_output_downreg.columns) == list(vec2_append_downreg['0'].keys())

True

In [139]:
len(vec2_append_downreg.keys())

954

In [140]:
vec2_append_downreg['0'].keys()

dict_keys(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A', 'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A', 'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B', 'adj_pval_gene_B', 'percent_expr_gene_B'])

In [141]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
949,,,,,,,,,,,,,
950,,,,,,,,,,,,,
951,,,,,,,,,,,,,
952,,,,,,,,,,,,,


In [142]:
%%time

for i in list(vec2_append_downreg.keys()):
    #print(i)
    curr_keys = list(vec2_append_downreg[i].keys())
    for col in curr_keys:
        df_output_downreg.loc[i,col] = vec2_append_downreg[i][col]

CPU times: user 1.03 s, sys: 3.74 ms, total: 1.03 s
Wall time: 1.03 s


In [143]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,[SIRPA],[CD47],Macrophages,cDC1,False,,,[0.5268138801261829],True,[-0.0631467073654104],[0.0414284823587023],[0.490082]
1,SIRPA_CD47,[SIRPA],[CD47],Monocytes_classical,cDC1,False,,,[0.3735842162952137],True,[-0.0631467073654104],[0.0414284823587023],[0.490082]
2,SIRPA_CD47,[SIRPA],[CD47],Monocytes_intermediate,cDC1,False,,,[0.22008089459909588],True,[-0.0631467073654104],[0.0414284823587023],[0.490082]
3,SIRPA_CD47,[SIRPA],[CD47],Monocytes_non-classical,cDC1,False,,,[0.3581452104942038],True,[-0.0631467073654104],[0.0414284823587023],[0.490082]
4,SIRPA_CD47,[SIRPA],[CD47],cDC1,cDC1,False,,,[0.4163984414704388],True,[-0.0631467073654104],[0.0414284823587023],[0.490082]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
949,CLEC2B_KLRF1,[CLEC2B],[KLRF1],TMRA_CD8+,NK_CD16_bright,False,,,[0.3829639889196676],True,[-0.2136441721063],[9.14144874598934e-13],[0.229479]
950,CLEC2B_KLRF1,[CLEC2B],[KLRF1],T_gd,NK_CD16_bright,False,,,[0.3129055515501081],True,[-0.2136441721063],[9.14144874598934e-13],[0.229479]
951,CLEC2B_KLRF1,[CLEC2B],[KLRF1],cDC1,NK_CD16_bright,False,,,[0.2812129425715737],True,[-0.2136441721063],[9.14144874598934e-13],[0.229479]
952,CLEC2B_KLRF1,[CLEC2B],[KLRF1],iNKT_cells,NK_CD16_bright,False,,,[0.11337828246983675],True,[-0.2136441721063],[9.14144874598934e-13],[0.229479]


In [144]:
cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_downreg.index):
    #print('row', row)
    for col in cols2correct:
        #print('column', col)
        curr_value = df_output_downreg.loc[row, col] # with []
        #print(curr_value)
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 - complex genes that is
        #if not isinstance(curr_value, list) & (curr_value != 'NA'): # ignoring NAs and lists of length > 1 - complex genes that is
            df_output_downreg.loc[row, col] = curr_value[0] # this just get the element - string if a gene, numerical value if it's a stat
            

In [145]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,SIRPA,CD47,Macrophages,cDC1,False,,,0.526814,True,-0.0631467,0.0414285,0.490082
1,SIRPA_CD47,SIRPA,CD47,Monocytes_classical,cDC1,False,,,0.373584,True,-0.0631467,0.0414285,0.490082
2,SIRPA_CD47,SIRPA,CD47,Monocytes_intermediate,cDC1,False,,,0.220081,True,-0.0631467,0.0414285,0.490082
3,SIRPA_CD47,SIRPA,CD47,Monocytes_non-classical,cDC1,False,,,0.358145,True,-0.0631467,0.0414285,0.490082
4,SIRPA_CD47,SIRPA,CD47,cDC1,cDC1,False,,,0.416398,True,-0.0631467,0.0414285,0.490082
...,...,...,...,...,...,...,...,...,...,...,...,...,...
949,CLEC2B_KLRF1,CLEC2B,KLRF1,TMRA_CD8+,NK_CD16_bright,False,,,0.382964,True,-0.213644,9.14145e-13,0.229479
950,CLEC2B_KLRF1,CLEC2B,KLRF1,T_gd,NK_CD16_bright,False,,,0.312906,True,-0.213644,9.14145e-13,0.229479
951,CLEC2B_KLRF1,CLEC2B,KLRF1,cDC1,NK_CD16_bright,False,,,0.281213,True,-0.213644,9.14145e-13,0.229479
952,CLEC2B_KLRF1,CLEC2B,KLRF1,iNKT_cells,NK_CD16_bright,False,,,0.113378,True,-0.213644,9.14145e-13,0.229479


In [146]:
df_output_downreg.columns

Index(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A',
       'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B'],
      dtype='object')

In [148]:
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

In [118]:
#df_output_downreg.to_csv(save_path + '20210318_cellphone_interactions_table_with_gene_stats_downreg_in_CVID_validation_cohort_no_logFC_cutoff.csv')

### Splitting tables into 2 tables: simple interactions and complex interactions, latter being deconvoluted into pseudo-interactions for each subunit of a complex

In [150]:
# getting indices of complex interactions
complex_interaction_rows_downreg = []

for n_row in list(df_output_downreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_downreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_downreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) or isinstance(curr_partner_B_genes, list): # if partner A or B is a complex
        complex_interaction_rows_downreg.append(n_row)
        

In [151]:
len(complex_interaction_rows_downreg)

190

In [152]:
np.unique(complex_interaction_rows_downreg, return_counts=True)

(array(['10', '100', '101', '102', '103', '104', '105', '11', '12', '13',
        '14', '15', '152', '153', '154', '155', '156', '157', '158', '159',
        '16', '160', '161', '162', '163', '164', '165', '166', '167',
        '168', '169', '17', '170', '171', '177', '178', '179', '18', '180',
        '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29',
        '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40',
        '41', '42', '420', '421', '422', '423', '424', '425', '426', '427',
        '428', '43', '44', '45', '46', '47', '48', '49', '5', '55', '56',
        '57', '58', '59', '6', '60', '61', '62', '626', '627', '628',
        '629', '63', '630', '631', '632', '633', '634', '635', '636',
        '637', '638', '639', '64', '640', '641', '642', '643', '644',
        '645', '646', '647', '648', '649', '65', '650', '651', '652',
        '653', '654', '655', '656', '657', '658', '66', '67', '7', '8',
        '879', '880', '881', '882', '883', '884', '885

In [154]:
# splitting simple and complex interactions into 2 separate tables
df_output_downreg_complex = df_output_downreg.loc[complex_interaction_rows_downreg,:]
df_output_downreg_simple = df_output_downreg.drop(complex_interaction_rows_downreg, axis=0)

In [155]:
df_output_downreg_simple.shape

(764, 13)

In [156]:
df_output_downreg_complex.shape

(190, 13)

In [157]:
df_output_downreg.shape

(954, 13)

In [158]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,SIRPA,CD47,Macrophages,cDC1,False,,,0.526814,True,-0.0631467,0.0414285,0.490082
1,SIRPA_CD47,SIRPA,CD47,Monocytes_classical,cDC1,False,,,0.373584,True,-0.0631467,0.0414285,0.490082
2,SIRPA_CD47,SIRPA,CD47,Monocytes_intermediate,cDC1,False,,,0.220081,True,-0.0631467,0.0414285,0.490082
3,SIRPA_CD47,SIRPA,CD47,Monocytes_non-classical,cDC1,False,,,0.358145,True,-0.0631467,0.0414285,0.490082
4,SIRPA_CD47,SIRPA,CD47,cDC1,cDC1,False,,,0.416398,True,-0.0631467,0.0414285,0.490082
...,...,...,...,...,...,...,...,...,...,...,...,...,...
949,CLEC2B_KLRF1,CLEC2B,KLRF1,TMRA_CD8+,NK_CD16_bright,False,,,0.382964,True,-0.213644,9.14145e-13,0.229479
950,CLEC2B_KLRF1,CLEC2B,KLRF1,T_gd,NK_CD16_bright,False,,,0.312906,True,-0.213644,9.14145e-13,0.229479
951,CLEC2B_KLRF1,CLEC2B,KLRF1,cDC1,NK_CD16_bright,False,,,0.281213,True,-0.213644,9.14145e-13,0.229479
952,CLEC2B_KLRF1,CLEC2B,KLRF1,iNKT_cells,NK_CD16_bright,False,,,0.113378,True,-0.213644,9.14145e-13,0.229479


In [159]:
df_output_downreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
5,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_classical,B_cells_memory,True,-0.0862297,0.00977878,0.561617,False,,,"[0.19960988296488946, 0.3446033810143043]"
6,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_classical,B_cells_memory_activated,True,-0.0862297,0.00977878,0.561617,False,,,"[0.1988657844990548, 0.3986767485822306]"
7,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_classical,MAIT_cells,True,-0.0862297,0.00977878,0.561617,False,,,"[0.12446717817561807, 0.2557544757033248]"
8,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_classical,Macrophages,True,-0.0862297,0.00977878,0.561617,False,,,"[0.5068349106203995, 0.22476340694006308]"
9,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_classical,Monocytes_intermediate,True,-0.0862297,0.00977878,0.561617,False,,,"[0.2307875327147276, 0.14537235308113253]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
926,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",cDC1,Monocytes_non-classical,True,-0.0631467,0.0414285,0.490082,False,,,"[0.10494203782794387, 0.9853569249542404]"
927,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",iNKT_cells,Macrophages,False,,,0.346877,True,"[-0.0425479309720542, -0.0842033434927592]","[0.0282921855917273, 0.00115214844296501]","[0.192878, 0.989826]"
928,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",iNKT_cells,cDC1,False,,,0.346877,True,"[-0.0370663419936127, -0.0884798100710285]","[0.0122364390604355, 1.50842891999608e-05]","[0.130653, 0.985189]"
929,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",Macrophages,Macrophages,False,,,0.519716,True,"[-0.0425479309720542, -0.0842033434927592]","[0.0282921855917273, 0.00115214844296501]","[0.192878, 0.989826]"


In [160]:
# checking whether any complexes interact with other complexes and how many subunits each complex contains here

n_subunits_downreg = []

for n_row in list(df_output_downreg.index):
    #print('row', n_row)
    
    curr_partner_A_genes = df_output_downreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_downreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) and isinstance(curr_partner_B_genes, list): # if partner A AND B is a complex
        print('row', n_row)
        print('both are complexes')
        
    if isinstance(curr_partner_A_genes, list): # if partner A is a complex
        print('row', n_row)
        print('curr_partner_A_genes',curr_partner_A_genes)
        print('partner A is a complex, len is:', len(curr_partner_A_genes))
        n_subunits_downreg.append(len(curr_partner_A_genes))
        
    if isinstance(curr_partner_B_genes, list): # if partner B is a complex
        print('row', n_row)
        print('curr_partner_B_genes',curr_partner_B_genes)
        print('partner B is a complex, len is:', len(curr_partner_B_genes))
        n_subunits_downreg.append(len(curr_partner_B_genes))

row 5
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 6
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 7
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 8
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 9
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 10
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 11
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 12
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 13
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 14
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 15
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 16
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 17
curr_partner_B_genes ['ITG

In [162]:
np.unique(n_subunits_downreg, return_counts=True)

(array([2]), array([190]))

#### So 1 more scenario to ignore: there are no interactions of a complex with a complex
#### And max complex size is 2 subunits here

In [163]:
df_output_downreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
5,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_classical,B_cells_memory,True,-0.0862297,0.00977878,0.561617,False,,,"[0.19960988296488946, 0.3446033810143043]"
6,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_classical,B_cells_memory_activated,True,-0.0862297,0.00977878,0.561617,False,,,"[0.1988657844990548, 0.3986767485822306]"
7,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_classical,MAIT_cells,True,-0.0862297,0.00977878,0.561617,False,,,"[0.12446717817561807, 0.2557544757033248]"
8,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_classical,Macrophages,True,-0.0862297,0.00977878,0.561617,False,,,"[0.5068349106203995, 0.22476340694006308]"
9,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",Monocytes_classical,Monocytes_intermediate,True,-0.0862297,0.00977878,0.561617,False,,,"[0.2307875327147276, 0.14537235308113253]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
926,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",cDC1,Monocytes_non-classical,True,-0.0631467,0.0414285,0.490082,False,,,"[0.10494203782794387, 0.9853569249542404]"
927,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",iNKT_cells,Macrophages,False,,,0.346877,True,"[-0.0425479309720542, -0.0842033434927592]","[0.0282921855917273, 0.00115214844296501]","[0.192878, 0.989826]"
928,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",iNKT_cells,cDC1,False,,,0.346877,True,"[-0.0370663419936127, -0.0884798100710285]","[0.0122364390604355, 1.50842891999608e-05]","[0.130653, 0.985189]"
929,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",Macrophages,Macrophages,False,,,0.519716,True,"[-0.0425479309720542, -0.0842033434927592]","[0.0282921855917273, 0.00115214844296501]","[0.192878, 0.989826]"


In [164]:
# Duplicating the table and then choosing only 0th or 1st or 2nd values for the complexes
df_output_downreg_complex_member_1 = df_output_downreg_complex.copy()
df_output_downreg_complex_member_2 = df_output_downreg_complex.copy()
#df_output_downreg_complex_member_3 = df_output_downreg_complex.copy()

In [165]:
# splitting complex interaction entries by subunits / members
# for any values in these tables, if it's a list, save the 0th for df_output_upreg_complex_member_1 and 1st for df_output_upreg_complex_member_2

# which rows contain interaction with a 3-subunit complex? to then subset df_output_downreg_complex_member_3
#subunit_3_rows = []

for n_row in list(df_output_downreg_complex.index):
    #print('outside for loop, row', n_row)
    
    for col in df_output_downreg_complex.columns:
        #print('col', col)
        if isinstance(df_output_downreg_complex.loc[n_row, col], list):
            df_output_downreg_complex_member_1.loc[n_row, col] = df_output_downreg_complex.loc[n_row, col][0]
            df_output_downreg_complex_member_2.loc[n_row, col] = df_output_downreg_complex.loc[n_row, col][1]
            
            # additionally, if there are 3 subunits, separate into 3 entries
            #if len(df_output_upreg_complex.loc[n_row, col]) == 3:
                #df_output_upreg_complex_member_3.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][2]
                #print('3-subunit complex')
                #print('row', n_row)
                #print('adding to subunit_3_rows')
                #subunit_3_rows.append(n_row)


In [173]:
# making indices uniques for concatenantion later
df_output_downreg_complex_member_1.index = [idx + '_member_1' for idx in df_output_downreg_complex_member_1.index]
df_output_downreg_complex_member_2.index = [idx + '_member_2' for idx in df_output_downreg_complex_member_2.index]
#df_output_upreg_complex_member_3.index = [idx + '_member_3' for idx in df_output_upreg_complex_member_3.index]

In [174]:
# getting all indices
idx_concat = list(df_output_downreg_complex_member_1.index) + list(df_output_downreg_complex_member_2.index)

# sorting by original index number, so that the order is: member 1, member 2 and (where applicable) member 3
idx_concat.sort(key = lambda x: x.split('_')[0])
idx_concat

['10_member_1',
 '10_member_2',
 '100_member_1',
 '100_member_2',
 '101_member_1',
 '101_member_2',
 '102_member_1',
 '102_member_2',
 '103_member_1',
 '103_member_2',
 '104_member_1',
 '104_member_2',
 '105_member_1',
 '105_member_2',
 '11_member_1',
 '11_member_2',
 '12_member_1',
 '12_member_2',
 '13_member_1',
 '13_member_2',
 '14_member_1',
 '14_member_2',
 '15_member_1',
 '15_member_2',
 '152_member_1',
 '152_member_2',
 '153_member_1',
 '153_member_2',
 '154_member_1',
 '154_member_2',
 '155_member_1',
 '155_member_2',
 '156_member_1',
 '156_member_2',
 '157_member_1',
 '157_member_2',
 '158_member_1',
 '158_member_2',
 '159_member_1',
 '159_member_2',
 '16_member_1',
 '16_member_2',
 '160_member_1',
 '160_member_2',
 '161_member_1',
 '161_member_2',
 '162_member_1',
 '162_member_2',
 '163_member_1',
 '163_member_2',
 '164_member_1',
 '164_member_2',
 '165_member_1',
 '165_member_2',
 '166_member_1',
 '166_member_2',
 '167_member_1',
 '167_member_2',
 '168_member_1',
 '168_membe

In [175]:
df_output_downreg_complex_deconv = pd.concat([df_output_downreg_complex_member_1, df_output_downreg_complex_member_2])

In [176]:
df_output_downreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
5_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_classical,B_cells_memory,True,-0.0862297,0.00977878,0.561617,False,,,0.19961
6_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_classical,B_cells_memory_activated,True,-0.0862297,0.00977878,0.561617,False,,,0.198866
7_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_classical,MAIT_cells,True,-0.0862297,0.00977878,0.561617,False,,,0.124467
8_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_classical,Macrophages,True,-0.0862297,0.00977878,0.561617,False,,,0.506835
9_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_classical,Monocytes_intermediate,True,-0.0862297,0.00977878,0.561617,False,,,0.230788
...,...,...,...,...,...,...,...,...,...,...,...,...,...
926_member_2,CD47_SIRB1_complex,CD47,TYROBP,cDC1,Monocytes_non-classical,True,-0.0631467,0.0414285,0.490082,False,,,0.985357
927_member_2,CD47_SIRB1_complex,CD47,TYROBP,iNKT_cells,Macrophages,False,,,0.346877,True,-0.0842033,0.00115215,0.989826
928_member_2,CD47_SIRB1_complex,CD47,TYROBP,iNKT_cells,cDC1,False,,,0.346877,True,-0.0884798,1.50843e-05,0.985189
929_member_2,CD47_SIRB1_complex,CD47,TYROBP,Macrophages,Macrophages,False,,,0.519716,True,-0.0842033,0.00115215,0.989826


In [177]:
# organising entries so that member 1 entry is followed by member 2 entry and then member 3 entry if applicable
df_output_downreg_complex_deconv = df_output_downreg_complex_deconv.loc[idx_concat,:]

In [178]:
df_output_downreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
10_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,Monocytes_classical,Monocytes_non-classical,True,-0.0862297,0.00977878,0.561617,False,,,0.344112
10_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,Monocytes_classical,Monocytes_non-classical,True,-0.0862297,0.00977878,0.561617,False,,,0.260525
100_member_1,CD94:NKG2A_HLA-E,KLRC1,HLA-E,NK_CD16_bright,iNKT_cells,False,,,0.177748,True,-0.28633,1.76266e-11,0.801008
100_member_2,CD94:NKG2A_HLA-E,KLRD1,HLA-E,NK_CD16_bright,iNKT_cells,False,,,0.566301,True,-0.28633,1.76266e-11,0.801008
101_member_1,CD94:NKG2A_HLA-E,KLRC1,HLA-E,NK_CD16_bright_activated,iNKT_cells,False,,,0.218349,True,-0.28633,1.76266e-11,0.801008
...,...,...,...,...,...,...,...,...,...,...,...,...,...
97_member_2,CD8_receptor_LCK,CD8A,LCK,TMRA_CD8+,B_cells_memory_activated,False,,,0.435596,True,-0.0536824,0.000110454,0.180195
98_member_1,CD8_receptor_LCK,CD8B,LCK,T_regs,B_cells_memory_activated,False,,,0.183597,True,-0.0536824,0.000110454,0.180195
98_member_2,CD8_receptor_LCK,CD8A,LCK,T_regs,B_cells_memory_activated,False,,,0.200666,True,-0.0536824,0.000110454,0.180195
99_member_1,CD8_receptor_LCK,CD8B,LCK,iNKT_cells,B_cells_memory_activated,False,,,0.189851,True,-0.0536824,0.000110454,0.180195


In [179]:
# saving these deconvoluted complex interactions
df_output_downreg_complex_deconv.to_csv(save_path + '20210324_cellphone_interactions_table_with_gene_stats_downreg_in_CVID_all_validation_cohort_no_logFC_cutoff_complexes_deconv_into_pseduinteractions.csv')

In [180]:
# saving the simple interactions table
df_output_downreg_simple.to_csv(save_path + '20210324_cellphone_interactions_table_with_gene_stats_downreg_in_CVID_all_validation_cohort_no_logFC_cutoff_simple_interactions.csv')

In [181]:
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

In [93]:
# manually checking some hits from previous version of analysis of twins
for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index):
    if 'CCL22' in interaction:
        print(interaction)
        curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction])
        curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset[interaction] > 0].index)
        print('this interaction is detected in following celltype pairs:', curr_subset_nonzero_interacting_celltype_pairs)

CCL22_CCR4
this interaction is detected in following celltype pairs: ['B_cells_memory---iNKT_cells', 'B_cells_memory_activated---iNKT_cells', 'B_cells_naive---iNKT_cells', 'B_cells_naive_activated---iNKT_cells', 'MAIT_cells---iNKT_cells', 'Macrophages---iNKT_cells', 'Monocytes_classical---iNKT_cells', 'Monocytes_intermediate---iNKT_cells', 'Monocytes_non-classical---iNKT_cells', 'NK_CD16_bright---iNKT_cells', 'NK_CD16_bright_activated---iNKT_cells', 'NK_CD56_bright_activated---iNKT_cells', 'Plasma_cells---iNKT_cells', 'Precursor_cells---iNKT_cells', 'T4_activated---iNKT_cells', 'T4_memory---iNKT_cells', 'T4_naive---iNKT_cells', 'T8_activated---iNKT_cells', 'T8_naive---iNKT_cells', 'TCM_CD8+---iNKT_cells', 'TEM_CD8+---iNKT_cells', 'TMRA_CD8+---iNKT_cells', 'T_gd---iNKT_cells', 'T_regs---iNKT_cells', 'cDC1---iNKT_cells', 'cDC2---iNKT_cells', 'iNKT_cells---B_cells_memory', 'iNKT_cells---B_cells_memory_activated', 'iNKT_cells---B_cells_naive', 'iNKT_cells---B_cells_naive_activated', 'iNKT_

In [90]:
curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc['CCL22_CCR4'])
curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset['CCL22_CCR4'] > 0].index)
curr_subset_nonzero_interacting_celltype_pairs

['B_cells_memory---iNKT_cells',
 'B_cells_memory_activated---iNKT_cells',
 'B_cells_naive---iNKT_cells',
 'B_cells_naive_activated---iNKT_cells',
 'MAIT_cells---iNKT_cells',
 'Macrophages---iNKT_cells',
 'Monocytes_classical---iNKT_cells',
 'Monocytes_intermediate---iNKT_cells',
 'Monocytes_non-classical---iNKT_cells',
 'NK_CD16_bright---iNKT_cells',
 'NK_CD16_bright_activated---iNKT_cells',
 'NK_CD56_bright_activated---iNKT_cells',
 'Plasma_cells---iNKT_cells',
 'Precursor_cells---iNKT_cells',
 'T4_activated---iNKT_cells',
 'T4_memory---iNKT_cells',
 'T4_naive---iNKT_cells',
 'T8_activated---iNKT_cells',
 'T8_naive---iNKT_cells',
 'TCM_CD8+---iNKT_cells',
 'TEM_CD8+---iNKT_cells',
 'TMRA_CD8+---iNKT_cells',
 'T_gd---iNKT_cells',
 'T_regs---iNKT_cells',
 'cDC1---iNKT_cells',
 'cDC2---iNKT_cells',
 'iNKT_cells---B_cells_memory',
 'iNKT_cells---B_cells_memory_activated',
 'iNKT_cells---B_cells_naive',
 'iNKT_cells---B_cells_naive_activated',
 'iNKT_cells---MAIT_cells',
 'iNKT_cells---Mac

In [89]:
df_Exrp_LR_in_celltype_pairs_upreg_DE.loc['CCL22_CCR4']

B_cells_memory---B_cells_memory_activated    0.0
B_cells_memory---B_cells_naive               0.0
B_cells_memory---B_cells_naive_activated     0.0
B_cells_memory---MAIT_cells                  0.0
B_cells_memory---Macrophages                 0.0
                                            ... 
TMRA_CD8+---TMRA_CD8+                        0.0
T_gd---T_gd                                  0.0
T_regs---T_regs                              0.0
cDC1---cDC1                                  0.0
iNKT_cells---iNKT_cells                      1.0
Name: CCL22_CCR4, Length: 619, dtype: float64

In [92]:
# manually checking some hits from previous version of analysis of twins
for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index):
    if 'CCL22' in interaction:
        print(interaction)
        curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction])
        curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset[interaction] > 0].index)
        print('this interaction is detected in following celltype pairs:', curr_subset_nonzero_interacting_celltype_pairs)

CCL22_CCR4
this interaction is detected in following celltype pairs: ['B_cells_memory_activated---B_cells_memory', 'B_cells_memory_activated---B_cells_naive', 'B_cells_memory_activated---B_cells_naive_activated', 'B_cells_memory_activated---MAIT_cells', 'B_cells_memory_activated---Macrophages', 'B_cells_memory_activated---Monocytes_classical', 'B_cells_memory_activated---Monocytes_intermediate', 'B_cells_memory_activated---Monocytes_non-classical', 'B_cells_memory_activated---NK_CD16_bright', 'B_cells_memory_activated---NK_CD16_bright_activated', 'B_cells_memory_activated---NK_CD56_bright_activated', 'B_cells_memory_activated---Plasma_cells', 'B_cells_memory_activated---T4_activated', 'B_cells_memory_activated---T4_memory', 'B_cells_memory_activated---T4_naive', 'B_cells_memory_activated---T8_activated', 'B_cells_memory_activated---T8_naive', 'B_cells_memory_activated---TCM_CD8+', 'B_cells_memory_activated---TEM_CD8+', 'B_cells_memory_activated---TMRA_CD8+', 'B_cells_memory_activated--

In [97]:
# manually checking some hits from previous version of analysis of twins
for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index):
    if 'CXCL10_CXCR3' in interaction:
        print(interaction)
        curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction])
        curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset[interaction] > 0].index)
        print('this interaction is detected in following celltype pairs:', curr_subset_nonzero_interacting_celltype_pairs)

CXCL10_CXCR3
this interaction is detected in following celltype pairs: ['B_cells_memory---iNKT_cells', 'B_cells_memory_activated---iNKT_cells', 'B_cells_naive---iNKT_cells', 'B_cells_naive_activated---iNKT_cells', 'MAIT_cells---iNKT_cells', 'Macrophages---iNKT_cells', 'Monocytes_classical---iNKT_cells', 'Monocytes_intermediate---iNKT_cells', 'Monocytes_non-classical---iNKT_cells', 'NK_CD16_bright---iNKT_cells', 'NK_CD16_bright_activated---iNKT_cells', 'NK_CD56_bright_activated---iNKT_cells', 'Plasma_cells---iNKT_cells', 'Precursor_cells---iNKT_cells', 'T4_activated---iNKT_cells', 'T4_memory---iNKT_cells', 'T4_naive---iNKT_cells', 'T8_activated---iNKT_cells', 'T8_naive---iNKT_cells', 'TCM_CD8+---iNKT_cells', 'TEM_CD8+---iNKT_cells', 'TMRA_CD8+---iNKT_cells', 'T_gd---iNKT_cells', 'T_regs---iNKT_cells', 'cDC1---iNKT_cells', 'cDC2---iNKT_cells', 'iNKT_cells---B_cells_memory', 'iNKT_cells---B_cells_memory_activated', 'iNKT_cells---B_cells_naive', 'iNKT_cells---B_cells_naive_activated', 'iNK

In [98]:
# manually checking some hits from previous version of analysis of twins
for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index):
    if 'CXCL10_CXCR3' in interaction:
        print(interaction)
        curr_subset = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction])
        curr_subset_nonzero_interacting_celltype_pairs = list(curr_subset[curr_subset[interaction] > 0].index)
        print('this interaction is detected in following celltype pairs:', curr_subset_nonzero_interacting_celltype_pairs)

CXCL10_CXCR3
this interaction is detected in following celltype pairs: ['B_cells_memory---NK_CD16_bright', 'B_cells_memory---TMRA_CD8+', 'B_cells_memory_activated---NK_CD16_bright', 'B_cells_memory_activated---TMRA_CD8+', 'B_cells_naive---NK_CD16_bright', 'B_cells_naive---TMRA_CD8+', 'B_cells_naive_activated---NK_CD16_bright', 'B_cells_naive_activated---TMRA_CD8+', 'MAIT_cells---NK_CD16_bright', 'MAIT_cells---TMRA_CD8+', 'Macrophages---NK_CD16_bright', 'Macrophages---TMRA_CD8+', 'Monocytes_classical---NK_CD16_bright', 'Monocytes_classical---TMRA_CD8+', 'Monocytes_intermediate---NK_CD16_bright', 'Monocytes_intermediate---TMRA_CD8+', 'Monocytes_non-classical---NK_CD16_bright', 'Monocytes_non-classical---TMRA_CD8+', 'NK_CD16_bright---TMRA_CD8+', 'NK_CD16_bright_activated---NK_CD16_bright', 'NK_CD16_bright_activated---TMRA_CD8+', 'NK_CD56_bright_activated---NK_CD16_bright', 'NK_CD56_bright_activated---TMRA_CD8+', 'Plasma_cells---NK_CD16_bright', 'Plasma_cells---TMRA_CD8+', 'T4_activated---

### Checking some stuff

In [29]:
# reading the user curated database starting files to see what interactions haven't made it here

path = '/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/'

# saving them into .tsv files now
interactions_curated = pd.read_csv(path + 'interactions_curated_subset_notLuz.tsv', sep='\t', index_col=0)
complexes_curated = pd.read_csv(path + 'complex_curated.tsv', sep='\t', index_col=0)

In [30]:
interactions_curated.columns

Index(['partner_a', 'partner_b', 'protein_name_a', 'protein_name_b',
       'annotation_strategy', 'source', 'is_ppi', 'reactome_complex',
       'reactome_reaction', 'reactome_pathway', 'complexPortal_complex',
       'curator', 'comments'],
      dtype='object')

In [31]:
np.unique(interactions_curated['curator'], return_counts=True)

(array(['JRodriguezUbreva', 'RVentoTormo'], dtype=object), array([   1, 1339]))

In [32]:
np.unique(interactions_curated['annotation_strategy'], return_counts=True)

(array(['curated'], dtype=object), array([1340]))

In [65]:
interactions_curated#[interactions_curated['partner_a'] == 'Q92478']

Unnamed: 0_level_0,partner_a,partner_b,protein_name_a,protein_name_b,annotation_strategy,source,is_ppi,reactome_complex,reactome_reaction,reactome_pathway,complexPortal_complex,curator,comments
id_cp_interaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
,Q9Y275,Q96RJ3,TN13B_HUMAN,TR13C_HUMAN,curated,uniprot;reactome,True,R-HSA-5676540,R-HSA-5676599,R-HSA-1280215,,JRodriguezUbreva,
CPI-CC0041E1D30,IL12,IL12_receptor,,,curated,uniprot,True,,,,,RVentoTormo,
CPI-CC0104F2A96,ACVR_1B2A_receptor,Activin_ligand_ab,,,curated,PMID:22710174;PMID:22991378,True,,,,,RVentoTormo,
CPI-CC045C36F28,ACVR_1A2A_receptor,Activin_ligand_ab,,,curated,less_common_binding;PMID:22710174;PMID:22991378_,True,,,,,RVentoTormo,
CPI-CC051643E98,IL23,IL23_receptor,,,curated,uniprot,True,,,,,RVentoTormo,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
,O14905,Q6FHJ7,WNT9B_HUMAN,SFRP4_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins
,O14905,Q8N474,WNT9B_HUMAN,SFRP1_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins
,O14905,Q92765,WNT9B_HUMAN,SFRP3_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins
,O14905,Q96HF1,WNT9B_HUMAN,SFRP2_HUMAN,curated,PMID:12775774,True,,,,,RVentoTormo,Inhibition WNT. Soluble proteins


In [28]:
#for interaction in int_cpDB['interacting_pair']:
#    if 'IL' in interaction:
#        print(interaction)
#        print(int_cpDB[int_cpDB['interacting_pair'] == interaction])

In [68]:
interactions_curated.columns

Index(['partner_a', 'partner_b', 'protein_name_a', 'protein_name_b',
       'annotation_strategy', 'source', 'is_ppi', 'reactome_complex',
       'reactome_reaction', 'reactome_pathway', 'complexPortal_complex',
       'curator', 'comments'],
      dtype='object')

In [33]:
list(interactions_curated['partner_a'])[:10]

['Q9Y275',
 'IL12',
 'ACVR_1B2A_receptor',
 'ACVR_1A2A_receptor',
 'IL23',
 'ACVR_1B2B_receptor',
 'integrin_aMb2_complex',
 'ACVR_1C2A_receptor',
 'ACVR_1A2B_receptor',
 'IL27']

For example, IL12 and IL12_receptor interaction is in the initial table but is not in the final, going to see if it made it into the expr table

In [70]:
for compl in list(complexes_curated.index):
    if 'OSMR' in compl:
        print(compl)
        print(complexes_curated.loc[compl,:])

OSMR
uniprot_1                                                           Q99650
uniprot_2                                                           P40189
uniprot_3                                                              NaN
uniprot_4                                                              NaN
transmembrane                                                         True
peripheral                                                           False
secreted                                                             False
secreted_desc                                                          NaN
secreted_highlight                                                   False
receptor                                                              True
receptor_desc                                 Cytokine_receptor_IL6_family
integrin                                                             False
other                                                                False
other_desc          

In [24]:
# database generated from 1.3K odd interactions
database_file = '/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/cellphonedb_analysis/database_20210218/cellphonedb_user_2021-02-18-14_26.db'

import sqlite3

def importdb(file_path):
    conn = sqlite3.connect(file_path)
    c = conn.cursor()
    c.execute("SELECT name FROM sqlite_master WHERE type='table';")
    for table in c.fetchall():
        yield list(c.execute('SELECT * from ?;', (table[0],)))

In [26]:
database = importdb(database_file)

In [30]:
database

AttributeError: 'generator' object has no attribute 'keys'