In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

In [2]:
# define cutoff variables
filter_int_user_curated = True
per_cutoff = 0.1 # only consider genes expressed in >10% of cells
pval_cutoff = 0.05 # FDR

Load database and correct gene names

In [3]:
genes_cpDB = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/hsa_uniprot.txt', sep = '\t')
genes_cpDB

Unnamed: 0,uniprot,Entry,gene_name
0,P01611,KVD12_HUMAN,IGKV1D-12
1,P01615,KVD28_HUMAN,IGKV2D-28
2,Q15334,L2GL1_HUMAN,LLGL1
3,Q6ZP29,LAAT1_HUMAN,PQLC2
4,Q9GZZ8,LACRT_HUMAN,LACRT
...,...,...,...
20311,Q9H900,ZWILC_HUMAN,ZWILCH
20312,P98169,ZXDB_HUMAN,ZXDB
20313,Q2QGD7,ZXDC_HUMAN,ZXDC
20314,Q15942,ZYX_HUMAN,ZYX


In [4]:
com_cpDB = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/database_20210402/complex_generated.csv')
com_cpDB['complex_name'] = ['complex:' + complex_name for complex_name in com_cpDB['complex_name']]
com_cpDB

Unnamed: 0,complex_name,uniprot_1,uniprot_2,uniprot_3,uniprot_4,transmembrane,peripheral,secreted,secreted_desc,secreted_highlight,receptor,receptor_desc,integrin,other,other_desc,pdb_id,pdb_structure,stoichiometry,comments_complex
0,complex:contactin complex II,Q12860,Q92823,,,True,False,False,,False,False,,False,False,,,FALSE,,NRCAM bind in cis and in trans to contactin-1
1,complex:IL6 receptor,P08887,P40189,,,True,False,False,,False,True,Cytokine receptor IL6 family,False,False,,1p9m,binding,IL6;IL6;IL6R;IL6R;IL6ST;IL6ST,Signal activation necessitate an association w...
2,complex:AT8B4CC50B complex,Q8TF62,Q3MIR4,,,True,False,False,,False,False,,False,False,,,FALSE,,Interacts with beta subunits TMEM30A and TMEM30B
3,complex:KCNV1KCNB2 complex,Q6PIU1,Q92953,,,True,False,False,,False,False,,False,False,,,FALSE,,Has to be associated with another potassium ch...
4,complex:LRFN3LRFN5 complex,Q9BTN0,Q96NI6,,,True,False,False,,False,False,,False,False,,,FALSE,,"Can form heteromeric complexes with LRFN1, LRF..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
614,complex:FZD8_LRP6,O75581,Q9H461,,,True,False,False,,False,False,,False,False,,,False,,
615,complex:FZD9_LRP5,O75197,O00144,,,True,False,False,,False,False,,False,False,,,False,,
616,complex:FZD9_LRP6,O75581,O00144,,,True,False,False,,False,False,,False,False,,,False,,
617,complex:FZD10_LRP5,O75197,Q9ULW2,,,True,False,False,,False,False,,False,False,,,False,,


In [5]:
Com2Gene = {}
for complex_name in np.unique(com_cpDB['complex_name']):
    curr_complex_proteins = list(com_cpDB[(com_cpDB['complex_name'] == complex_name)].loc[:, ['uniprot_1', 'uniprot_2', 'uniprot_3', 'uniprot_4']].values.tolist())
    curr_complex_proteins = [item for sublist in curr_complex_proteins for item in sublist]
    curr_complex_proteins = [x for x in curr_complex_proteins if str(x) != 'nan']
    Com2Gene[complex_name] = list(genes_cpDB[genes_cpDB['uniprot'].isin(curr_complex_proteins)]['gene_name'])
list(Com2Gene.items())[:10]

[('complex:12oxoLeukotrieneB4_byPTGR1', ['PTGR1']),
 ('complex:17aHydroxyprogesterone_byCYP17A1', ['CYP17A1']),
 ('complex:22Hydroxycholesterol_byCYP11A1', ['CYP11A1']),
 ('complex:22Hydroxycholesterol_byCYP3A4', ['CYP3A4']),
 ('complex:2arachidonoylglycerol_byDAGLA', ['DAGLA']),
 ('complex:2arachidonoylglycerol_byDAGLB', ['DAGLB']),
 ('complex:5-alpha-Dihydroprogesterone_byDHRS9', ['DHRS9']),
 ('complex:5HT3C5HT3A complex', ['HTR3A', 'HTR3C']),
 ('complex:5HT3C5HT3A_complex', ['HTR3A', 'HTR3C']),
 ('complex:5HT3D receptor', ['HTR3A', 'HTR3D'])]

In [6]:
int_cpDB = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/out_20210402/means.txt', sep = '\t')
int_cpDB = int_cpDB.loc[:, list(int_cpDB.columns)[:11]]
int_cpDB = int_cpDB[int_cpDB['annotation_strategy'] != 'curated']
if filter_int_user_curated:
    int_cpDB = int_cpDB[int_cpDB['annotation_strategy'] == 'user_curated']
int_cpDB

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,is_integrin
1,CPI-CS0481C1F9A,FZD1_LRP5_WNT11,complex:FZD1_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
2,CPI-CS0F29C6285,FZD1_LRP6_WNT11,complex:FZD1_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
3,CPI-CS0372FC240,FZD2_LRP5_WNT11,complex:FZD2_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
4,CPI-CS031A2034E,FZD2_LRP6_WNT11,complex:FZD2_LRP6,simple:O96014,,WNT11,True,False,False,user_curated,False
5,CPI-CS02643715E,FZD3_LRP5_WNT11,complex:FZD3_LRP5,simple:O96014,,WNT11,True,False,False,user_curated,False
...,...,...,...,...,...,...,...,...,...,...,...
1453,CPI-SC090068F7B,TSLP_TSLPR,simple:Q969D9,complex:TSLPR,TSLP,,True,False,True,user_curated,False
1454,CPI-SC047CEF2DD,CRLF2_TSLPR,simple:Q9HC73,complex:TSLPR,CRLF2,,True,True,True,user_curated,False
1455,CPI-SS04C672963,ESAM_ESAM,simple:Q96AP7,simple:Q96AP7,ESAM,ESAM,False,False,False,user_curated,False
1457,CPI-SC060C69786,NRTN_RET_receptor_2,simple:Q99748,complex:RET_receptor_2,NRTN,,True,False,True,user_curated,False


In [7]:
interactions_list = int_cpDB['interacting_pair'].tolist()
interactions_list

['FZD1_LRP5_WNT11',
 'FZD1_LRP6_WNT11',
 'FZD2_LRP5_WNT11',
 'FZD2_LRP6_WNT11',
 'FZD3_LRP5_WNT11',
 'FZD3_LRP6_WNT11',
 'FZD4_LRP5_WNT11',
 'FZD4_LRP6_WNT11',
 'FZD5_LRP5_WNT11',
 'FZD5_LRP6_WNT11',
 'FZD6_LRP5_WNT11',
 'FZD6_LRP6_WNT11',
 'FZD7_LRP5_WNT11',
 'FZD7_LRP6_WNT11',
 'FZD8_LRP5_WNT11',
 'FZD8_LRP6_WNT11',
 'FZD1_LRP5_WNT5B',
 'FZD1_LRP6_WNT5B',
 'FZD2_LRP5_WNT5B',
 'FZD2_LRP6_WNT5B',
 'FZD3_LRP5_WNT5B',
 'FZD3_LRP6_WNT5B',
 'FZD4_LRP5_WNT5B',
 'FZD4_LRP6_WNT5B',
 'FZD5_LRP5_WNT5B',
 'FZD5_LRP6_WNT5B',
 'FZD6_LRP5_WNT5B',
 'FZD6_LRP6_WNT5B',
 'FZD7_LRP5_WNT5B',
 'FZD7_LRP6_WNT5B',
 'FZD8_LRP5_WNT5B',
 'FZD8_LRP6_WNT5B',
 'SFRP5_WNT5B',
 'SFRP4_WNT5B',
 'FRZB_WNT5B',
 'SFRP2_WNT5B',
 'FZD1_LRP5_WNT7A',
 'FZD1_LRP6_WNT7A',
 'FZD2_LRP5_WNT7A',
 'FZD2_LRP6_WNT7A',
 'FZD3_LRP5_WNT7A',
 'FZD3_LRP6_WNT7A',
 'FZD4_LRP5_WNT7A',
 'FZD4_LRP6_WNT7A',
 'FZD5_LRP5_WNT7A',
 'FZD5_LRP6_WNT7A',
 'FZD6_LRP5_WNT7A',
 'FZD6_LRP6_WNT7A',
 'FZD7_LRP5_WNT7A',
 'FZD7_LRP6_WNT7A',
 'FZD8_LRP5_WNT7A

In [8]:
Int2Gene = {}
for i in int_cpDB.index:
    curr_df_row = int_cpDB.loc[i,:]
    if str(curr_df_row['gene_a']) == 'nan':
        partner_A = Com2Gene[curr_df_row['partner_a']]
    else:
        partner_A = [curr_df_row['gene_a']]

    if str(curr_df_row['gene_b']) == 'nan':
        partner_B = Com2Gene[curr_df_row['partner_b']]
    else:
        partner_B = [curr_df_row['gene_b']]
        
    interaction_id = curr_df_row['interacting_pair']
    Int2Gene[interaction_id] = {'partner_a': partner_A,
                                'partner_b': partner_B}
list(Int2Gene.items())[:10]

[('FZD1_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD1'], 'partner_b': ['WNT11']}),
 ('FZD1_LRP6_WNT11', {'partner_a': ['FZD1', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD2_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD2'], 'partner_b': ['WNT11']}),
 ('FZD2_LRP6_WNT11', {'partner_a': ['FZD2', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD3_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD3'], 'partner_b': ['WNT11']}),
 ('FZD3_LRP6_WNT11', {'partner_a': ['FZD3', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD4_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD4'], 'partner_b': ['WNT11']}),
 ('FZD4_LRP6_WNT11', {'partner_a': ['FZD4', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD5_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD5'], 'partner_b': ['WNT11']}),
 ('FZD5_LRP6_WNT11', {'partner_a': ['FZD5', 'LRP6'], 'partner_b': ['WNT11']})]

In [9]:
list(Int2Gene.items())

[('FZD1_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD1'], 'partner_b': ['WNT11']}),
 ('FZD1_LRP6_WNT11', {'partner_a': ['FZD1', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD2_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD2'], 'partner_b': ['WNT11']}),
 ('FZD2_LRP6_WNT11', {'partner_a': ['FZD2', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD3_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD3'], 'partner_b': ['WNT11']}),
 ('FZD3_LRP6_WNT11', {'partner_a': ['FZD3', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD4_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD4'], 'partner_b': ['WNT11']}),
 ('FZD4_LRP6_WNT11', {'partner_a': ['FZD4', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD5_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD5'], 'partner_b': ['WNT11']}),
 ('FZD5_LRP6_WNT11', {'partner_a': ['FZD5', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD6_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD6'], 'partner_b': ['WNT11']}),
 ('FZD6_LRP6_WNT11', {'partner_a': ['FZD6', 'LRP6'], 'partner_b': ['WNT11']}),
 ('FZD7_LRP5_WNT11', {'partner_a': ['LRP5', 'FZD7'],

In [10]:
# use to define cell type pairs
path_Exp = '/lustre/scratch117/cellgen/team292/ab55/20210402_PercentExpressed_for_cellphone.csv'
Per_df = pd.read_csv(path_Exp, index_col = 0)

genes_expr_per_cell_type = {} 
for ct in Per_df.columns:
    print(ct)
    curr_table = pd.DataFrame(Per_df.loc[:, ct])
    mask = (curr_table[ct] > per_cutoff)
    genes_expr_per_cell_type[ct] = list(curr_table[ct][mask].index)
Per_df

CD14 mono
CD16 mono
CD4 memory T
CD4 naïve T
CD8 memory T
CD8 naïve T
Exhausted B
HSC
Immature B
MAIT
Memory B
NK CD56(bright)
NK CD56(dim)
NKT
Naïve B
Neutrophil
Plasma B
Plasmablast
Platelets
Prolif NK
Prolif T
RBC
Treg
cDC
pDC
γδT


Unnamed: 0,CD14 mono,CD16 mono,CD4 memory T,CD4 naïve T,CD8 memory T,CD8 naïve T,Exhausted B,HSC,Immature B,MAIT,...,Plasma B,Plasmablast,Platelets,Prolif NK,Prolif T,RBC,Treg,cDC,pDC,γδT
AL627309.1,0.001057,0.001560,0.001221,0.001190,0.000482,0.001257,0.000000,0.011111,0.003922,0.000000,...,0.000000,0.002421,0.000693,0.000000,0.001179,0.000000,0.000000,0.001842,0.000000,0.00000
AL627309.3,0.000042,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000807,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
AL627309.2,0.000803,0.001040,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
AL669831.2,0.000085,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
AL669831.5,0.013828,0.016121,0.008242,0.008963,0.006909,0.007541,0.015686,0.029630,0.007843,0.008969,...,0.033735,0.022599,0.003463,0.004425,0.024160,0.003839,0.019608,0.031308,0.013333,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AL354822.1,0.001184,0.002600,0.003053,0.003608,0.001928,0.002095,0.001961,0.003704,0.003922,0.004484,...,0.016867,0.001614,0.002078,0.013274,0.006482,0.000256,0.003268,0.003683,0.000000,0.00463
AC004556.1,0.034971,0.033281,0.002137,0.007996,0.004981,0.008379,0.021569,0.048148,0.003922,0.000000,...,0.081928,0.079096,0.002078,0.022124,0.030053,0.001792,0.009804,0.047882,0.000000,0.00463
AC233755.2,0.002453,0.000520,0.002747,0.002678,0.001607,0.001676,0.005882,0.000000,0.007843,0.000000,...,0.021687,0.020178,0.005540,0.004425,0.003536,0.003071,0.006536,0.001842,0.000000,0.00463
AC233755.1,0.009853,0.020281,0.011294,0.016625,0.011889,0.016757,0.013725,0.000000,0.058824,0.026906,...,0.091566,0.081517,0.010388,0.044248,0.004714,0.033786,0.022876,0.003683,0.093333,0.00463


Load DE tables

In [11]:
DE_RA = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/Joint_DE_table_RA_vs_rest_filtered.csv')
DE_RA

Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,0,LGALS2,0.387839,0.000000e+00,0.000000e+00,0.652597,0.264758,0.569413,0.247351,CD14 mono
1,1,FOS,-0.543533,6.896551e-292,2.855862e-288,2.046344,2.589877,0.923695,0.951635,CD14 mono
2,2,CD63,-0.433710,1.118027e-281,3.858125e-278,0.885395,1.319104,0.736462,0.802894,CD14 mono
3,3,CLU,-0.278287,5.909354e-272,1.747903e-268,0.115825,0.394112,0.156219,0.362709,CD14 mono
4,4,HMOX1,-0.504810,1.691163e-228,3.501553e-225,0.600111,1.104921,0.558582,0.640481,CD14 mono
...,...,...,...,...,...,...,...,...,...,...
10298,26,CD2,-0.137560,5.562531e-04,1.200101e-02,0.725809,0.863369,0.557915,0.597362,NKT
10299,27,EPSTI1,-0.081279,6.638167e-04,1.383300e-02,0.144002,0.225281,0.138996,0.187742,NKT
10300,28,IRF7,-0.093833,7.389212e-04,1.510081e-02,0.222031,0.315864,0.206564,0.258340,NKT
10301,29,JPT1,-0.080550,1.461101e-03,2.600990e-02,0.208325,0.288875,0.200772,0.259891,NKT


In [12]:
DE_PS = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/Joint_DE_table_PS_vs_rest_filtered.csv')
DE_PS

Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,0,MTSS1,0.894124,0.000000,0.000000,1.327510,0.433386,0.761057,0.407629,CD14 mono
1,1,PLIN2,1.020501,0.000000,0.000000,1.484773,0.464272,0.711932,0.457540,CD14 mono
2,2,SAT1,0.985907,0.000000,0.000000,3.495494,2.509587,0.989760,0.974216,CD14 mono
3,3,AREG,1.279460,0.000000,0.000000,1.920389,0.640930,0.797121,0.363513,CD14 mono
4,4,TPST1,0.543313,0.000000,0.000000,0.668993,0.125680,0.519887,0.157185,CD14 mono
...,...,...,...,...,...,...,...,...,...,...
10798,47,ISG20,0.185423,0.000259,0.008250,1.098238,0.912815,0.735849,0.649805,NKT
10799,48,CXCR4,-0.212548,0.000331,0.010089,1.628972,1.841520,0.864151,0.885863,NKT
10800,49,KLF6,-0.223692,0.000440,0.012882,1.109934,1.333626,0.660377,0.751621,NKT
10801,50,RPS3,0.087406,0.000928,0.023205,3.476708,3.389302,0.996226,1.000000,NKT


In [13]:
DE_MS = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/Joint_DE_table_MS_vs_rest_filtered.csv')
DE_MS

Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,0,MNDA,1.146410,0.000000,0.000000,2.030917,0.884508,0.926096,0.671338,CD14 mono
1,1,GIMAP4,0.547464,0.000000,0.000000,0.657609,0.110145,0.580414,0.145095,CD14 mono
2,2,TNFSF10,0.560615,0.000000,0.000000,0.682024,0.121409,0.590317,0.156856,CD14 mono
3,3,GIMAP7,0.467760,0.000000,0.000000,0.551168,0.083408,0.534201,0.105469,CD14 mono
4,4,SAMHD1,0.737967,0.000000,0.000000,1.192707,0.454740,0.835687,0.460676,CD14 mono
...,...,...,...,...,...,...,...,...,...,...
15630,43,MT-ND3,-0.130767,0.001654,0.011188,1.623948,1.754714,0.880412,0.906203,NKT
15631,44,RPL37A,0.085446,0.002876,0.017507,2.589446,2.504000,0.995876,0.990166,NKT
15632,45,RPL29,0.070024,0.003278,0.019629,2.924436,2.854411,0.997938,0.997731,NKT
15633,46,RPS6,0.069916,0.005354,0.029036,2.907112,2.837196,1.000000,0.996218,NKT


In [14]:
DE_Control = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/Joint_DE_table_Control_vs_rest_filtered.csv')
DE_Control

Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,0,MT-ND4L,0.699686,0.000000,0.000000,2.108854,1.409167,0.959475,0.892411,CD14 mono
1,1,MT-ND3,0.555052,0.000000,0.000000,2.405833,1.850780,0.981132,0.957673,CD14 mono
2,2,MT-ND4,0.502666,0.000000,0.000000,2.714005,2.211339,0.989171,0.978370,CD14 mono
3,3,MT-ATP6,0.498666,0.000000,0.000000,2.629306,2.130639,0.988078,0.979489,CD14 mono
4,4,IGLV2-23,-0.104248,0.000000,0.000000,0.002640,0.106888,0.003500,0.112996,CD14 mono
...,...,...,...,...,...,...,...,...,...,...
11210,32,MT-CO2,0.101612,0.000361,0.006406,3.343557,3.241945,0.999211,0.996289,NKT
11211,33,TYROBP,-0.173622,0.000786,0.012112,1.706277,1.879899,0.767350,0.897959,NKT
11212,34,TOMM7,0.120765,0.000966,0.014078,1.526897,1.406132,0.889590,0.851577,NKT
11213,35,MT-ND2,0.115930,0.002174,0.027052,1.677349,1.561419,0.914038,0.883117,NKT


In [15]:
DE_RA = DE_RA[DE_RA['percentExpr_case'] > per_cutoff]
DE_RA

Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,0,LGALS2,0.387839,0.000000e+00,0.000000e+00,0.652597,0.264758,0.569413,0.247351,CD14 mono
1,1,FOS,-0.543533,6.896551e-292,2.855862e-288,2.046344,2.589877,0.923695,0.951635,CD14 mono
2,2,CD63,-0.433710,1.118027e-281,3.858125e-278,0.885395,1.319104,0.736462,0.802894,CD14 mono
3,3,CLU,-0.278287,5.909354e-272,1.747903e-268,0.115825,0.394112,0.156219,0.362709,CD14 mono
4,4,HMOX1,-0.504810,1.691163e-228,3.501553e-225,0.600111,1.104921,0.558582,0.640481,CD14 mono
...,...,...,...,...,...,...,...,...,...,...
10298,26,CD2,-0.137560,5.562531e-04,1.200101e-02,0.725809,0.863369,0.557915,0.597362,NKT
10299,27,EPSTI1,-0.081279,6.638167e-04,1.383300e-02,0.144002,0.225281,0.138996,0.187742,NKT
10300,28,IRF7,-0.093833,7.389212e-04,1.510081e-02,0.222031,0.315864,0.206564,0.258340,NKT
10301,29,JPT1,-0.080550,1.461101e-03,2.600990e-02,0.208325,0.288875,0.200772,0.259891,NKT


In [16]:
DE_PS = DE_PS[DE_PS['percentExpr_case'] > per_cutoff]
DE_PS

Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,0,MTSS1,0.894124,0.000000,0.000000,1.327510,0.433386,0.761057,0.407629,CD14 mono
1,1,PLIN2,1.020501,0.000000,0.000000,1.484773,0.464272,0.711932,0.457540,CD14 mono
2,2,SAT1,0.985907,0.000000,0.000000,3.495494,2.509587,0.989760,0.974216,CD14 mono
3,3,AREG,1.279460,0.000000,0.000000,1.920389,0.640930,0.797121,0.363513,CD14 mono
4,4,TPST1,0.543313,0.000000,0.000000,0.668993,0.125680,0.519887,0.157185,CD14 mono
...,...,...,...,...,...,...,...,...,...,...
10798,47,ISG20,0.185423,0.000259,0.008250,1.098238,0.912815,0.735849,0.649805,NKT
10799,48,CXCR4,-0.212548,0.000331,0.010089,1.628972,1.841520,0.864151,0.885863,NKT
10800,49,KLF6,-0.223692,0.000440,0.012882,1.109934,1.333626,0.660377,0.751621,NKT
10801,50,RPS3,0.087406,0.000928,0.023205,3.476708,3.389302,0.996226,1.000000,NKT


In [17]:
DE_MS = DE_MS[DE_MS['percentExpr_case'] > per_cutoff]
DE_MS

Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,0,MNDA,1.146410,0.000000,0.000000,2.030917,0.884508,0.926096,0.671338,CD14 mono
1,1,GIMAP4,0.547464,0.000000,0.000000,0.657609,0.110145,0.580414,0.145095,CD14 mono
2,2,TNFSF10,0.560615,0.000000,0.000000,0.682024,0.121409,0.590317,0.156856,CD14 mono
3,3,GIMAP7,0.467760,0.000000,0.000000,0.551168,0.083408,0.534201,0.105469,CD14 mono
4,4,SAMHD1,0.737967,0.000000,0.000000,1.192707,0.454740,0.835687,0.460676,CD14 mono
...,...,...,...,...,...,...,...,...,...,...
15630,43,MT-ND3,-0.130767,0.001654,0.011188,1.623948,1.754714,0.880412,0.906203,NKT
15631,44,RPL37A,0.085446,0.002876,0.017507,2.589446,2.504000,0.995876,0.990166,NKT
15632,45,RPL29,0.070024,0.003278,0.019629,2.924436,2.854411,0.997938,0.997731,NKT
15633,46,RPS6,0.069916,0.005354,0.029036,2.907112,2.837196,1.000000,0.996218,NKT


In [18]:
DE_Control = DE_Control[DE_Control['percentExpr_case'] > per_cutoff]
DE_Control

Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,0,MT-ND4L,0.699686,0.000000,0.000000,2.108854,1.409167,0.959475,0.892411,CD14 mono
1,1,MT-ND3,0.555052,0.000000,0.000000,2.405833,1.850780,0.981132,0.957673,CD14 mono
2,2,MT-ND4,0.502666,0.000000,0.000000,2.714005,2.211339,0.989171,0.978370,CD14 mono
3,3,MT-ATP6,0.498666,0.000000,0.000000,2.629306,2.130639,0.988078,0.979489,CD14 mono
5,5,MT-CYB,0.518412,0.000000,0.000000,2.760122,2.241710,0.989171,0.979862,CD14 mono
...,...,...,...,...,...,...,...,...,...,...
11210,32,MT-CO2,0.101612,0.000361,0.006406,3.343557,3.241945,0.999211,0.996289,NKT
11211,33,TYROBP,-0.173622,0.000786,0.012112,1.706277,1.879899,0.767350,0.897959,NKT
11212,34,TOMM7,0.120765,0.000966,0.014078,1.526897,1.406132,0.889590,0.851577,NKT
11213,35,MT-ND2,0.115930,0.002174,0.027052,1.677349,1.561419,0.914038,0.883117,NKT


In [19]:
clusters_RA = list(np.unique(DE_RA['cluster']))
is_DE_RA = {}
for cluster in clusters_RA:
    is_DE_RA[cluster] = list(DE_RA[DE_RA['cluster'] == cluster]['Gene'])
for ct in list(is_DE_RA.keys()):
    print(ct)
    print(len(is_DE_RA[ct]), '\n')

CD14 mono
1019 

CD16 mono
179 

CD4 memory T
1109 

CD4 naïve T
1448 

CD8 memory T
1026 

CD8 naïve T
510 

NK CD56(bright)
271 

NK CD56(dim)
793 

NKT
29 

Naïve B
994 

cDC
160 



In [20]:
clusters_PS = list(np.unique(DE_PS['cluster']))
is_DE_PS = {}
for cluster in clusters_PS:
    is_DE_PS[cluster] = list(DE_PS[DE_PS['cluster'] == cluster]['Gene'])
for ct in list(is_DE_PS.keys()):
    print(ct)
    print(len(is_DE_PS[ct]), '\n')

CD14 mono
1423 

CD16 mono
243 

CD4 memory T
700 

CD4 naïve T
1275 

CD8 memory T
824 

CD8 naïve T
585 

NK CD56(bright)
205 

NK CD56(dim)
791 

NKT
45 

Naïve B
760 

cDC
144 



In [21]:
clusters_MS = list(np.unique(DE_MS['cluster']))
is_DE_MS = {}
for cluster in clusters_MS:
    is_DE_MS[cluster] = list(DE_MS[DE_MS['cluster'] == cluster]['Gene'])
for ct in list(is_DE_MS.keys()):
    print(ct)
    print(len(is_DE_MS[ct]), '\n')

CD14 mono
1790 

CD16 mono
308 

CD4 memory T
821 

CD4 naïve T
1800 

CD8 memory T
1298 

CD8 naïve T
1462 

NK CD56(bright)
554 

NK CD56(dim)
1671 

NKT
47 

Naïve B
1198 

cDC
380 



In [22]:
clusters_Control = list(np.unique(DE_Control['cluster']))
is_DE_Control = {}
for cluster in clusters_Control:
    is_DE_Control[cluster] = list(DE_Control[DE_Control['cluster'] == cluster]['Gene'])
for ct in list(is_DE_Control.keys()):
    print(ct)
    print(len(is_DE_Control[ct]), '\n')

CD14 mono
1278 

CD16 mono
228 

CD4 memory T
275 

CD4 naïve T
1527 

CD8 memory T
930 

CD8 naïve T
1198 

NK CD56(bright)
360 

NK CD56(dim)
1074 

NKT
36 

Naïve B
885 

cDC
144 



Define celltype pairs for interactions

In [23]:
list(genes_expr_per_cell_type.keys())

['CD14 mono',
 'CD16 mono',
 'CD4 memory T',
 'CD4 naïve T',
 'CD8 memory T',
 'CD8 naïve T',
 'Exhausted B',
 'HSC',
 'Immature B',
 'MAIT',
 'Memory B',
 'NK CD56(bright)',
 'NK CD56(dim)',
 'NKT',
 'Naïve B',
 'Neutrophil',
 'Plasma B',
 'Plasmablast',
 'Platelets',
 'Prolif NK',
 'Prolif T',
 'RBC',
 'Treg',
 'cDC',
 'pDC',
 'γδT']

In [24]:
# all possible interactions (including reverse and self-interactions) => 26*26 = 676
pairwise_cluster_combinations = list(itertools.permutations(list(genes_expr_per_cell_type.keys()), 2))
self_inter_combinations = [(ct, ct) for ct in list(genes_expr_per_cell_type.keys())]
pairwise_cluster_combinations = pairwise_cluster_combinations + self_inter_combinations
len(pairwise_cluster_combinations)

676

In [25]:
pairwise_cluster_combinations[:10]

[('CD14 mono', 'CD16 mono'),
 ('CD14 mono', 'CD4 memory T'),
 ('CD14 mono', 'CD4 naïve T'),
 ('CD14 mono', 'CD8 memory T'),
 ('CD14 mono', 'CD8 naïve T'),
 ('CD14 mono', 'Exhausted B'),
 ('CD14 mono', 'HSC'),
 ('CD14 mono', 'Immature B'),
 ('CD14 mono', 'MAIT'),
 ('CD14 mono', 'Memory B')]

In [26]:
# consider interactions where at least one cell type has DEG
pairwise_cluster_combinations_DE = [elem for elem in pairwise_cluster_combinations if elem[0] in list(is_DE_RA.keys())
                                    or elem[1] in list(is_DE_RA.keys())]
len(pairwise_cluster_combinations_DE)

451

In [27]:
cluster_combinations_labels_DE = [comb[0] + '---' + comb[1] for comb in pairwise_cluster_combinations_DE]
(cluster_combinations_labels_DE)[:10]

['CD14 mono---CD16 mono',
 'CD14 mono---CD4 memory T',
 'CD14 mono---CD4 naïve T',
 'CD14 mono---CD8 memory T',
 'CD14 mono---CD8 naïve T',
 'CD14 mono---Exhausted B',
 'CD14 mono---HSC',
 'CD14 mono---Immature B',
 'CD14 mono---MAIT',
 'CD14 mono---Memory B']

Retrieve interactions

In [28]:
len(Int2Gene.keys())

899

In [29]:
df_Exrp_LR_in_celltype_pairs = pd.DataFrame(index = list(Int2Gene.keys()),
                                            columns = cluster_combinations_labels_DE,
                                            data = np.zeros((len(list(Int2Gene.keys())), 
                                                             len(cluster_combinations_labels_DE))))
df_Exrp_LR_in_celltype_pairs

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(bright)---NK CD56(bright),NK CD56(dim)---NK CD56(dim),NKT---NKT,Naïve B---Naïve B,cDC---cDC
FZD1_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD1_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD2_LRP6_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FZD3_LRP5_WNT11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TSLP_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRLF2_TSLPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ESAM_ESAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NRTN_RET_receptor_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
%%time
for interaction in list(df_Exrp_LR_in_celltype_pairs.index):
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs.columns):
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        are_all_expressed = all(elem in genes_expr_per_cell_type[ct_A] for elem in partner_A_genes) & all(elem in genes_expr_per_cell_type[ct_B] for elem in partner_B_genes)
        if are_all_expressed:
            df_Exrp_LR_in_celltype_pairs.loc[interaction, ct_pair] = 1
np.unique(df_Exrp_LR_in_celltype_pairs.values, return_counts = True)

CPU times: user 53.1 s, sys: 5.18 ms, total: 53.1 s
Wall time: 53.1 s


(array([0., 1.]), array([398287,   7162]))

In [31]:
df_Exrp_LR_in_celltype_pairs = df_Exrp_LR_in_celltype_pairs.loc[:, (df_Exrp_LR_in_celltype_pairs != 0).any(axis=0)]
print(df_Exrp_LR_in_celltype_pairs.shape)

(899, 451)


In [32]:
df_Exrp_LR_in_celltype_pairs = df_Exrp_LR_in_celltype_pairs.loc[(df_Exrp_LR_in_celltype_pairs != 0).any(axis=1), :]
print(df_Exrp_LR_in_celltype_pairs.shape)

(131, 451)


In [33]:
np.unique(df_Exrp_LR_in_celltype_pairs.sum(axis=0), return_counts = True)

(array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
        14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
        27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39.]),
 array([ 1,  1,  2, 10, 13,  8, 19, 17, 27, 21, 32, 25, 22, 28, 18, 23, 12,
        21, 21, 11, 19, 18, 13,  5,  8,  3, 13,  4,  7,  8,  5,  2,  4,  1,
         4,  2,  1,  1,  1]))

In [34]:
np.sum(df_Exrp_LR_in_celltype_pairs.values) == np.unique(df_Exrp_LR_in_celltype_pairs.values, return_counts = True)

array([[False, False],
       [False,  True]])

In [35]:
df_Exrp_LR_in_celltype_pairs

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(bright)---NK CD56(bright),NK CD56(dim)---NK CD56(dim),NKT---NKT,Naïve B---Naïve B,cDC---cDC
SIRPA_CD47,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
LGALS9_HAVCR2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
CRTAM_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TIGIT_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD226_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LILRA4_BST2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
LAIR1_LILRB4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [36]:
df_Exrp_LR_in_celltype_pairs_DE = pd.DataFrame(index = list(df_Exrp_LR_in_celltype_pairs.index),
                                               columns = list(df_Exrp_LR_in_celltype_pairs.columns),
                                               data = np.zeros((len(list(df_Exrp_LR_in_celltype_pairs.index)), 
                                                                len(list(df_Exrp_LR_in_celltype_pairs.columns)))))
df_Exrp_LR_in_celltype_pairs_DE

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(bright)---NK CD56(bright),NK CD56(dim)---NK CD56(dim),NKT---NKT,Naïve B---Naïve B,cDC---cDC
SIRPA_CD47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LGALS9_HAVCR2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRTAM_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TIGIT_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD226_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LILRA4_BST2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
df_Exrp_LR_in_celltype_pairs_DE_RA = df_Exrp_LR_in_celltype_pairs_DE.copy()
df_Exrp_LR_in_celltype_pairs_DE_PS = df_Exrp_LR_in_celltype_pairs_DE.copy()
df_Exrp_LR_in_celltype_pairs_DE_MS = df_Exrp_LR_in_celltype_pairs_DE.copy()
df_Exrp_LR_in_celltype_pairs_DE_Control = df_Exrp_LR_in_celltype_pairs_DE.copy()

In [38]:
all_ct = list(genes_expr_per_cell_type.keys())
ct_with_upreg_DE = list(is_DE_RA.keys())
ct_with_no_upreg_DE_genes = set(all_ct) - set(ct_with_upreg_DE)
ct_with_no_upreg_DE_genes

{'Exhausted B',
 'HSC',
 'Immature B',
 'MAIT',
 'Memory B',
 'Neutrophil',
 'Plasma B',
 'Plasmablast',
 'Platelets',
 'Prolif NK',
 'Prolif T',
 'RBC',
 'Treg',
 'pDC',
 'γδT'}

RA

In [39]:
for interaction in list(df_Exrp_LR_in_celltype_pairs_DE_RA.index):
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_DE_RA.columns):
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']

        # so cell type Exhausted B will throw an error here because no DE analysis, so needs a special if
        if ct_A in ct_with_no_upreg_DE_genes:
            # if ct_A doesn't have upreg DE, we only care about if partner_B_genes are upreg in this case
            are_any_DE = all(elem in is_DE_RA[ct_B] for elem in partner_B_genes)
        elif ct_B in ct_with_no_upreg_DE_genes:
            # if ct_B doesn't have upreg DE, we only care about if partner_A_genes are upreg in this case
            are_any_DE = all(elem in is_DE_RA[ct_A] for elem in partner_A_genes)
        else:
            # if both partners have DE genes, proceed as normal
            # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
            are_any_DE = all(elem in is_DE_RA[ct_A] for elem in partner_A_genes) | all(elem in is_DE_RA[ct_B] for elem in partner_B_genes)
      
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_DE_RA.loc[interaction, ct_pair] = 1

df_Exrp_LR_in_celltype_pairs_DE_RA

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(bright)---NK CD56(bright),NK CD56(dim)---NK CD56(dim),NKT---NKT,Naïve B---Naïve B,cDC---cDC
SIRPA_CD47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LGALS9_HAVCR2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
CRTAM_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TIGIT_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD226_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LILRA4_BST2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
df_Exrp_LR_in_celltype_pairs_DE_RA = df_Exrp_LR_in_celltype_pairs_DE_RA.loc[:, (df_Exrp_LR_in_celltype_pairs_DE_RA != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_DE_RA.shape, '\n')

df_Exrp_LR_in_celltype_pairs_DE_RA = df_Exrp_LR_in_celltype_pairs_DE_RA.loc[(df_Exrp_LR_in_celltype_pairs_DE_RA != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_DE_RA.shape, '\n')

shape after filtering interactions
(131, 407) 

shape after filtering cell type pairs
(89, 407) 



In [41]:
df_Exrp_LR_in_celltype_pairs_DE_RA.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_cellphone_interactions_DE_RA.csv')
df_Exrp_LR_in_celltype_pairs_DE_RA

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(bright)---NK CD56(bright),NK CD56(dim)---NK CD56(dim),NKT---NKT,Naïve B---Naïve B,cDC---cDC
LGALS9_HAVCR2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
CD226_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COL9A3_integrin_a1b1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PLAUR_integrin_a4b1_complex,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TGFB1_TGFBR3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LTBR_LTB,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LILRA4_BST2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
np.unique(df_Exrp_LR_in_celltype_pairs_DE_RA, return_counts = True)

(array([0., 1.]), array([33940,  2283]))

PS

In [43]:
for interaction in list(df_Exrp_LR_in_celltype_pairs_DE_PS.index):
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_DE_PS.columns):
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # so cell type Exhausted B will throw an error here because no DE analysis, so needs a special if
        if ct_A in ct_with_no_upreg_DE_genes:
            # if ct_A doesn't have upreg DE, we only care about if partner_B_genes are upreg in this case
            are_any_DE = all(elem in is_DE_PS[ct_B] for elem in partner_B_genes)
        elif ct_B in ct_with_no_upreg_DE_genes:
            # if ct_B doesn't have upreg DE, we only care about if partner_A_genes are upreg in this case
            are_any_DE = all(elem in is_DE_PS[ct_A] for elem in partner_A_genes)
        else:
            # if both partners have DE genes, proceed as normal
            # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
            are_any_DE = all(elem in is_DE_PS[ct_A] for elem in partner_A_genes) | all(elem in is_DE_PS[ct_B] for elem in partner_B_genes)
      
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_DE_PS.loc[interaction, ct_pair] = 1
df_Exrp_LR_in_celltype_pairs_DE_PS

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(bright)---NK CD56(bright),NK CD56(dim)---NK CD56(dim),NKT---NKT,Naïve B---Naïve B,cDC---cDC
SIRPA_CD47,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LGALS9_HAVCR2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
CRTAM_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TIGIT_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD226_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LILRA4_BST2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
df_Exrp_LR_in_celltype_pairs_DE_PS = df_Exrp_LR_in_celltype_pairs_DE_PS.loc[:, (df_Exrp_LR_in_celltype_pairs_DE_PS != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_DE_PS.shape, '\n')

df_Exrp_LR_in_celltype_pairs_DE_PS = df_Exrp_LR_in_celltype_pairs_DE_PS.loc[(df_Exrp_LR_in_celltype_pairs_DE_PS != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_DE_PS.shape, '\n')

shape after filtering interactions
(131, 388) 

shape after filtering cell type pairs
(97, 388) 



In [50]:
df_Exrp_LR_in_celltype_pairs_DE_PS.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_cellphone_interactions_DE_PS.csv')
df_Exrp_LR_in_celltype_pairs_DE_PS

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,γδT---cDC,CD14 mono---CD14 mono,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(dim)---NK CD56(dim),Naïve B---Naïve B,cDC---cDC
SIRPA_CD47,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LGALS9_HAVCR2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
THBS1_integrin_a2Bb3_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD40LG_integrin_a2Bb3_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PLAUR_integrin_a4b1_complex,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LILRA4_BST2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [46]:
np.unique(df_Exrp_LR_in_celltype_pairs_DE_PS, return_counts = True)

(array([0., 1.]), array([35031,  2605]))

MS

In [47]:
for interaction in list(df_Exrp_LR_in_celltype_pairs_DE_MS.index):
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_DE_MS.columns):
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        # so cell type Exhausted B will throw an error here because no DE analysis, so needs a special if
        if ct_A in ct_with_no_upreg_DE_genes:
            # if ct_A doesn't have upreg DE, we only care about if partner_B_genes are upreg in this case
            are_any_DE = all(elem in is_DE_MS[ct_B] for elem in partner_B_genes)
        elif ct_B in ct_with_no_upreg_DE_genes:
            # if ct_B doesn't have upreg DE, we only care about if partner_A_genes are upreg in this case
            are_any_DE = all(elem in is_DE_MS[ct_A] for elem in partner_A_genes)
        else:
            # if both partners have DE genes, proceed as normal
            # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
            are_any_DE = all(elem in is_DE_MS[ct_A] for elem in partner_A_genes) | all(elem in is_DE_MS[ct_B] for elem in partner_B_genes)
      
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_DE_MS.loc[interaction, ct_pair] = 1
df_Exrp_LR_in_celltype_pairs_DE_MS

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(bright)---NK CD56(bright),NK CD56(dim)---NK CD56(dim),NKT---NKT,Naïve B---Naïve B,cDC---cDC
SIRPA_CD47,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LGALS9_HAVCR2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
CRTAM_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TIGIT_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD226_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LILRA4_BST2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [48]:
df_Exrp_LR_in_celltype_pairs_DE_MS = df_Exrp_LR_in_celltype_pairs_DE_MS.loc[:, (df_Exrp_LR_in_celltype_pairs_DE_MS != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_DE_MS.shape, '\n')

df_Exrp_LR_in_celltype_pairs_DE_MS = df_Exrp_LR_in_celltype_pairs_DE_MS.loc[(df_Exrp_LR_in_celltype_pairs_DE_MS != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_DE_MS.shape, '\n')

shape after filtering interactions
(131, 429) 

shape after filtering cell type pairs
(98, 429) 



In [51]:
df_Exrp_LR_in_celltype_pairs_DE_MS.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_cellphone_interactions_DE_MS.csv')
df_Exrp_LR_in_celltype_pairs_DE_MS

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(bright)---NK CD56(bright),NK CD56(dim)---NK CD56(dim),NKT---NKT,Naïve B---Naïve B,cDC---cDC
SIRPA_CD47,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LGALS9_HAVCR2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
TIGIT_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD226_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PLAUR_integrin_a4b1_complex,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LILRA4_BST2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [51]:
np.unique(df_Exrp_LR_in_celltype_pairs_DE_MS, return_counts = True)

(array([0., 1.]), array([24002,  2164]))

Control

In [52]:
for interaction in list(df_Exrp_LR_in_celltype_pairs_DE_Control.index):
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_DE_Control.columns):
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        # so cell type Exhausted B will throw an error here because no DE analysis, so needs a special if
        if ct_A in ct_with_no_upreg_DE_genes:
            # if ct_A doesn't have upreg DE, we only care about if partner_B_genes are upreg in this case
            are_any_DE = all(elem in is_DE_Control[ct_B] for elem in partner_B_genes)
        elif ct_B in ct_with_no_upreg_DE_genes:
            # if ct_B doesn't have upreg DE, we only care about if partner_A_genes are upreg in this case
            are_any_DE = all(elem in is_DE_Control[ct_A] for elem in partner_A_genes)
        else:
            # if both partners have DE genes, proceed as normal
            # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
            are_any_DE = all(elem in is_DE_Control[ct_A] for elem in partner_A_genes) | all(elem in is_DE_Control[ct_B] for elem in partner_B_genes)
      
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_DE_Control.loc[interaction, ct_pair] = 1
df_Exrp_LR_in_celltype_pairs_DE_Control

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(bright)---NK CD56(bright),NK CD56(dim)---NK CD56(dim),NKT---NKT,Naïve B---Naïve B,cDC---cDC
SIRPA_CD47,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LGALS9_HAVCR2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
CRTAM_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TIGIT_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD226_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LILRA4_BST2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
df_Exrp_LR_in_celltype_pairs_DE_Control = df_Exrp_LR_in_celltype_pairs_DE_Control.loc[:, (df_Exrp_LR_in_celltype_pairs_DE_Control != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_DE_Control.shape, '\n')

df_Exrp_LR_in_celltype_pairs_DE_Control = df_Exrp_LR_in_celltype_pairs_DE_Control.loc[(df_Exrp_LR_in_celltype_pairs_DE_Control != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_DE_Control.shape, '\n')

shape after filtering interactions
(131, 411) 

shape after filtering cell type pairs
(94, 411) 



In [54]:
df_Exrp_LR_in_celltype_pairs_DE_Control.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_cellphone_interactions_DE_Control.csv')
df_Exrp_LR_in_celltype_pairs_DE_Control

Unnamed: 0,CD14 mono---CD16 mono,CD14 mono---CD4 memory T,CD14 mono---CD4 naïve T,CD14 mono---CD8 memory T,CD14 mono---CD8 naïve T,CD14 mono---Exhausted B,CD14 mono---HSC,CD14 mono---Immature B,CD14 mono---MAIT,CD14 mono---Memory B,...,CD16 mono---CD16 mono,CD4 memory T---CD4 memory T,CD4 naïve T---CD4 naïve T,CD8 memory T---CD8 memory T,CD8 naïve T---CD8 naïve T,NK CD56(bright)---NK CD56(bright),NK CD56(dim)---NK CD56(dim),NKT---NKT,Naïve B---Naïve B,cDC---cDC
SIRPA_CD47,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LGALS9_HAVCR2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
CD226_NECTIN2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PLAUR_integrin_a4b1_complex,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TGFB1_TGFBR3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LILRA4_BST2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRPG,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1_complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
np.unique(df_Exrp_LR_in_celltype_pairs_DE_Control, return_counts = True)

(array([0., 1.]), array([36186,  2448]))

More readable tables

RA

In [56]:
faulty_index_count = 0

vec2_append_upreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_DE_RA.index:
    
    print(interaction, list(df_Exrp_LR_in_celltype_pairs_DE_RA.index).index(interaction) + 1, 
          'out of', len(df_Exrp_LR_in_celltype_pairs_DE_RA.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_DE_RA.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        # row by row
        vec2_append_upreg[str(curr_count)] = {}
        
        vec2_append_upreg[str(curr_count)]['interaction'] = interaction
        
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
                
        vec2_append_upreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_upreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
           
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        vec2_append_upreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_upreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_RA[DE_RA['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace = True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_RA[DE_RA['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace = True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])
        
        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_case'])
            
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_case'])
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1

LGALS9_HAVCR2 1 out of 89
CD226_NECTIN2 2 out of 89
COL9A3_integrin_a1b1_complex 3 out of 89
PLAUR_integrin_a4b1_complex 4 out of 89
TGFB1_TGFBR3 5 out of 89
FCER2_integrin_aMb2_complex 6 out of 89
ICAM1_integrin_aMb2_complex 7 out of 89
FCER2_integrin_aXb2_complex 8 out of 89
ICAM1_integrin_aXb2_complex 9 out of 89
FCER2_CR2 10 out of 89
HLA-C_KIR2DL3 11 out of 89
HLA-C_KIR2DL1 12 out of 89
CD8_receptor_LCK 13 out of 89
CD94:NKG2A_HLA-E 14 out of 89
CD94:NKG2C_HLA-E 15 out of 89
CD94:NKG2E_HLA-E 16 out of 89
TNFRSF13B_TNFSF13B 17 out of 89
TNFRSF17_TNFSF13B 18 out of 89
TNFRSF13C_TNFSF13B 19 out of 89
CD74_APP 20 out of 89
ICAM1_SPN 21 out of 89
ICAM1_ITGAL 22 out of 89
ICAM1_integrin_aLb2_complex 23 out of 89
ICAM2_integrin_aLb2_complex 24 out of 89
ICAM3_integrin_aLb2_complex 25 out of 89
ICAM4_integrin_aLb2_complex 26 out of 89
F11R_integrin_aLb2_complex 27 out of 89
FAS_FASLG 28 out of 89
NRP1_VEGFA 29 out of 89
HLA-A_KIR3DL1 30 out of 89
HLA-F_KIR3DL1 31 out of 89
HLA-F_KIR3DL2 3

In [57]:
# outlining the final table format
df_output_upreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                            index = list(vec2_append_upreg.keys()))

for i in list(vec2_append_upreg.keys()):
    curr_keys = list(vec2_append_upreg[i].keys())
    for col in curr_keys:
        df_output_upreg.loc[i,col] = vec2_append_upreg[i][col]
        
# getting rid of the square parentheses [] in all the values
cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_upreg.index):
    for col in cols2correct:
        curr_value = df_output_upreg.loc[row, col] # with []
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 (i.e., complexes)
            df_output_upreg.loc[row, col] = curr_value[0] # this is just to get the element - string if a gene, numerical value if it's a stat
        
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,CD16 mono,True,-0.037306,0.000144,0.569248,False,,,0.308372
1,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,NK CD56(bright),True,-0.037306,0.000144,0.569248,False,,,0.156954
2,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,NK CD56(dim),True,-0.037306,0.000144,0.569248,False,,,0.227979
3,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,NKT,True,-0.037306,0.000144,0.569248,False,,,0.115661
4,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,Prolif NK,True,-0.037306,0.000144,0.569248,False,,,0.314159
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2278,CLEC2B_KLRF1,CLEC2B,KLRF1,cDC,NK CD56(dim),False,,,0.714549,True,-0.129218,0.0,0.585076
2279,CLEC2B_KLRF1,CLEC2B,KLRF1,pDC,NK CD56(dim),False,,,0.493333,True,-0.129218,0.0,0.585076
2280,CLEC2B_KLRF1,CLEC2B,KLRF1,γδT,NK CD56(dim),False,,,0.583333,True,-0.129218,0.0,0.585076
2281,CLEC2B_KLRF1,CLEC2B,KLRF1,CD8 memory T,CD8 memory T,True,0.075376,0.001224,0.59082,False,,,0.106844


In [58]:
list(df_output_upreg.columns) == list(vec2_append_upreg['0'].keys())

True

In [59]:
df_output_upreg[:10]

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,CD16 mono,True,-0.037306,0.000144,0.569248,False,,,0.308372
1,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,NK CD56(bright),True,-0.037306,0.000144,0.569248,False,,,0.156954
2,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,NK CD56(dim),True,-0.037306,0.000144,0.569248,False,,,0.227979
3,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,NKT,True,-0.037306,0.000144,0.569248,False,,,0.115661
4,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,Prolif NK,True,-0.037306,0.000144,0.569248,False,,,0.314159
5,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,Prolif T,True,-0.037306,0.000144,0.569248,False,,,0.332351
6,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,cDC,True,-0.037306,0.000144,0.569248,False,,,0.506446
7,LGALS9_HAVCR2,LGALS9,HAVCR2,CD16 mono,CD14 mono,True,-0.110017,0.014108,0.845361,True,-0.125476,0.0,0.270594
8,LGALS9_HAVCR2,LGALS9,HAVCR2,CD16 mono,NK CD56(bright),True,-0.110017,0.014108,0.845361,False,,,0.156954
9,LGALS9_HAVCR2,LGALS9,HAVCR2,CD16 mono,NK CD56(dim),True,-0.110017,0.014108,0.845361,False,,,0.227979


In [60]:
# split tables into 2 tables, one for simple interactions and one for complex interactions (contains pseudo-interactions for each subunit)
# getting indices of complex interactions
complex_interaction_rows_upreg = []

for n_row in list(df_output_upreg.index):
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) or isinstance(curr_partner_B_genes, list): # if partner A or B is a complex
        complex_interaction_rows_upreg.append(n_row)
len(complex_interaction_rows_upreg)

553

In [61]:
df_output_upreg_complex = df_output_upreg.loc[complex_interaction_rows_upreg,:]
df_output_upreg_simple = df_output_upreg.drop(complex_interaction_rows_upreg, axis=0)
df_output_upreg_simple

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,CD16 mono,True,-0.037306,0.000144,0.569248,False,,,0.308372
1,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,NK CD56(bright),True,-0.037306,0.000144,0.569248,False,,,0.156954
2,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,NK CD56(dim),True,-0.037306,0.000144,0.569248,False,,,0.227979
3,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,NKT,True,-0.037306,0.000144,0.569248,False,,,0.115661
4,LGALS9_HAVCR2,LGALS9,HAVCR2,CD14 mono,Prolif NK,True,-0.037306,0.000144,0.569248,False,,,0.314159
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2278,CLEC2B_KLRF1,CLEC2B,KLRF1,cDC,NK CD56(dim),False,,,0.714549,True,-0.129218,0.0,0.585076
2279,CLEC2B_KLRF1,CLEC2B,KLRF1,pDC,NK CD56(dim),False,,,0.493333,True,-0.129218,0.0,0.585076
2280,CLEC2B_KLRF1,CLEC2B,KLRF1,γδT,NK CD56(dim),False,,,0.583333,True,-0.129218,0.0,0.585076
2281,CLEC2B_KLRF1,CLEC2B,KLRF1,CD8 memory T,CD8 memory T,True,0.075376,0.001224,0.59082,False,,,0.106844


In [62]:
df_output_upreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
38,COL9A3_integrin_a1b1_complex,COL9A3,"[ITGA1, ITGB1]",Naïve B,Prolif T,True,0.064693,0.0,0.145393,False,,,"[0.2427813789039481, 0.7860931054802592]"
39,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD16 mono,True,0.128713,0.0,0.695274,False,,,"[0.5241809672386896, 0.6625065002600105]"
40,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD4 memory T,True,0.128713,0.0,0.695274,False,,,"[0.478021978021978, 0.3644688644688644]"
41,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD4 naïve T,True,0.128713,0.0,0.695274,False,,,"[0.3183694722356529, 0.3630378993565664]"
42,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD8 memory T,True,0.128713,0.0,0.695274,False,,,"[0.4142030848329049, 0.5176735218508998]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1405,TGFB1_TGFbeta_receptor1,TGFB1,"[TGFBR1, TGFBR2]",CD8 memory T,Prolif NK,True,0.120911,0.0,0.508301,False,,,"[0.1371681415929203, 0.1327433628318584]"
1406,TGFB1_TGFbeta_receptor1,TGFB1,"[TGFBR1, TGFBR2]",Naïve B,Prolif NK,True,0.062362,0.0,0.227447,False,,,"[0.1371681415929203, 0.1327433628318584]"
1407,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",NKT,CD4 naïve T,False,,,0.163254,True,"[0.0649280480528577, 0.0473061328962261]","[7.367592037250389e-30, 3.3824841033395e-21]","[0.258164, 0.199854]"
1408,IFNG_Type_II_IFNR,IFNG,"[IFNGR1, IFNGR2]",Prolif NK,CD4 naïve T,False,,,0.181416,True,"[0.0649280480528577, 0.0473061328962261]","[7.367592037250389e-30, 3.3824841033395e-21]","[0.258164, 0.199854]"


In [63]:
df_output_upreg_simple.shape

(1730, 13)

In [64]:
df_output_upreg_complex.shape

(553, 13)

In [65]:
df_output_upreg.shape

(2283, 13)

In [66]:
# checking whether any complexes interact with other complexes and how many subunits each complex contains here
n_subunits_upreg = []

for n_row in list(df_output_upreg.index):    
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) and isinstance(curr_partner_B_genes, list): # if partner A AND B is a complex
        print('row', n_row)
        print('both are complexes')
        
    if isinstance(curr_partner_A_genes, list): # if partner A is a complex
        print('row', n_row)
        print('curr_partner_A_genes',curr_partner_A_genes)
        print('partner A is a complex, len is:', len(curr_partner_A_genes))
        n_subunits_upreg.append(len(curr_partner_A_genes))
        
    if isinstance(curr_partner_B_genes, list): # if partner B is a complex
        print('row', n_row)
        print('curr_partner_B_genes',curr_partner_B_genes)
        print('partner B is a complex, len is:', len(curr_partner_B_genes))
        n_subunits_upreg.append(len(curr_partner_B_genes))

row 38
curr_partner_B_genes ['ITGA1', 'ITGB1']
partner B is a complex, len is: 2
row 39
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 40
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 41
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 42
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 43
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 44
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 45
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 46
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 47
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 48
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 49
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 50
curr_partner_B_genes 

In [67]:
# resulting complexes here have at most two subunits
np.unique(n_subunits_upreg, return_counts = True)

(array([2]), array([553]))

In [68]:
df_output_upreg_complex_member_1 = df_output_upreg_complex.copy()
df_output_upreg_complex_member_2 = df_output_upreg_complex.copy()

# splitting complex interaction entries by subunits / members
# for any values in these tables, if it's a list, save the 0th for df_output_upreg_complex_member_1 and 1st for df_output_upreg_complex_member_2
for n_row in list(df_output_upreg_complex.index):    
    for col in df_output_upreg_complex.columns:
        if isinstance(df_output_upreg_complex.loc[n_row, col], list):
            df_output_upreg_complex_member_1.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][0]
            df_output_upreg_complex_member_2.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][1]

In [69]:
df_output_upreg_complex_member_1.index = [idx + '_member_1' for idx in df_output_upreg_complex_member_1.index]
df_output_upreg_complex_member_2.index = [idx + '_member_2' for idx in df_output_upreg_complex_member_2.index]

# getting all indices
idx_concat = list(df_output_upreg_complex_member_1.index) + list(df_output_upreg_complex_member_2.index)

# sorting by original index number, so that the order is member 1 then member 2
idx_concat.sort(key = lambda x: x.split('_')[0])
idx_concat

['104_member_1',
 '104_member_2',
 '105_member_1',
 '105_member_2',
 '106_member_1',
 '106_member_2',
 '107_member_1',
 '107_member_2',
 '108_member_1',
 '108_member_2',
 '109_member_1',
 '109_member_2',
 '110_member_1',
 '110_member_2',
 '111_member_1',
 '111_member_2',
 '112_member_1',
 '112_member_2',
 '113_member_1',
 '113_member_2',
 '114_member_1',
 '114_member_2',
 '115_member_1',
 '115_member_2',
 '116_member_1',
 '116_member_2',
 '117_member_1',
 '117_member_2',
 '118_member_1',
 '118_member_2',
 '1181_member_1',
 '1181_member_2',
 '1182_member_1',
 '1182_member_2',
 '1183_member_1',
 '1183_member_2',
 '1184_member_1',
 '1184_member_2',
 '1185_member_1',
 '1185_member_2',
 '1186_member_1',
 '1186_member_2',
 '1187_member_1',
 '1187_member_2',
 '1188_member_1',
 '1188_member_2',
 '1189_member_1',
 '1189_member_2',
 '119_member_1',
 '119_member_2',
 '1190_member_1',
 '1190_member_2',
 '1191_member_1',
 '1191_member_2',
 '1192_member_1',
 '1192_member_2',
 '120_member_1',
 '120_m

In [70]:
df_output_upreg_complex_deconv = pd.concat([df_output_upreg_complex_member_1, df_output_upreg_complex_member_2])
df_output_upreg_complex_member_1

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
38_member_1,COL9A3_integrin_a1b1_complex,COL9A3,ITGA1,Naïve B,Prolif T,True,0.064693,0.0,0.145393,False,,,0.242781
39_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD16 mono,True,0.128713,0.0,0.695274,False,,,0.524181
40_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD4 memory T,True,0.128713,0.0,0.695274,False,,,0.478022
41_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD4 naïve T,True,0.128713,0.0,0.695274,False,,,0.318369
42_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD8 memory T,True,0.128713,0.0,0.695274,False,,,0.414203
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1405_member_1,TGFB1_TGFbeta_receptor1,TGFB1,TGFBR1,CD8 memory T,Prolif NK,True,0.120911,0.0,0.508301,False,,,0.137168
1406_member_1,TGFB1_TGFbeta_receptor1,TGFB1,TGFBR1,Naïve B,Prolif NK,True,0.062362,0.0,0.227447,False,,,0.137168
1407_member_1,IFNG_Type_II_IFNR,IFNG,IFNGR1,NKT,CD4 naïve T,False,,,0.163254,True,0.064928,0.0,0.258164
1408_member_1,IFNG_Type_II_IFNR,IFNG,IFNGR1,Prolif NK,CD4 naïve T,False,,,0.181416,True,0.064928,0.0,0.258164


In [71]:
df_output_upreg_complex_member_2

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
38_member_2,COL9A3_integrin_a1b1_complex,COL9A3,ITGB1,Naïve B,Prolif T,True,0.064693,0.0,0.145393,False,,,0.786093
39_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD16 mono,True,0.128713,0.0,0.695274,False,,,0.662507
40_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD4 memory T,True,0.128713,0.0,0.695274,False,,,0.364469
41_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD4 naïve T,True,0.128713,0.0,0.695274,False,,,0.363038
42_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD8 memory T,True,0.128713,0.0,0.695274,False,,,0.517674
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1405_member_2,TGFB1_TGFbeta_receptor1,TGFB1,TGFBR2,CD8 memory T,Prolif NK,True,0.120911,0.0,0.508301,False,,,0.132743
1406_member_2,TGFB1_TGFbeta_receptor1,TGFB1,TGFBR2,Naïve B,Prolif NK,True,0.062362,0.0,0.227447,False,,,0.132743
1407_member_2,IFNG_Type_II_IFNR,IFNG,IFNGR2,NKT,CD4 naïve T,False,,,0.163254,True,0.047306,0.0,0.199854
1408_member_2,IFNG_Type_II_IFNR,IFNG,IFNGR2,Prolif NK,CD4 naïve T,False,,,0.181416,True,0.047306,0.0,0.199854


In [72]:
df_output_upreg_complex_deconv = df_output_upreg_complex_deconv.loc[idx_concat,:]
df_output_upreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
104_member_1,FCER2_integrin_aMb2_complex,FCER2,ITGB2,Naïve B,CD14 mono,True,-0.142621,0.0,0.324376,False,,,0.840071
104_member_2,FCER2_integrin_aMb2_complex,FCER2,ITGAM,Naïve B,CD14 mono,True,-0.142621,0.0,0.324376,False,,,0.418767
105_member_1,FCER2_integrin_aMb2_complex,FCER2,ITGB2,Naïve B,CD16 mono,True,-0.142621,0.0,0.324376,False,,,0.951638
105_member_2,FCER2_integrin_aMb2_complex,FCER2,ITGAM,Naïve B,CD16 mono,True,-0.142621,0.0,0.324376,False,,,0.350494
106_member_1,FCER2_integrin_aMb2_complex,FCER2,ITGB2,Naïve B,CD8 memory T,True,-0.142621,0.0,0.324376,False,,,0.690874
...,...,...,...,...,...,...,...,...,...,...,...,...,...
829_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,pDC,CD4 memory T,False,,,0.173333,True,0.061126,0.034977,0.334337
830_member_1,F11R_integrin_aLb2_complex,F11R,ITGB2,pDC,CD4 naïve T,False,,,0.173333,True,-0.0716,0.0,0.467388
830_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,pDC,CD4 naïve T,False,,,0.173333,True,-0.020179,0.000355,0.16465
831_member_1,F11R_integrin_aLb2_complex,F11R,ITGB2,pDC,CD8 memory T,False,,,0.173333,True,0.068645,0.016215,0.737305


In [73]:
# save the final tables
df_output_upreg_complex_deconv.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_readable_cellphone_interactions_DE_RA_Complex.csv')
df_output_upreg_simple.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_readable_cellphone_interactions_DE_RA_Simple.csv')

PS

In [74]:
faulty_index_count = 0

vec2_append_upreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_DE_PS.index:
    
    print(interaction, list(df_Exrp_LR_in_celltype_pairs_DE_PS.index).index(interaction) + 1, 
          'out of', len(df_Exrp_LR_in_celltype_pairs_DE_PS.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_DE_PS.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        # row by row
        vec2_append_upreg[str(curr_count)] = {}
        
        vec2_append_upreg[str(curr_count)]['interaction'] = interaction
                
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
                
        vec2_append_upreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_upreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
           
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        vec2_append_upreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_upreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_PS[DE_PS['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace = True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_PS[DE_PS['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace = True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])
        
        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_case'])
            
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_case'])
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1

SIRPA_CD47 1 out of 97
LGALS9_HAVCR2 2 out of 97
THBS1_integrin_a2Bb3_complex 3 out of 97
CD40LG_integrin_a2Bb3_complex 4 out of 97
PLAUR_integrin_a4b1_complex 5 out of 97
CD40LG_integrin_a5b1_complex 6 out of 97
TGFB1_TGFBR3 7 out of 97
FCER2_integrin_aMb2_complex 8 out of 97
ICAM1_integrin_aMb2_complex 9 out of 97
GP1BA_integrin_aMb2_complex 10 out of 97
JAM3_integrin_aMb2_complex 11 out of 97
FCER2_integrin_aXb2_complex 12 out of 97
ICAM1_integrin_aXb2_complex 13 out of 97
FCER2_CR2 14 out of 97
HLA-C_KIR2DL3 15 out of 97
HLA-C_KIR2DL1 16 out of 97
CD8_receptor_LCK 17 out of 97
CD94:NKG2A_HLA-E 18 out of 97
CD94:NKG2C_HLA-E 19 out of 97
CD94:NKG2E_HLA-E 20 out of 97
TNFRSF13B_TNFSF13B 21 out of 97
TNFRSF17_TNFSF13B 22 out of 97
TNFRSF13C_TNFSF13B 23 out of 97
CD74_APP 24 out of 97
ICAM1_SPN 25 out of 97
ICAM1_ITGAL 26 out of 97
ICAM1_integrin_aLb2_complex 27 out of 97
ICAM2_integrin_aLb2_complex 28 out of 97
ICAM3_integrin_aLb2_complex 29 out of 97
ICAM4_integrin_aLb2_complex 30 out

In [75]:
# outlining the final table format
df_output_upreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                            index = list(vec2_append_upreg.keys()))

for i in list(vec2_append_upreg.keys()):
    curr_keys = list(vec2_append_upreg[i].keys())
    for col in curr_keys:
        df_output_upreg.loc[i,col] = vec2_append_upreg[i][col]
        
# getting rid of the square parentheses [] in all the values
cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_upreg.index):
    for col in cols2correct:
        curr_value = df_output_upreg.loc[row, col] # with []
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 (i.e., complexes)
            df_output_upreg.loc[row, col] = curr_value[0] # this is just to get the element - string if a gene, numerical value if it's a stat
        
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD4 naïve T,False,,,0.326286,True,0.034853,0.003062,0.451709
1,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD8 memory T,False,,,0.326286,True,0.058683,0.022976,0.453462
2,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD4 naïve T,False,,,0.269891,True,0.034853,0.003062,0.451709
3,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD8 memory T,False,,,0.269891,True,0.058683,0.022976,0.453462
4,SIRPA_CD47,SIRPA,CD47,cDC,CD4 naïve T,False,,,0.366483,True,0.034853,0.003062,0.451709
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2600,CLEC2B_KLRF1,CLEC2B,KLRF1,cDC,NK CD56(dim),False,,,0.714549,True,-0.212604,0.0,0.489147
2601,CLEC2B_KLRF1,CLEC2B,KLRF1,pDC,NK CD56(dim),False,,,0.493333,True,-0.212604,0.0,0.489147
2602,CLEC2B_KLRF1,CLEC2B,KLRF1,γδT,NK CD56(dim),False,,,0.583333,True,-0.212604,0.0,0.489147
2603,CLEC2B_KLRF1,CLEC2B,KLRF1,CD8 memory T,CD8 memory T,True,0.258273,0.0,0.585257,False,,,0.106844


In [76]:
list(df_output_upreg.columns) == list(vec2_append_upreg['0'].keys())

True

In [77]:
# split tables into 2 tables, one for simple interactions and one for complex interactions (contains pseudo-interactions for each subunit)
#getting indices of complex interactions
complex_interaction_rows_upreg = []

for n_row in list(df_output_upreg.index):
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) or isinstance(curr_partner_B_genes, list): # if partner A or B is a complex
        complex_interaction_rows_upreg.append(n_row)
len(complex_interaction_rows_upreg)

669

In [78]:
df_output_upreg_complex = df_output_upreg.loc[complex_interaction_rows_upreg,:]
df_output_upreg_simple = df_output_upreg.drop(complex_interaction_rows_upreg, axis=0)
df_output_upreg_simple

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD4 naïve T,False,,,0.326286,True,0.034853,0.003062,0.451709
1,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD8 memory T,False,,,0.326286,True,0.058683,0.022976,0.453462
2,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD4 naïve T,False,,,0.269891,True,0.034853,0.003062,0.451709
3,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD8 memory T,False,,,0.269891,True,0.058683,0.022976,0.453462
4,SIRPA_CD47,SIRPA,CD47,cDC,CD4 naïve T,False,,,0.366483,True,0.034853,0.003062,0.451709
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2600,CLEC2B_KLRF1,CLEC2B,KLRF1,cDC,NK CD56(dim),False,,,0.714549,True,-0.212604,0.0,0.489147
2601,CLEC2B_KLRF1,CLEC2B,KLRF1,pDC,NK CD56(dim),False,,,0.493333,True,-0.212604,0.0,0.489147
2602,CLEC2B_KLRF1,CLEC2B,KLRF1,γδT,NK CD56(dim),False,,,0.583333,True,-0.212604,0.0,0.489147
2603,CLEC2B_KLRF1,CLEC2B,KLRF1,CD8 memory T,CD8 memory T,True,0.258273,0.0,0.585257,False,,,0.106844


In [79]:
df_output_upreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
35,THBS1_integrin_a2Bb3_complex,THBS1,"[ITGB3, ITGA2B]",CD14 mono,Platelets,True,1.105608,0.0,0.773078,False,,,"[0.3081717451523545, 0.5886426592797784]"
36,CD40LG_integrin_a2Bb3_complex,CD40LG,"[ITGB3, ITGA2B]",CD4 memory T,Platelets,True,0.110887,0.000002,0.177331,False,,,"[0.3081717451523545, 0.5886426592797784]"
37,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD16 mono,True,0.174463,0.0,0.627486,False,,,"[0.5241809672386896, 0.6625065002600105]"
38,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD4 memory T,True,0.174463,0.0,0.627486,False,,,"[0.478021978021978, 0.3644688644688644]"
39,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD4 naïve T,True,0.174463,0.0,0.627486,False,,,"[0.3183694722356529, 0.3630378993565664]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2502,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,CD14 mono,True,0.058683,0.022976,0.453462,False,,,"[0.2845060893098782, 0.9258288227334236]"
2503,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,CD16 mono,True,0.058683,0.022976,0.453462,False,,,"[0.4914196567862715, 0.999479979199168]"
2504,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,Neutrophil,True,0.058683,0.022976,0.453462,False,,,"[0.1505016722408027, 0.862876254180602]"
2505,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,cDC,True,0.058683,0.022976,0.453462,False,,,"[0.2007366482504604, 0.9705340699815838]"


In [80]:
df_output_upreg_simple.shape

(1936, 13)

In [81]:
df_output_upreg_complex.shape

(669, 13)

In [82]:
df_output_upreg.shape

(2605, 13)

In [83]:
# checking whether any complexes interact with other complexes and how many subunits each complex contains here
n_subunits_upreg = []

for n_row in list(df_output_upreg.index):    
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) and isinstance(curr_partner_B_genes, list): # if partner A AND B is a complex
        print('row', n_row)
        print('both are complexes')
        
    if isinstance(curr_partner_A_genes, list): # if partner A is a complex
        print('row', n_row)
        print('curr_partner_A_genes',curr_partner_A_genes)
        print('partner A is a complex, len is:', len(curr_partner_A_genes))
        n_subunits_upreg.append(len(curr_partner_A_genes))
        
    if isinstance(curr_partner_B_genes, list): # if partner B is a complex
        print('row', n_row)
        print('curr_partner_B_genes',curr_partner_B_genes)
        print('partner B is a complex, len is:', len(curr_partner_B_genes))
        n_subunits_upreg.append(len(curr_partner_B_genes))

row 35
curr_partner_B_genes ['ITGB3', 'ITGA2B']
partner B is a complex, len is: 2
row 36
curr_partner_B_genes ['ITGB3', 'ITGA2B']
partner B is a complex, len is: 2
row 37
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 38
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 39
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 40
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 41
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 42
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 43
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 44
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 45
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 46
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 47
curr_partner_B_gene

In [84]:
# resulting complexes here have at most two subunits
np.unique(n_subunits_upreg, return_counts = True)

(array([2]), array([669]))

In [85]:
df_output_upreg_complex_member_1 = df_output_upreg_complex.copy()
df_output_upreg_complex_member_2 = df_output_upreg_complex.copy()

# splitting complex interaction entries by subunits / members
# for any values in these tables, if it's a list, save the 0th for df_output_upreg_complex_member_1 and 1st for df_output_upreg_complex_member_2
for n_row in list(df_output_upreg_complex.index):    
    for col in df_output_upreg_complex.columns:
        if isinstance(df_output_upreg_complex.loc[n_row, col], list):
            df_output_upreg_complex_member_1.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][0]
            df_output_upreg_complex_member_2.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][1]

In [86]:
df_output_upreg_complex_member_1.index = [idx + '_member_1' for idx in df_output_upreg_complex_member_1.index]
df_output_upreg_complex_member_2.index = [idx + '_member_2' for idx in df_output_upreg_complex_member_2.index]

# getting all indices
idx_concat = list(df_output_upreg_complex_member_1.index) + list(df_output_upreg_complex_member_2.index)

# sorting by original index number, so that the order is member 1 then member 2
idx_concat.sort(key = lambda x: x.split('_')[0])
idx_concat

['1000_member_1',
 '1000_member_2',
 '1001_member_1',
 '1001_member_2',
 '1002_member_1',
 '1002_member_2',
 '1003_member_1',
 '1003_member_2',
 '1004_member_1',
 '1004_member_2',
 '1005_member_1',
 '1005_member_2',
 '1006_member_1',
 '1006_member_2',
 '1007_member_1',
 '1007_member_2',
 '1008_member_1',
 '1008_member_2',
 '1009_member_1',
 '1009_member_2',
 '1010_member_1',
 '1010_member_2',
 '1011_member_1',
 '1011_member_2',
 '1012_member_1',
 '1012_member_2',
 '1013_member_1',
 '1013_member_2',
 '1014_member_1',
 '1014_member_2',
 '1015_member_1',
 '1015_member_2',
 '1016_member_1',
 '1016_member_2',
 '114_member_1',
 '114_member_2',
 '115_member_1',
 '115_member_2',
 '116_member_1',
 '116_member_2',
 '117_member_1',
 '117_member_2',
 '118_member_1',
 '118_member_2',
 '119_member_1',
 '119_member_2',
 '120_member_1',
 '120_member_2',
 '121_member_1',
 '121_member_2',
 '122_member_1',
 '122_member_2',
 '123_member_1',
 '123_member_2',
 '124_member_1',
 '124_member_2',
 '125_member_1

In [87]:
df_output_upreg_complex_deconv = pd.concat([df_output_upreg_complex_member_1, df_output_upreg_complex_member_2])
df_output_upreg_complex_member_1

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
35_member_1,THBS1_integrin_a2Bb3_complex,THBS1,ITGB3,CD14 mono,Platelets,True,1.105608,0.0,0.773078,False,,,0.308172
36_member_1,CD40LG_integrin_a2Bb3_complex,CD40LG,ITGB3,CD4 memory T,Platelets,True,0.110887,0.000002,0.177331,False,,,0.308172
37_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD16 mono,True,0.174463,0.0,0.627486,False,,,0.524181
38_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD4 memory T,True,0.174463,0.0,0.627486,False,,,0.478022
39_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD4 naïve T,True,0.174463,0.0,0.627486,False,,,0.318369
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2502_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,CD14 mono,True,0.058683,0.022976,0.453462,False,,,0.284506
2503_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,CD16 mono,True,0.058683,0.022976,0.453462,False,,,0.49142
2504_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,Neutrophil,True,0.058683,0.022976,0.453462,False,,,0.150502
2505_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,cDC,True,0.058683,0.022976,0.453462,False,,,0.200737


In [88]:
df_output_upreg_complex_member_2

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
35_member_2,THBS1_integrin_a2Bb3_complex,THBS1,ITGA2B,CD14 mono,Platelets,True,1.105608,0.0,0.773078,False,,,0.588643
36_member_2,CD40LG_integrin_a2Bb3_complex,CD40LG,ITGA2B,CD4 memory T,Platelets,True,0.110887,0.000002,0.177331,False,,,0.588643
37_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD16 mono,True,0.174463,0.0,0.627486,False,,,0.662507
38_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD4 memory T,True,0.174463,0.0,0.627486,False,,,0.364469
39_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD4 naïve T,True,0.174463,0.0,0.627486,False,,,0.363038
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2502_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,CD14 mono,True,0.058683,0.022976,0.453462,False,,,0.925829
2503_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,CD16 mono,True,0.058683,0.022976,0.453462,False,,,0.99948
2504_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,Neutrophil,True,0.058683,0.022976,0.453462,False,,,0.862876
2505_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,cDC,True,0.058683,0.022976,0.453462,False,,,0.970534


In [89]:
df_output_upreg_complex_deconv = df_output_upreg_complex_deconv.loc[idx_concat,:]
df_output_upreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
1000_member_1,F11R_integrin_aLb2_complex,F11R,ITGB2,Platelets,CD8 memory T,False,,,0.218837,True,-0.189308,0.0,0.599404
1000_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,Platelets,CD8 memory T,False,,,0.218837,True,-0.153741,0.0,0.2621
1001_member_1,F11R_integrin_aLb2_complex,F11R,ITGB2,Platelets,NK CD56(dim),False,,,0.218837,True,-0.08111,0.022176,0.834109
1001_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,Platelets,NK CD56(dim),False,,,0.218837,True,-0.201631,0.0,0.394574
1002_member_1,F11R_integrin_aLb2_complex,F11R,ITGB2,Prolif NK,CD14 mono,False,,,0.115044,True,-0.283149,0.0,0.755714
...,...,...,...,...,...,...,...,...,...,...,...,...,...
997_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,Platelets,CD14 mono,False,,,0.218837,True,-0.111758,0.0,0.124963
998_member_1,F11R_integrin_aLb2_complex,F11R,ITGB2,Platelets,CD4 memory T,False,,,0.218837,True,-0.276287,0.0,0.38574
998_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,Platelets,CD4 memory T,False,,,0.218837,True,-0.11141,0.002273,0.206581
999_member_1,F11R_integrin_aLb2_complex,F11R,ITGB2,Platelets,CD4 naïve T,False,,,0.218837,True,-0.060877,0.0,0.43565


In [90]:
# save the final tables
df_output_upreg_complex_deconv.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_readable_cellphone_interactions_DE_PS_Complex.csv')
df_output_upreg_simple.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_readable_cellphone_interactions_DE_PS_Simple.csv')

MS

In [91]:
faulty_index_count = 0

vec2_append_upreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_DE_MS.index:
    
    print(interaction, list(df_Exrp_LR_in_celltype_pairs_DE_MS.index).index(interaction) + 1, 
          'out of', len(df_Exrp_LR_in_celltype_pairs_DE_MS.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_DE_MS.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        # row by row
        vec2_append_upreg[str(curr_count)] = {}
        
        vec2_append_upreg[str(curr_count)]['interaction'] = interaction
                
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
                
        vec2_append_upreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_upreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
           
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        vec2_append_upreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_upreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_MS[DE_MS['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace = True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_MS[DE_MS['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace = True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])
        
        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_case'])
            
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_case'])
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1

SIRPA_CD47 1 out of 98
LGALS9_HAVCR2 2 out of 98
TIGIT_NECTIN2 3 out of 98
CD226_NECTIN2 4 out of 98
PLAUR_integrin_a4b1_complex 5 out of 98
TGFB1_TGFBR3 6 out of 98
FCER2_integrin_aMb2_complex 7 out of 98
ICAM1_integrin_aMb2_complex 8 out of 98
GP1BA_integrin_aMb2_complex 9 out of 98
JAM3_integrin_aMb2_complex 10 out of 98
FCER2_integrin_aXb2_complex 11 out of 98
FCER2_CR2 12 out of 98
HLA-C_KIR2DL3 13 out of 98
HLA-C_KIR2DL1 14 out of 98
CD8_receptor_LCK 15 out of 98
CD94:NKG2A_HLA-E 16 out of 98
CD94:NKG2C_HLA-E 17 out of 98
CD94:NKG2E_HLA-E 18 out of 98
TNFRSF13B_TNFSF13B 19 out of 98
TNFRSF17_TNFSF13B 20 out of 98
TNFRSF13C_TNFSF13B 21 out of 98
CD74_APP 22 out of 98
ICAM1_SPN 23 out of 98
ICAM1_ITGAL 24 out of 98
ICAM1_integrin_aLb2_complex 25 out of 98
ICAM2_integrin_aLb2_complex 26 out of 98
ICAM3_integrin_aLb2_complex 27 out of 98
ICAM4_integrin_aLb2_complex 28 out of 98
F11R_integrin_aLb2_complex 29 out of 98
FAS_FASLG 30 out of 98
HLA-A_KIR3DL1 31 out of 98
HLA-F_KIR3DL1 32 

In [92]:
# outlining the final table format
df_output_upreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                            index = list(vec2_append_upreg.keys()))

for i in list(vec2_append_upreg.keys()):
    curr_keys = list(vec2_append_upreg[i].keys())
    for col in curr_keys:
        df_output_upreg.loc[i,col] = vec2_append_upreg[i][col]
        
# getting rid of the square parentheses [] in all the values
cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_upreg.index):
    for col in cols2correct:
        curr_value = df_output_upreg.loc[row, col] # with []
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 (i.e., complexes)
            df_output_upreg.loc[row, col] = curr_value[0] # this is just to get the element - string if a gene, numerical value if it's a stat
        
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD4 naïve T,False,,,0.326286,True,0.061446,0.0,0.505482
1,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD8 memory T,False,,,0.326286,True,0.083813,0.009621,0.4883
2,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD4 naïve T,False,,,0.269891,True,0.061446,0.0,0.505482
3,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD8 memory T,False,,,0.269891,True,0.083813,0.009621,0.4883
4,SIRPA_CD47,SIRPA,CD47,cDC,CD4 naïve T,False,,,0.366483,True,0.061446,0.0,0.505482
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2980,CLEC2B_KLRF1,CLEC2B,KLRF1,γδT,NK CD56(dim),False,,,0.583333,True,0.321457,0.0,0.704261
2981,CLEC2B_KLRF1,CLEC2B,KLRF1,γδT,NKT,False,,,0.583333,True,-0.342709,0.0,0.369072
2982,CLEC2B_KLRF1,CLEC2B,KLRF1,CD8 memory T,CD8 memory T,False,,,0.514621,True,0.078934,0.000011,0.149766
2983,CLEC2B_KLRF1,CLEC2B,KLRF1,NK CD56(dim),NK CD56(dim),False,,,0.563184,True,0.321457,0.0,0.704261


In [93]:
list(df_output_upreg.columns) == list(vec2_append_upreg['0'].keys())

True

In [94]:
# split tables into 2 tables, one for simple interactions and one for complex interactions (contains pseudo-interactions for each subunit)
#getting indices of complex interactions
complex_interaction_rows_upreg = []

for n_row in list(df_output_upreg.index):
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) or isinstance(curr_partner_B_genes, list): # if partner A or B is a complex
        complex_interaction_rows_upreg.append(n_row)
len(complex_interaction_rows_upreg)

642

In [95]:
df_output_upreg_complex = df_output_upreg.loc[complex_interaction_rows_upreg,:]
df_output_upreg_simple = df_output_upreg.drop(complex_interaction_rows_upreg, axis=0)
df_output_upreg_simple

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD4 naïve T,False,,,0.326286,True,0.061446,0.0,0.505482
1,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD8 memory T,False,,,0.326286,True,0.083813,0.009621,0.4883
2,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD4 naïve T,False,,,0.269891,True,0.061446,0.0,0.505482
3,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD8 memory T,False,,,0.269891,True,0.083813,0.009621,0.4883
4,SIRPA_CD47,SIRPA,CD47,cDC,CD4 naïve T,False,,,0.366483,True,0.061446,0.0,0.505482
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2980,CLEC2B_KLRF1,CLEC2B,KLRF1,γδT,NK CD56(dim),False,,,0.583333,True,0.321457,0.0,0.704261
2981,CLEC2B_KLRF1,CLEC2B,KLRF1,γδT,NKT,False,,,0.583333,True,-0.342709,0.0,0.369072
2982,CLEC2B_KLRF1,CLEC2B,KLRF1,CD8 memory T,CD8 memory T,False,,,0.514621,True,0.078934,0.000011,0.149766
2983,CLEC2B_KLRF1,CLEC2B,KLRF1,NK CD56(dim),NK CD56(dim),False,,,0.563184,True,0.321457,0.0,0.704261


In [96]:
df_output_upreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
48,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD16 mono,True,-0.139836,0.0,0.592151,False,,,"[0.5241809672386896, 0.6625065002600105]"
49,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD4 memory T,True,-0.139836,0.0,0.592151,False,,,"[0.478021978021978, 0.3644688644688644]"
50,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD4 naïve T,True,-0.139836,0.0,0.592151,False,,,"[0.3183694722356529, 0.3630378993565664]"
51,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD8 memory T,True,-0.139836,0.0,0.592151,False,,,"[0.4142030848329049, 0.5176735218508998]"
52,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD8 naïve T,True,-0.139836,0.0,0.592151,False,,,"[0.145370758273984, 0.4537075827398408]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2848,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,CD14 mono,True,0.083813,0.009621,0.4883,False,,,"[0.2845060893098782, 0.9258288227334236]"
2849,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,CD16 mono,True,0.083813,0.009621,0.4883,False,,,"[0.4914196567862715, 0.999479979199168]"
2850,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,Neutrophil,True,0.083813,0.009621,0.4883,False,,,"[0.1505016722408027, 0.862876254180602]"
2851,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,cDC,True,0.083813,0.009621,0.4883,False,,,"[0.2007366482504604, 0.9705340699815838]"


In [97]:
df_output_upreg_simple.shape

(2343, 13)

In [98]:
df_output_upreg_complex.shape

(642, 13)

In [99]:
df_output_upreg.shape

(2985, 13)

In [100]:
# checking whether any complexes interact with other complexes and how many subunits each complex contains here
n_subunits_upreg = []

for n_row in list(df_output_upreg.index):    
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) and isinstance(curr_partner_B_genes, list): # if partner A AND B is a complex
        print('row', n_row)
        print('both are complexes')
        
    if isinstance(curr_partner_A_genes, list): # if partner A is a complex
        print('row', n_row)
        print('curr_partner_A_genes',curr_partner_A_genes)
        print('partner A is a complex, len is:', len(curr_partner_A_genes))
        n_subunits_upreg.append(len(curr_partner_A_genes))
        
    if isinstance(curr_partner_B_genes, list): # if partner B is a complex
        print('row', n_row)
        print('curr_partner_B_genes',curr_partner_B_genes)
        print('partner B is a complex, len is:', len(curr_partner_B_genes))
        n_subunits_upreg.append(len(curr_partner_B_genes))

row 48
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 49
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 50
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 51
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 52
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 53
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 54
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 55
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 56
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 57
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 58
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 59
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 60
curr_partner_B_genes 

In [101]:
# resulting complexes here have at most two subunits
np.unique(n_subunits_upreg, return_counts = True)

(array([2]), array([642]))

In [102]:
df_output_upreg_complex_member_1 = df_output_upreg_complex.copy()
df_output_upreg_complex_member_2 = df_output_upreg_complex.copy()

# splitting complex interaction entries by subunits / members
# for any values in these tables, if it's a list, save the 0th for df_output_upreg_complex_member_1 and 1st for df_output_upreg_complex_member_2
for n_row in list(df_output_upreg_complex.index):
    
    for col in df_output_upreg_complex.columns:
        if isinstance(df_output_upreg_complex.loc[n_row, col], list):
            df_output_upreg_complex_member_1.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][0]
            df_output_upreg_complex_member_2.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][1]

In [103]:
df_output_upreg_complex_member_1.index = [idx + '_member_1' for idx in df_output_upreg_complex_member_1.index]
df_output_upreg_complex_member_2.index = [idx + '_member_2' for idx in df_output_upreg_complex_member_2.index]

# getting all indices
idx_concat = list(df_output_upreg_complex_member_1.index) + list(df_output_upreg_complex_member_2.index)

# sorting by original index number, so that the order is member 1 then member 2
idx_concat.sort(key = lambda x: x.split('_')[0])
idx_concat

['144_member_1',
 '144_member_2',
 '145_member_1',
 '145_member_2',
 '146_member_1',
 '146_member_2',
 '147_member_1',
 '147_member_2',
 '148_member_1',
 '148_member_2',
 '149_member_1',
 '149_member_2',
 '150_member_1',
 '150_member_2',
 '151_member_1',
 '151_member_2',
 '152_member_1',
 '152_member_2',
 '153_member_1',
 '153_member_2',
 '154_member_1',
 '154_member_2',
 '155_member_1',
 '155_member_2',
 '1557_member_1',
 '1557_member_2',
 '1558_member_1',
 '1558_member_2',
 '1559_member_1',
 '1559_member_2',
 '156_member_1',
 '156_member_2',
 '1560_member_1',
 '1560_member_2',
 '1561_member_1',
 '1561_member_2',
 '1562_member_1',
 '1562_member_2',
 '1563_member_1',
 '1563_member_2',
 '1564_member_1',
 '1564_member_2',
 '1565_member_1',
 '1565_member_2',
 '1566_member_1',
 '1566_member_2',
 '1567_member_1',
 '1567_member_2',
 '1568_member_1',
 '1568_member_2',
 '1569_member_1',
 '1569_member_2',
 '157_member_1',
 '157_member_2',
 '1570_member_1',
 '1570_member_2',
 '1571_member_1',
 '

In [104]:
df_output_upreg_complex_deconv = pd.concat([df_output_upreg_complex_member_1, df_output_upreg_complex_member_2])
df_output_upreg_complex_member_1

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
48_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD16 mono,True,-0.139836,0.0,0.592151,False,,,0.524181
49_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD4 memory T,True,-0.139836,0.0,0.592151,False,,,0.478022
50_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD4 naïve T,True,-0.139836,0.0,0.592151,False,,,0.318369
51_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD8 memory T,True,-0.139836,0.0,0.592151,False,,,0.414203
52_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD8 naïve T,True,-0.139836,0.0,0.592151,False,,,0.145371
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2848_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,CD14 mono,True,0.083813,0.009621,0.4883,False,,,0.284506
2849_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,CD16 mono,True,0.083813,0.009621,0.4883,False,,,0.49142
2850_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,Neutrophil,True,0.083813,0.009621,0.4883,False,,,0.150502
2851_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,cDC,True,0.083813,0.009621,0.4883,False,,,0.200737


In [106]:
df_output_upreg_complex_member_2

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
48_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD16 mono,True,-0.139836,0.0,0.592151,False,,,0.662507
49_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD4 memory T,True,-0.139836,0.0,0.592151,False,,,0.364469
50_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD4 naïve T,True,-0.139836,0.0,0.592151,False,,,0.363038
51_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD8 memory T,True,-0.139836,0.0,0.592151,False,,,0.517674
52_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD8 naïve T,True,-0.139836,0.0,0.592151,False,,,0.453708
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2848_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,CD14 mono,True,0.083813,0.009621,0.4883,False,,,0.925829
2849_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,CD16 mono,True,0.083813,0.009621,0.4883,False,,,0.99948
2850_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,Neutrophil,True,0.083813,0.009621,0.4883,False,,,0.862876
2851_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,cDC,True,0.083813,0.009621,0.4883,False,,,0.970534


In [107]:
df_output_upreg_complex_deconv = df_output_upreg_complex_deconv.loc[idx_concat,:]
df_output_upreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
144_member_1,FCER2_integrin_aMb2_complex,FCER2,ITGB2,Immature B,CD14 mono,False,,,0.694118,True,0.37791,0.0,0.903906
144_member_2,FCER2_integrin_aMb2_complex,FCER2,ITGAM,Immature B,CD14 mono,False,,,0.694118,True,0.128188,0.0,0.496974
145_member_1,FCER2_integrin_aMb2_complex,FCER2,ITGB2,Immature B,CD8 memory T,False,,,0.694118,True,0.442694,0.0,0.834633
145_member_2,FCER2_integrin_aMb2_complex,FCER2,ITGAM,Immature B,CD8 memory T,False,,,0.694118,True,0.045704,0.017139,0.160686
146_member_1,FCER2_integrin_aMb2_complex,FCER2,ITGB2,Memory B,CD14 mono,False,,,0.350109,True,0.37791,0.0,0.903906
...,...,...,...,...,...,...,...,...,...,...,...,...,...
985_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,pDC,CD4 naïve T,False,,,0.173333,True,0.10831,0.0,0.260307
986_member_1,F11R_integrin_aLb2_complex,F11R,ITGB2,pDC,CD8 memory T,False,,,0.173333,True,0.442694,0.0,0.834633
986_member_2,F11R_integrin_aLb2_complex,F11R,ITGAL,pDC,CD8 memory T,False,,,0.173333,True,0.155766,0.0,0.477379
987_member_1,F11R_integrin_aLb2_complex,F11R,ITGB2,pDC,CD8 naïve T,False,,,0.173333,True,0.377221,0.0,0.709779


In [108]:
# save the final tables
df_output_upreg_complex_deconv.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_readable_cellphone_interactions_DE_MS_Complex.csv')
df_output_upreg_simple.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_readable_cellphone_interactions_DE_MS_Simple.csv')

Control

In [109]:
faulty_index_count = 0

vec2_append_upreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_DE_Control.index:
    
    print(interaction, list(df_Exrp_LR_in_celltype_pairs_DE_Control.index).index(interaction) + 1, 
          'out of', len(df_Exrp_LR_in_celltype_pairs_DE_Control.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_DE_Control.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        # row by row
        vec2_append_upreg[str(curr_count)] = {}
        
        vec2_append_upreg[str(curr_count)]['interaction'] = interaction
        
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
                
        vec2_append_upreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_upreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
           
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        vec2_append_upreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_upreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_Control[DE_Control['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace = True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_Control[DE_Control['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace = True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])
        
        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_ctrl'])
            
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_ctrl'])
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1

SIRPA_CD47 1 out of 94
LGALS9_HAVCR2 2 out of 94
CD226_NECTIN2 3 out of 94
PLAUR_integrin_a4b1_complex 4 out of 94
TGFB1_TGFBR3 5 out of 94
FCER2_integrin_aMb2_complex 6 out of 94
ICAM1_integrin_aMb2_complex 7 out of 94
GP1BA_integrin_aMb2_complex 8 out of 94
JAM3_integrin_aMb2_complex 9 out of 94
ICAM1_integrin_aXb2_complex 10 out of 94
HLA-C_KIR2DL3 11 out of 94
HLA-C_KIR2DL1 12 out of 94
CD8_receptor_LCK 13 out of 94
CD94:NKG2A_HLA-E 14 out of 94
CD94:NKG2C_HLA-E 15 out of 94
CD94:NKG2E_HLA-E 16 out of 94
TNFRSF13B_TNFSF13B 17 out of 94
TNFRSF17_TNFSF13B 18 out of 94
TNFRSF13C_TNFSF13B 19 out of 94
CD74_APP 20 out of 94
ICAM1_SPN 21 out of 94
ICAM1_ITGAL 22 out of 94
ICAM1_integrin_aLb2_complex 23 out of 94
ICAM2_integrin_aLb2_complex 24 out of 94
ICAM3_integrin_aLb2_complex 25 out of 94
ICAM4_integrin_aLb2_complex 26 out of 94
F11R_integrin_aLb2_complex 27 out of 94
FAS_FASLG 28 out of 94
HLA-A_KIR3DL1 29 out of 94
HLA-F_KIR3DL1 30 out of 94
HLA-F_KIR3DL2 31 out of 94
HLA-B_KIR3DL2

In [110]:
# outlining the final table format
df_output_upreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                            index = list(vec2_append_upreg.keys()))

for i in list(vec2_append_upreg.keys()):
    curr_keys = list(vec2_append_upreg[i].keys())
    for col in curr_keys:
        df_output_upreg.loc[i,col] = vec2_append_upreg[i][col]
        
# getting rid of the square parentheses [] in all the values
cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_upreg.index):
    for col in cols2correct:
        curr_value = df_output_upreg.loc[row, col] # with []
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 (i.e., complexes)
            df_output_upreg.loc[row, col] = curr_value[0] # this is just to get the element - string if a gene, numerical value if it's a stat
        
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD4 naïve T,False,,,0.326286,True,0.049988,0.0,0.431541
1,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD8 memory T,False,,,0.326286,True,0.065938,0.001584,0.41104
2,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD4 naïve T,False,,,0.269891,True,0.049988,0.0,0.431541
3,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD8 memory T,False,,,0.269891,True,0.065938,0.001584,0.41104
4,SIRPA_CD47,SIRPA,CD47,cDC,CD4 naïve T,False,,,0.366483,True,0.049988,0.0,0.431541
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2443,CLEC2B_KLRF1,CLEC2B,KLRF1,cDC,NKT,False,,,0.714549,True,-0.182195,0.000081,0.627087
2444,CLEC2B_KLRF1,CLEC2B,KLRF1,pDC,NKT,False,,,0.493333,True,-0.182195,0.000081,0.627087
2445,CLEC2B_KLRF1,CLEC2B,KLRF1,γδT,NKT,False,,,0.583333,True,-0.182195,0.000081,0.627087
2446,CLEC2B_KLRF1,CLEC2B,KLRF1,CD8 memory T,CD8 memory T,True,0.266324,0.0,0.396442,False,,,0.106844


In [111]:
list(df_output_upreg.columns) == list(vec2_append_upreg['0'].keys())

True

In [112]:
# split tables into 2 tables, one for simple interactions and one for complex interactions (contains pseudo-interactions for each subunit)
#getting indices of complex interactions
complex_interaction_rows_upreg = []

for n_row in list(df_output_upreg.index):
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) or isinstance(curr_partner_B_genes, list): # if partner A or B is a complex
        complex_interaction_rows_upreg.append(n_row)
len(complex_interaction_rows_upreg)

475

In [113]:
df_output_upreg_complex = df_output_upreg.loc[complex_interaction_rows_upreg,:]
df_output_upreg_simple = df_output_upreg.drop(complex_interaction_rows_upreg, axis=0)
df_output_upreg_simple

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD4 naïve T,False,,,0.326286,True,0.049988,0.0,0.431541
1,SIRPA_CD47,SIRPA,CD47,CD14 mono,CD8 memory T,False,,,0.326286,True,0.065938,0.001584,0.41104
2,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD4 naïve T,False,,,0.269891,True,0.049988,0.0,0.431541
3,SIRPA_CD47,SIRPA,CD47,CD16 mono,CD8 memory T,False,,,0.269891,True,0.065938,0.001584,0.41104
4,SIRPA_CD47,SIRPA,CD47,cDC,CD4 naïve T,False,,,0.366483,True,0.049988,0.0,0.431541
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2443,CLEC2B_KLRF1,CLEC2B,KLRF1,cDC,NKT,False,,,0.714549,True,-0.182195,0.000081,0.627087
2444,CLEC2B_KLRF1,CLEC2B,KLRF1,pDC,NKT,False,,,0.493333,True,-0.182195,0.000081,0.627087
2445,CLEC2B_KLRF1,CLEC2B,KLRF1,γδT,NKT,False,,,0.583333,True,-0.182195,0.000081,0.627087
2446,CLEC2B_KLRF1,CLEC2B,KLRF1,CD8 memory T,CD8 memory T,True,0.266324,0.0,0.396442,False,,,0.106844


In [114]:
df_output_upreg_complex

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
20,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD16 mono,True,0.201638,0.0,0.595376,False,,,"[0.5241809672386896, 0.6625065002600105]"
21,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD4 memory T,True,0.201638,0.0,0.595376,False,,,"[0.478021978021978, 0.3644688644688644]"
22,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD4 naïve T,True,0.201638,0.0,0.595376,False,,,"[0.3183694722356529, 0.3630378993565664]"
23,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD8 memory T,True,0.201638,0.0,0.595376,False,,,"[0.4142030848329049, 0.5176735218508998]"
24,PLAUR_integrin_a4b1_complex,PLAUR,"[ITGB1, ITGA4]",CD14 mono,CD8 naïve T,True,0.201638,0.0,0.595376,False,,,"[0.145370758273984, 0.4537075827398408]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2351,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,CD14 mono,True,0.065938,0.001584,0.41104,False,,,"[0.2845060893098782, 0.9258288227334236]"
2352,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,CD16 mono,True,0.065938,0.001584,0.41104,False,,,"[0.4914196567862715, 0.999479979199168]"
2353,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,Neutrophil,True,0.065938,0.001584,0.41104,False,,,"[0.1505016722408027, 0.862876254180602]"
2354,CD47_SIRB1_complex,CD47,"[SIRPB1, TYROBP]",CD8 memory T,cDC,True,0.065938,0.001584,0.41104,False,,,"[0.2007366482504604, 0.9705340699815838]"


In [115]:
df_output_upreg_simple.shape

(1973, 13)

In [116]:
df_output_upreg_complex.shape

(475, 13)

In [117]:
df_output_upreg.shape

(2448, 13)

In [118]:
# checking whether any complexes interact with other complexes and how many subunits each complex contains here
n_subunits_upreg = []

for n_row in list(df_output_upreg.index):    
    curr_partner_A_genes = df_output_upreg.loc[n_row, 'partner_A_genes']
    curr_partner_B_genes = df_output_upreg.loc[n_row, 'partner_B_genes']
    
    if isinstance(curr_partner_A_genes, list) and isinstance(curr_partner_B_genes, list): # if partner A AND B is a complex
        print('row', n_row)
        print('both are complexes')
        
    if isinstance(curr_partner_A_genes, list): # if partner A is a complex
        print('row', n_row)
        print('curr_partner_A_genes',curr_partner_A_genes)
        print('partner A is a complex, len is:', len(curr_partner_A_genes))
        n_subunits_upreg.append(len(curr_partner_A_genes))
        
    if isinstance(curr_partner_B_genes, list): # if partner B is a complex
        print('row', n_row)
        print('curr_partner_B_genes',curr_partner_B_genes)
        print('partner B is a complex, len is:', len(curr_partner_B_genes))
        n_subunits_upreg.append(len(curr_partner_B_genes))

row 20
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 21
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 22
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 23
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 24
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 25
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 26
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 27
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 28
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 29
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 30
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 31
curr_partner_B_genes ['ITGB1', 'ITGA4']
partner B is a complex, len is: 2
row 32
curr_partner_B_genes 

In [119]:
# resulting complexes here have at most two subunits
np.unique(n_subunits_upreg, return_counts = True)

(array([2]), array([475]))

In [120]:
df_output_upreg_complex_member_1 = df_output_upreg_complex.copy()
df_output_upreg_complex_member_2 = df_output_upreg_complex.copy()

# splitting complex interaction entries by subunits / members
# for any values in these tables, if it's a list, save the 0th for df_output_upreg_complex_member_1 and 1st for df_output_upreg_complex_member_2
for n_row in list(df_output_upreg_complex.index):
    
    for col in df_output_upreg_complex.columns:
        if isinstance(df_output_upreg_complex.loc[n_row, col], list):
            df_output_upreg_complex_member_1.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][0]
            df_output_upreg_complex_member_2.loc[n_row, col] = df_output_upreg_complex.loc[n_row, col][1]

In [121]:
df_output_upreg_complex_member_1.index = [idx + '_member_1' for idx in df_output_upreg_complex_member_1.index]
df_output_upreg_complex_member_2.index = [idx + '_member_2' for idx in df_output_upreg_complex_member_2.index]

# getting all indices
idx_concat = list(df_output_upreg_complex_member_1.index) + list(df_output_upreg_complex_member_2.index)

# sorting by original index number, so that the order is member 1 then member 2
idx_concat.sort(key = lambda x: x.split('_')[0])
idx_concat

['100_member_1',
 '100_member_2',
 '101_member_1',
 '101_member_2',
 '102_member_1',
 '102_member_2',
 '103_member_1',
 '103_member_2',
 '104_member_1',
 '104_member_2',
 '105_member_1',
 '105_member_2',
 '106_member_1',
 '106_member_2',
 '107_member_1',
 '107_member_2',
 '108_member_1',
 '108_member_2',
 '109_member_1',
 '109_member_2',
 '110_member_1',
 '110_member_2',
 '111_member_1',
 '111_member_2',
 '112_member_1',
 '112_member_2',
 '113_member_1',
 '113_member_2',
 '114_member_1',
 '114_member_2',
 '115_member_1',
 '115_member_2',
 '116_member_1',
 '116_member_2',
 '117_member_1',
 '117_member_2',
 '1204_member_1',
 '1204_member_2',
 '1205_member_1',
 '1205_member_2',
 '1206_member_1',
 '1206_member_2',
 '1207_member_1',
 '1207_member_2',
 '1208_member_1',
 '1208_member_2',
 '1209_member_1',
 '1209_member_2',
 '1210_member_1',
 '1210_member_2',
 '1211_member_1',
 '1211_member_2',
 '1212_member_1',
 '1212_member_2',
 '1213_member_1',
 '1213_member_2',
 '1214_member_1',
 '1214_mem

In [122]:
df_output_upreg_complex_deconv = pd.concat([df_output_upreg_complex_member_1, df_output_upreg_complex_member_2])
df_output_upreg_complex_member_1

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
20_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD16 mono,True,0.201638,0.0,0.595376,False,,,0.524181
21_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD4 memory T,True,0.201638,0.0,0.595376,False,,,0.478022
22_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD4 naïve T,True,0.201638,0.0,0.595376,False,,,0.318369
23_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD8 memory T,True,0.201638,0.0,0.595376,False,,,0.414203
24_member_1,PLAUR_integrin_a4b1_complex,PLAUR,ITGB1,CD14 mono,CD8 naïve T,True,0.201638,0.0,0.595376,False,,,0.145371
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2351_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,CD14 mono,True,0.065938,0.001584,0.41104,False,,,0.284506
2352_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,CD16 mono,True,0.065938,0.001584,0.41104,False,,,0.49142
2353_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,Neutrophil,True,0.065938,0.001584,0.41104,False,,,0.150502
2354_member_1,CD47_SIRB1_complex,CD47,SIRPB1,CD8 memory T,cDC,True,0.065938,0.001584,0.41104,False,,,0.200737


In [123]:
df_output_upreg_complex_member_2

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
20_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD16 mono,True,0.201638,0.0,0.595376,False,,,0.662507
21_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD4 memory T,True,0.201638,0.0,0.595376,False,,,0.364469
22_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD4 naïve T,True,0.201638,0.0,0.595376,False,,,0.363038
23_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD8 memory T,True,0.201638,0.0,0.595376,False,,,0.517674
24_member_2,PLAUR_integrin_a4b1_complex,PLAUR,ITGA4,CD14 mono,CD8 naïve T,True,0.201638,0.0,0.595376,False,,,0.453708
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2351_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,CD14 mono,True,0.065938,0.001584,0.41104,False,,,0.925829
2352_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,CD16 mono,True,0.065938,0.001584,0.41104,False,,,0.99948
2353_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,Neutrophil,True,0.065938,0.001584,0.41104,False,,,0.862876
2354_member_2,CD47_SIRB1_complex,CD47,TYROBP,CD8 memory T,cDC,True,0.065938,0.001584,0.41104,False,,,0.970534


In [124]:
df_output_upreg_complex_deconv = df_output_upreg_complex_deconv.loc[idx_concat,:]
df_output_upreg_complex_deconv

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
100_member_1,ICAM1_integrin_aMb2_complex,ICAM1,ITGB2,Plasmablast,CD14 mono,False,,,0.112187,True,0.060533,0.000002,0.861645
100_member_2,ICAM1_integrin_aMb2_complex,ICAM1,ITGAM,Plasmablast,CD14 mono,False,,,0.112187,True,-0.018491,0.042235,0.455156
101_member_1,ICAM1_integrin_aMb2_complex,ICAM1,ITGB2,Prolif NK,CD14 mono,False,,,0.10177,True,0.060533,0.000002,0.861645
101_member_2,ICAM1_integrin_aMb2_complex,ICAM1,ITGAM,Prolif NK,CD14 mono,False,,,0.10177,True,-0.018491,0.042235,0.455156
102_member_1,ICAM1_integrin_aMb2_complex,ICAM1,ITGB2,cDC,CD14 mono,False,,,0.416206,True,0.060533,0.000002,0.861645
...,...,...,...,...,...,...,...,...,...,...,...,...,...
97_member_2,ICAM1_integrin_aMb2_complex,ICAM1,ITGAM,Memory B,CD14 mono,False,,,0.16849,True,-0.018491,0.042235,0.455156
98_member_1,ICAM1_integrin_aMb2_complex,ICAM1,ITGB2,Neutrophil,CD14 mono,False,,,0.147157,True,0.060533,0.000002,0.861645
98_member_2,ICAM1_integrin_aMb2_complex,ICAM1,ITGAM,Neutrophil,CD14 mono,False,,,0.147157,True,-0.018491,0.042235,0.455156
99_member_1,ICAM1_integrin_aMb2_complex,ICAM1,ITGB2,Plasma B,CD14 mono,False,,,0.149398,True,0.060533,0.000002,0.861645


In [125]:
# save the final tables
df_output_upreg_complex_deconv.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_readable_cellphone_interactions_DE_Control_Complex.csv')
df_output_upreg_simple.to_csv('/lustre/scratch117/cellgen/team292/ab55/20210429_readable_cellphone_interactions_DE_Control_Simple.csv')

End of Notebook