In [1]:
# prepare metadata, count, percent expression, and DEG tables here
import numpy as np
import pandas as pd
import seaborn as sns
import scanpy as sc
from glob import iglob
import anndata
import os
import sklearn
from sklearn.linear_model import LogisticRegression
import pickle
import scipy
import matplotlib as mpl
import matplotlib.pyplot as plt
# Fix NumPy's global random seed for this session.
np.random.seed(0)
# Verbose scanpy logging; later cell outputs show its "filtered out ..." and
# "normalizing ..." progress messages.
sc.settings.verbosity = 3
# Notebook display setup: inline matplotlib figures rendered in 'retina' format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# scanpy figure defaults at dpi 180.
# NOTE(review): set_figure_params may itself adjust the inline figure format;
# statement order is left exactly as written -- confirm before reordering.
sc.settings.set_figure_params(dpi = 180)

In [2]:
# Directory that holds the input .h5ad and receives every table written below.
# The original cluster location stays the default; set COVID_PBMC_SAVE_PATH to
# run the notebook elsewhere without editing the code.
# Later cells build file names with `save_path + '<file>'`, so joining with ''
# guarantees the value always ends with a path separator.
save_path = os.path.join(
    os.environ.get('COVID_PBMC_SAVE_PATH', '/lustre/scratch117/cellgen/team292/ab55/'),
    '',
)

In [3]:
# Read the final-clustering PBMC object; the file name is appended directly to
# save_path, which therefore has to end with a path separator.
adata = anndata.read_h5ad(
    filename=save_path + "N3-pbmc-final-clustering.h5ad"
)
adata

AnnData object with n_obs × n_vars = 97499 × 22572
    obs: 'souporcell', 'demultiplexed', 'sample_names', 'log2p1_count', 'percent_mito', 'n_genes', 'batch', 'FolderName', 'Lane', 'Sort', 'Type', 'Donor Id', 'Age', 'Sex', 'Race', 'Ethnicity', 'BMI', 'Pre-existing heart disease', 'Pre-existing lung disease', 'Pre-existing kidney disease', 'Pre-existing diabetes', 'Pre-existing Hypertension', 'Pre-existing immunocompromised condition', 'Smoking', 'SARS-CoV-2 PCR', 'SARS-CoV-2 Ab', 'Symptomatic', 'Admitted to hospital', 'Highest level of respiratory support', 'Vasoactive agents required during hospitalization', '28-day death', 'scrublet_pred', 'scrublet_local_pred', 'scrublet_score', 'scrublet_cluster_score', 'filtered_cells', 'S_score', 'G2M_score', 'phase', 'leiden_sampl_cc', 'leidenres2_sampl_cc', 'DonorSubset', 'leiden_scvi_subset_cc', 'leidenres2_scvi_subset_cc', 'leiden_tvi_subset_cc', 'leidenres2_tvi_subset_cc', 'louvain', 'Celltype_Predictions', 'Prediction_Probabilities', 'Final

In [4]:
# Rebuild the object around the matrix and gene table stored in `.raw`, keeping
# the per-cell annotations (obs) and embeddings (obsm) of the loaded object.
# NOTE(review): `adata` is overwritten and the new object is built without a
# `raw` argument, so this cell presumably cannot be re-run without reloading.
adata = anndata.AnnData(
    X=adata.raw.X,
    var=adata.raw.var,
    obs=adata.obs,
    obsm=adata.obsm,
).copy()
adata

AnnData object with n_obs × n_vars = 97499 × 33759
    obs: 'souporcell', 'demultiplexed', 'sample_names', 'log2p1_count', 'percent_mito', 'n_genes', 'batch', 'FolderName', 'Lane', 'Sort', 'Type', 'Donor Id', 'Age', 'Sex', 'Race', 'Ethnicity', 'BMI', 'Pre-existing heart disease', 'Pre-existing lung disease', 'Pre-existing kidney disease', 'Pre-existing diabetes', 'Pre-existing Hypertension', 'Pre-existing immunocompromised condition', 'Smoking', 'SARS-CoV-2 PCR', 'SARS-CoV-2 Ab', 'Symptomatic', 'Admitted to hospital', 'Highest level of respiratory support', 'Vasoactive agents required during hospitalization', '28-day death', 'scrublet_pred', 'scrublet_local_pred', 'scrublet_score', 'scrublet_cluster_score', 'filtered_cells', 'S_score', 'G2M_score', 'phase', 'leiden_sampl_cc', 'leidenres2_sampl_cc', 'DonorSubset', 'leiden_scvi_subset_cc', 'leidenres2_scvi_subset_cc', 'leiden_tvi_subset_cc', 'leidenres2_tvi_subset_cc', 'louvain', 'Celltype_Predictions', 'Prediction_Probabilities', 'Final

In [5]:
# Keep only non-antibody features, then drop genes detected in fewer than
# 3 cells (filter lowly expressed genes).
keep_feature = adata.var["feature_types"] != "Antibody Capture"
adata = adata[:, keep_feature].copy()
sc.pp.filter_genes(adata, min_cells=3)
adata

filtered out 11187 genes that are detected in less than 3 cells


AnnData object with n_obs × n_vars = 97499 × 22380
    obs: 'souporcell', 'demultiplexed', 'sample_names', 'log2p1_count', 'percent_mito', 'n_genes', 'batch', 'FolderName', 'Lane', 'Sort', 'Type', 'Donor Id', 'Age', 'Sex', 'Race', 'Ethnicity', 'BMI', 'Pre-existing heart disease', 'Pre-existing lung disease', 'Pre-existing kidney disease', 'Pre-existing diabetes', 'Pre-existing Hypertension', 'Pre-existing immunocompromised condition', 'Smoking', 'SARS-CoV-2 PCR', 'SARS-CoV-2 Ab', 'Symptomatic', 'Admitted to hospital', 'Highest level of respiratory support', 'Vasoactive agents required during hospitalization', '28-day death', 'scrublet_pred', 'scrublet_local_pred', 'scrublet_score', 'scrublet_cluster_score', 'filtered_cells', 'S_score', 'G2M_score', 'phase', 'leiden_sampl_cc', 'leidenres2_sampl_cc', 'DonorSubset', 'leiden_scvi_subset_cc', 'leidenres2_scvi_subset_cc', 'leiden_tvi_subset_cc', 'leidenres2_tvi_subset_cc', 'louvain', 'Celltype_Predictions', 'Prediction_Probabilities', 'Final

In [6]:
# Sanity check of the count matrix before normalisation.
# Only the first 9 rows are ever displayed, so slice the sparse matrix first and
# densify just that slice; converting the whole cells x genes matrix to dense
# allocates several GB for no benefit. `dense_matrix` therefore holds only the
# preview rows (no later cell reads it).
dense_matrix = adata.X[:9].todense()
dense_matrix

matrix([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 1., 0.]], dtype=float32)

In [7]:
# need normalized-only counts
# Scale every cell to 10,000 total counts; no log transform is applied here.
# The call also records the pre-normalisation totals in adata.obs['n_counts'].
# NOTE(review): scanpy documents normalize_per_cell as deprecated in favour of
# normalize_total; kept as-is so the 'n_counts' column is still written.
sc.pp.normalize_per_cell(adata, counts_per_cell_after = 1e4)
# Preview the first 9 rows only: slice the sparse matrix before densifying so
# the full cells x genes matrix is never materialised just for display.
dense_matrix = adata.X[:9].todense()
dense_matrix

normalizing by total count per cell
    finished (0:00:01): normalized adata.X and added    'n_counts', counts per cell before normalization (adata.obs)


matrix([[ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,
          0.      ],
        [ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,
          0.      ],
        [ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,
          0.      ],
        ...,
        [ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,
          0.      ],
        [ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,
          0.      ],
        [ 0.      ,  0.      ,  0.      , ...,  0.      , 12.626263,
          0.      ]], dtype=float32)

In [8]:
# Sorted list of the fine-grained cell-type labels present in the data.
np.unique(adata.obs.loc[:, 'Final_Celltype'])

array(['CD14 mono', 'CD16 mono', 'CD4 memory T', 'CD4 naïve T',
       'CD8 memory T', 'CD8 naïve T', 'Exhausted B', 'HSC', 'Immature B',
       'MAIT', 'Memory B', 'NK CD56(bright)', 'NK CD56(dim)', 'NKT',
       'Naïve B', 'Neutrophil', 'Plasma B', 'Plasmablast', 'Platelets',
       'Prolif NK', 'Prolif T', 'RBC', 'Treg', 'cDC1', 'cDC2', 'cDC3',
       'pDC', 'γδT'], dtype=object)

In [9]:
# Map fine-grained labels ('Final_Celltype') to the labels used for the
# downstream tables ('General_Celltype'): cDC1/cDC2/cDC3 are pooled into 'cDC',
# every other label maps to itself.
# NOTE(review): despite its name, this dict maps cell type -> cell type, not
# donor -> condition; the name is kept so existing references keep working.
donor2condition = {
     'cDC1': 'cDC',
     'cDC2': 'cDC',
     'cDC3': 'cDC',
     'pDC': 'pDC',
     'CD14 mono': 'CD14 mono',
     'CD16 mono': 'CD16 mono',
     'CD4 memory T': 'CD4 memory T',
     'CD4 naïve T': 'CD4 naïve T',
     'CD8 memory T': 'CD8 memory T',
     'CD8 naïve T': 'CD8 naïve T',
     'Exhausted B': 'Exhausted B',
     'HSC': 'HSC',
     'Immature B': 'Immature B',
     'MAIT': 'MAIT',
     'Memory B': 'Memory B',
     'NK CD56(bright)': 'NK CD56(bright)',
     'NK CD56(dim)': 'NK CD56(dim)',
     'NKT': 'NKT',
     'Naïve B': 'Naïve B',
     'Neutrophil': 'Neutrophil',
     'Plasma B': 'Plasma B',
     'Plasmablast': 'Plasmablast',
     'Platelets': 'Platelets',
     'Prolif NK': 'Prolif NK',
     'Prolif T': 'Prolif T',
     'RBC': 'RBC',
     'Treg': 'Treg',
     'γδT': 'γδT'}

general_celltype = adata.obs['Final_Celltype'].map(donor2condition)

# Series.map returns NaN for labels missing from the dict. Left unchecked, those
# cells would be exported with the label 'celltype_nan' when the metadata table
# is built from str(label), so fail loudly instead.
unmapped_labels = sorted(
    set(adata.obs['Final_Celltype'][general_celltype.isna()].astype(str))
)
if unmapped_labels:
    raise ValueError(
        'Final_Celltype labels without a General_Celltype mapping: '
        + ', '.join(unmapped_labels)
    )

adata.obs['General_Celltype'] = general_celltype.astype('category')

In [10]:
# Sorted list of the pooled labels; cDC1/cDC2/cDC3 should now appear as 'cDC'.
np.unique(adata.obs.loc[:, 'General_Celltype'])

array(['CD14 mono', 'CD16 mono', 'CD4 memory T', 'CD4 naïve T',
       'CD8 memory T', 'CD8 naïve T', 'Exhausted B', 'HSC', 'Immature B',
       'MAIT', 'Memory B', 'NK CD56(bright)', 'NK CD56(dim)', 'NKT',
       'Naïve B', 'Neutrophil', 'Plasma B', 'Plasmablast', 'Platelets',
       'Prolif NK', 'Prolif T', 'RBC', 'Treg', 'cDC', 'pDC', 'γδT'],
      dtype=object)

In [11]:
# Wrap the normalised matrix with its gene and cell annotations, then lay it out
# as a dense genes x cells table (rows = var_names, columns = cell barcodes).
adata_count = anndata.AnnData(X=adata.X, obs=adata.obs, var=adata.var)
genes_by_cells = adata_count.X.toarray().T
df_expr_matrix = pd.DataFrame(
    genes_by_cells,
    index=adata_count.var_names,
    columns=adata_count.obs.index,
)
df_expr_matrix

Unnamed: 0,RV8919578_AAACCTGAGAAACCTA-1,RV8919578_AAACCTGAGAGCAATT-1,RV8919578_AAACCTGAGAGCTGGT-1,RV8919578_AAACCTGAGCGAAGGG-1,RV8919578_AAACCTGAGCGATGAC-1,RV8919578_AAACCTGAGCTGTTCA-1,RV8919578_AAACCTGAGGTGCTAG-1,RV8919578_AAACCTGAGTAGGTGC-1,RV8919578_AAACCTGAGTGGTAAT-1,RV8919578_AAACCTGAGTTCGCAT-1,...,RV8959686_TTTGTCACAAACCTAC-1,RV8959686_TTTGTCACACATTTCT-1,RV8959686_TTTGTCACAGCGATCC-1,RV8959686_TTTGTCACATATACGC-1,RV8959686_TTTGTCACATCTACGA-1,RV8959686_TTTGTCAGTATGAATG-1,RV8959686_TTTGTCAGTCGGCATC-1,RV8959686_TTTGTCAGTGACAAAT-1,RV8959686_TTTGTCAGTTGCCTCT-1,RV8959686_TTTGTCATCAGTCCCT-1
AL627309.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AL627309.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AL627309.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AL669831.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AL669831.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AL354822.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC004556.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC233755.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC233755.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.626263,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Metadata table: one row per cell barcode (index named 'Cell') with a single
# 'cell_type' column holding the pooled label prefixed by 'celltype_'.
prefixed_labels = [
    'celltype_' + str(label) for label in adata_count.obs['General_Celltype']
]
df_meta = pd.DataFrame(
    {'cell_type': prefixed_labels},
    index=pd.Index(list(adata_count.obs.index), name='Cell'),
)
df_meta

Unnamed: 0_level_0,cell_type
Cell,Unnamed: 1_level_1
RV8919578_AAACCTGAGAAACCTA-1,celltype_NK CD56(bright)
RV8919578_AAACCTGAGAGCAATT-1,celltype_Naïve B
RV8919578_AAACCTGAGAGCTGGT-1,celltype_pDC
RV8919578_AAACCTGAGCGAAGGG-1,celltype_NK CD56(dim)
RV8919578_AAACCTGAGCGATGAC-1,celltype_NK CD56(bright)
...,...
RV8959686_TTTGTCAGTATGAATG-1,celltype_CD4 naïve T
RV8959686_TTTGTCAGTCGGCATC-1,celltype_Naïve B
RV8959686_TTTGTCAGTGACAAAT-1,celltype_NK CD56(bright)
RV8959686_TTTGTCAGTTGCCTCT-1,celltype_CD4 naïve T


In [13]:
%%time

savepath_meta = save_path + '20210402_cellphonedb_meta.tsv'
df_meta.to_csv(savepath_meta, sep = '\t')

print('saved metadata, saving counts now')

savepath_counts = save_path + '20210402_cellphonedb_counts.csv'
df_expr_matrix.to_csv(savepath_counts)

saved metadata, saving counts now
CPU times: user 45min 32s, sys: 7min 24s, total: 52min 57s
Wall time: 53min 55s


GEX: fraction of cells expressing each gene, per cell type

In [14]:
# Split the genes x cells table into one sub-table per pooled cell type,
# keyed by the cell-type label.
df_expr_matrix_per_cell_type = {}
general_labels = adata.obs['General_Celltype']

for ct in np.unique(general_labels):
    print(ct)
    # Barcodes of the cells carrying this label, in their original order.
    is_current_type = (general_labels == ct).to_numpy()
    barcodes = list(adata.obs_names[is_current_type])
    subset = df_expr_matrix.loc[:, barcodes]
    df_expr_matrix_per_cell_type[ct] = subset
    print(len(barcodes), 'cells of this cell type')
    print('subsetted a table of shape', subset.shape, '\n')

CD14 mono
23648 cells of this cell type
subsetted a table of shape (22380, 23648) 

CD16 mono
1923 cells of this cell type
subsetted a table of shape (22380, 1923) 

CD4 memory T
3276 cells of this cell type
subsetted a table of shape (22380, 3276) 

CD4 naïve T
26887 cells of this cell type
subsetted a table of shape (22380, 26887) 

CD8 memory T
6224 cells of this cell type
subsetted a table of shape (22380, 6224) 

CD8 naïve T
2387 cells of this cell type
subsetted a table of shape (22380, 2387) 

Exhausted B
510 cells of this cell type
subsetted a table of shape (22380, 510) 

HSC
270 cells of this cell type
subsetted a table of shape (22380, 270) 

Immature B
255 cells of this cell type
subsetted a table of shape (22380, 255) 

MAIT
223 cells of this cell type
subsetted a table of shape (22380, 223) 

Memory B
457 cells of this cell type
subsetted a table of shape (22380, 457) 

NK CD56(bright)
3638 cells of this cell type
subsetted a table of shape (22380, 3638) 

NK CD56(dim)
69

In [15]:
# Spot check on one cell type: per gene, the number of Treg cells with a
# non-zero value divided by the number of Treg cells.
treg_expr = df_expr_matrix_per_cell_type['Treg']
n_treg_cells = treg_expr.shape[1]
treg_expr.astype(bool).sum(axis=1) / n_treg_cells

AL627309.1    0.000000
AL627309.3    0.000000
AL627309.2    0.000000
AL669831.2    0.000000
AL669831.5    0.019608
                ...   
AL354822.1    0.003268
AC004556.1    0.009804
AC233755.2    0.006536
AC233755.1    0.022876
AC240274.1    0.006536
Length: 22380, dtype: float64

In [16]:
# Empty genes x cell-types table (all NaN), to be filled column by column.
df_percentage_expressed = pd.DataFrame(
    columns=np.unique(adata.obs['General_Celltype']),
    index=df_expr_matrix.index,
)

In [17]:
# For every cell type: fraction of its cells in which each gene is non-zero.
# The per-type Series are collected first and the table is built in one go, so
# the result has numeric columns. Assigning into a pre-allocated all-NaN frame
# starts from object dtype, which newer pandas may keep when filling in place.
# The keys of df_expr_matrix_per_cell_type are the cell-type labels, in the
# same order as the columns of the pre-allocated frame.
fraction_by_celltype = {}
for col, expr_subset in df_expr_matrix_per_cell_type.items():
    print(col)
    fraction_by_celltype[col] = expr_subset.astype(bool).sum(axis=1) / expr_subset.shape[1]

df_percentage_expressed = pd.DataFrame(fraction_by_celltype)

CD14 mono
CD16 mono
CD4 memory T
CD4 naïve T
CD8 memory T
CD8 naïve T
Exhausted B
HSC
Immature B
MAIT
Memory B
NK CD56(bright)
NK CD56(dim)
NKT
Naïve B
Neutrophil
Plasma B
Plasmablast
Platelets
Prolif NK
Prolif T
RBC
Treg
cDC
pDC
γδT


In [18]:
# Display the genes x cell-types table of expression fractions.
df_percentage_expressed

Unnamed: 0,CD14 mono,CD16 mono,CD4 memory T,CD4 naïve T,CD8 memory T,CD8 naïve T,Exhausted B,HSC,Immature B,MAIT,...,Plasma B,Plasmablast,Platelets,Prolif NK,Prolif T,RBC,Treg,cDC,pDC,γδT
AL627309.1,0.001057,0.001560,0.001221,0.001190,0.000482,0.001257,0.000000,0.011111,0.003922,0.000000,...,0.000000,0.002421,0.000693,0.000000,0.001179,0.000000,0.000000,0.001842,0.000000,0.00000
AL627309.3,0.000042,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000807,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
AL627309.2,0.000803,0.001040,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
AL669831.2,0.000085,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
AL669831.5,0.013828,0.016121,0.008242,0.008963,0.006909,0.007541,0.015686,0.029630,0.007843,0.008969,...,0.033735,0.022599,0.003463,0.004425,0.024160,0.003839,0.019608,0.031308,0.013333,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AL354822.1,0.001184,0.002600,0.003053,0.003608,0.001928,0.002095,0.001961,0.003704,0.003922,0.004484,...,0.016867,0.001614,0.002078,0.013274,0.006482,0.000256,0.003268,0.003683,0.000000,0.00463
AC004556.1,0.034971,0.033281,0.002137,0.007996,0.004981,0.008379,0.021569,0.048148,0.003922,0.000000,...,0.081928,0.079096,0.002078,0.022124,0.030053,0.001792,0.009804,0.047882,0.000000,0.00463
AC233755.2,0.002453,0.000520,0.002747,0.002678,0.001607,0.001676,0.005882,0.000000,0.007843,0.000000,...,0.021687,0.020178,0.005540,0.004425,0.003536,0.003071,0.006536,0.001842,0.000000,0.00463
AC233755.1,0.009853,0.020281,0.011294,0.016625,0.011889,0.016757,0.013725,0.000000,0.058824,0.026906,...,0.091566,0.081517,0.010388,0.044248,0.004714,0.033786,0.022876,0.003683,0.093333,0.00463


In [19]:
# Write the expression-fraction table as comma-separated text next to the other
# exported tables.
df_percentage_expressed.to_csv(
    path_or_buf=save_path + '20210402_PercentExpressed_for_cellphone.csv'
)

Load/combine DE tables

In [4]:
# Cell types for which filtered DE tables are loaded; this list is reused by the
# cells that load the other contrasts.
Celltype = [
    'CD14 mono', 'CD16 mono', 'CD4 memory T', 'CD4 naïve T', 'CD8 memory T',
    'CD8 naïve T', 'cDC', 'Naïve B', 'NK CD56(bright)', 'NK CD56(dim)', 'NKT',
]

# Load one "RA vs rest" table per cell type and tag every row with its cell
# type in a 'cluster' column.
limma_DE_tables_RA = {}
for ct in Celltype:
    print(ct)
    de_table = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/' + ct + '_DE_table_RA_vs_rest_filtered.csv')
    de_table['cluster'] = [ct] * len(de_table)
    limma_DE_tables_RA[ct] = de_table

limma_DE_tables_RA['CD16 mono']

CD14 mono
CD16 mono
CD4 memory T
CD4 naïve T
CD8 memory T
CD8 naïve T
cDC
Naïve B
NK CD56(bright)
NK CD56(dim)
NKT


Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,TXNIP,-0.592604,4.998004e-39,8.595068e-35,1.841391,2.433995,0.961340,0.989577,CD16 mono
1,FKBP5,-0.419399,3.335520e-36,2.868047e-32,0.202345,0.621744,0.317010,0.600000,CD16 mono
2,FOS,-0.753282,8.747964e-34,5.014625e-30,1.918190,2.671471,0.889175,0.971987,CD16 mono
3,HLA-DRB5,-0.649534,6.331257e-32,1.814644e-28,0.687873,1.337407,0.737113,0.743974,CD16 mono
4,MYL12A,-0.382923,7.959697e-29,1.140691e-25,1.263243,1.646166,0.884021,0.952443,CD16 mono
...,...,...,...,...,...,...,...,...,...
188,EGR2,-0.029064,4.636964e-03,3.928171e-02,0.022893,0.051956,0.036082,0.076873,CD16 mono
189,CXCL10,-0.066075,5.176545e-03,4.223010e-02,0.059238,0.125313,0.061856,0.101629,CD16 mono
190,CTSA,-0.076655,5.288920e-03,4.282183e-02,0.458332,0.534987,0.587629,0.615635,CD16 mono
191,MBD2,-0.063351,5.937359e-03,4.649580e-02,0.275219,0.338569,0.440722,0.452769,CD16 mono


In [5]:
# Stack the per-cell-type RA tables in loading order and save the result.
# Row labels are the per-table row numbers, so they repeat across cell types.
joint_DE_table_RA = pd.concat(
    [limma_DE_tables_RA[ct] for ct in limma_DE_tables_RA]
)
joint_DE_table_RA.to_csv(
    path_or_buf='/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/Joint_DE_table_RA_vs_rest_filtered.csv'
)
joint_DE_table_RA

Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,LGALS2,0.387839,0.000000e+00,0.000000e+00,0.652597,0.264758,0.569413,0.247351,CD14 mono
1,FOS,-0.543533,6.896551e-292,2.855862e-288,2.046344,2.589877,0.923695,0.951635,CD14 mono
2,CD63,-0.433710,1.118027e-281,3.858125e-278,0.885395,1.319104,0.736462,0.802894,CD14 mono
3,CLU,-0.278287,5.909354e-272,1.747903e-268,0.115825,0.394112,0.156219,0.362709,CD14 mono
4,HMOX1,-0.504810,1.691163e-228,3.501553e-225,0.600111,1.104921,0.558582,0.640481,CD14 mono
...,...,...,...,...,...,...,...,...,...
26,CD2,-0.137560,5.562531e-04,1.200101e-02,0.725809,0.863369,0.557915,0.597362,NKT
27,EPSTI1,-0.081279,6.638167e-04,1.383300e-02,0.144002,0.225281,0.138996,0.187742,NKT
28,IRF7,-0.093833,7.389212e-04,1.510081e-02,0.222031,0.315864,0.206564,0.258340,NKT
29,JPT1,-0.080550,1.461101e-03,2.600990e-02,0.208325,0.288875,0.200772,0.259891,NKT


In [6]:
# Load one "PS vs rest" table per cell type (using the Celltype list defined
# with the RA tables) and tag every row with its cell type in 'cluster'.
limma_DE_tables_PS = {}
for ct in Celltype:
    print(ct)
    de_table = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/' + ct + '_DE_table_PS_vs_rest_filtered.csv')
    de_table['cluster'] = [ct] * len(de_table)
    limma_DE_tables_PS[ct] = de_table

limma_DE_tables_PS['CD16 mono']

CD14 mono
CD16 mono
CD4 memory T
CD4 naïve T
CD8 memory T
CD8 naïve T
cDC
Naïve B
NK CD56(bright)
NK CD56(dim)
NKT


Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,FKBP5,0.715163,7.852211e-116,1.929064e-112,1.100179,0.385016,0.833741,0.464333,CD16 mono
1,GLUL,0.923628,4.262178e-109,9.162084e-106,2.231796,1.308167,0.982885,0.908851,CD16 mono
2,MT-CO3,0.755382,1.055022e-97,2.015913e-94,3.472718,2.717336,1.000000,0.996037,CD16 mono
3,DDIT4,0.853405,4.702323e-96,7.351441e-93,1.361209,0.507803,0.863081,0.455086,CD16 mono
4,SAP30,0.394449,3.132792e-91,4.489551e-88,0.460872,0.066423,0.462103,0.096433,CD16 mono
...,...,...,...,...,...,...,...,...,...
261,HP1BP3,0.070453,7.159446e-03,4.239704e-02,0.551153,0.480701,0.584352,0.612285,CD16 mono
262,RPS3,0.093409,7.223974e-03,4.267629e-02,2.411319,2.317910,0.987775,0.984148,CD16 mono
263,IGSF6,-0.099375,7.475199e-03,4.390128e-02,1.000674,1.100049,0.731051,0.852708,CD16 mono
264,MT1F,0.026237,8.053764e-03,4.672759e-02,0.067068,0.040831,0.088020,0.070013,CD16 mono


In [7]:
# Stack the per-cell-type PS tables in loading order and save the result.
# Row labels are the per-table row numbers, so they repeat across cell types.
joint_DE_table_PS = pd.concat(
    [limma_DE_tables_PS[ct] for ct in limma_DE_tables_PS]
)
joint_DE_table_PS.to_csv(
    path_or_buf='/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/Joint_DE_table_PS_vs_rest_filtered.csv'
)
joint_DE_table_PS

Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,MTSS1,0.894124,0.000000,0.000000,1.327510,0.433386,0.761057,0.407629,CD14 mono
1,PLIN2,1.020501,0.000000,0.000000,1.484773,0.464272,0.711932,0.457540,CD14 mono
2,SAT1,0.985907,0.000000,0.000000,3.495494,2.509587,0.989760,0.974216,CD14 mono
3,AREG,1.279460,0.000000,0.000000,1.920389,0.640930,0.797121,0.363513,CD14 mono
4,TPST1,0.543313,0.000000,0.000000,0.668993,0.125680,0.519887,0.157185,CD14 mono
...,...,...,...,...,...,...,...,...,...
47,ISG20,0.185423,0.000259,0.008250,1.098238,0.912815,0.735849,0.649805,NKT
48,CXCR4,-0.212548,0.000331,0.010089,1.628972,1.841520,0.864151,0.885863,NKT
49,KLF6,-0.223692,0.000440,0.012882,1.109934,1.333626,0.660377,0.751621,NKT
50,RPS3,0.087406,0.000928,0.023205,3.476708,3.389302,0.996226,1.000000,NKT


In [8]:
# Load one "MS vs rest" table per cell type (using the Celltype list defined
# with the RA tables) and tag every row with its cell type in 'cluster'.
limma_DE_tables_MS = {}
for ct in Celltype:
    print(ct)
    de_table = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/' + ct + '_DE_table_MS_vs_rest_filtered.csv')
    de_table['cluster'] = [ct] * len(de_table)
    limma_DE_tables_MS[ct] = de_table

limma_DE_tables_MS['CD16 mono']

CD14 mono
CD16 mono
CD4 memory T
CD4 naïve T
CD8 memory T
CD8 naïve T
cDC
Naïve B
NK CD56(bright)
NK CD56(dim)
NKT


Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,TNFSF10,1.327972,1.481621e-298,2.547943e-294,1.644019,0.316047,0.904269,0.371304,CD16 mono
1,XAF1,0.903717,2.254806e-227,1.292530e-223,1.131184,0.227466,0.807245,0.320870,CD16 mono
2,HLA-DRB5,1.267790,6.951210e-212,2.988499e-208,1.964521,0.696731,0.922380,0.621739,CD16 mono
3,ISG15,1.208161,6.022281e-192,1.726086e-188,1.817287,0.609125,0.862872,0.613043,CD16 mono
4,GIMAP7,0.692032,5.202032e-184,1.277991e-180,0.888930,0.196897,0.804657,0.258261,CD16 mono
...,...,...,...,...,...,...,...,...,...
317,TXNIP,-0.092414,1.479586e-02,4.163711e-02,2.259160,2.351574,0.992238,0.978261,CD16 mono
318,IL27,0.013823,1.636442e-02,4.534638e-02,0.035403,0.021580,0.063389,0.042609,CD16 mono
319,CYB5R2,-0.008401,1.684176e-02,4.639983e-02,0.003909,0.012310,0.006468,0.020870,CD16 mono
320,EGR1,0.051699,1.790106e-02,4.891080e-02,0.260347,0.208649,0.285899,0.207826,CD16 mono


In [9]:
# Stack the per-cell-type MS tables in loading order and save the result.
# Row labels are the per-table row numbers, so they repeat across cell types.
joint_DE_table_MS = pd.concat(
    [limma_DE_tables_MS[ct] for ct in limma_DE_tables_MS]
)
joint_DE_table_MS.to_csv(
    path_or_buf='/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/Joint_DE_table_MS_vs_rest_filtered.csv'
)
joint_DE_table_MS

Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,MNDA,1.146410,0.000000,0.000000,2.030917,0.884508,0.926096,0.671338,CD14 mono
1,GIMAP4,0.547464,0.000000,0.000000,0.657609,0.110145,0.580414,0.145095,CD14 mono
2,TNFSF10,0.560615,0.000000,0.000000,0.682024,0.121409,0.590317,0.156856,CD14 mono
3,GIMAP7,0.467760,0.000000,0.000000,0.551168,0.083408,0.534201,0.105469,CD14 mono
4,SAMHD1,0.737967,0.000000,0.000000,1.192707,0.454740,0.835687,0.460676,CD14 mono
...,...,...,...,...,...,...,...,...,...
43,MT-ND3,-0.130767,0.001654,0.011188,1.623948,1.754714,0.880412,0.906203,NKT
44,RPL37A,0.085446,0.002876,0.017507,2.589446,2.504000,0.995876,0.990166,NKT
45,RPL29,0.070024,0.003278,0.019629,2.924436,2.854411,0.997938,0.997731,NKT
46,RPS6,0.069916,0.005354,0.029036,2.907112,2.837196,1.000000,0.996218,NKT


In [5]:
# Load one "Control vs rest" table per cell type (using the Celltype list
# defined with the RA tables) and tag every row with its cell type in 'cluster'.
limma_DE_tables_Control = {}
for ct in Celltype:
    print(ct)
    de_table = pd.read_csv('/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/' + ct + '_DE_table_Control_vs_rest_filtered.csv')
    de_table['cluster'] = [ct] * len(de_table)
    limma_DE_tables_Control[ct] = de_table

limma_DE_tables_Control['CD16 mono']

CD14 mono
CD16 mono
CD4 memory T
CD4 naïve T
CD8 memory T
CD8 naïve T
cDC
Naïve B
NK CD56(bright)
NK CD56(dim)
NKT


Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,TNFSF10,0.811874,1.294421e-54,2.226016e-50,0.998894,0.187019,0.658599,0.260623,CD16 mono
1,RPL36A,0.512178,2.184167e-49,1.252037e-45,1.477930,0.965752,0.950318,0.821530,CD16 mono
2,MS4A7,0.751508,1.870860e-47,6.434637e-44,1.320468,0.568960,0.814013,0.580737,CD16 mono
3,PTPRC,0.578212,8.274052e-46,1.778611e-42,1.627592,1.049381,0.935669,0.855524,CD16 mono
4,CARD16,0.487785,2.532796e-41,3.629707e-38,1.284277,0.796492,0.899363,0.784703,CD16 mono
...,...,...,...,...,...,...,...,...,...
231,MT1F,-0.027780,7.948702e-03,4.189207e-02,0.041312,0.069091,0.066242,0.107649,CD16 mono
232,MT1X,-0.074398,8.346857e-03,4.341830e-02,0.234843,0.309240,0.273248,0.354108,CD16 mono
233,LYZ,0.156976,8.817369e-03,4.548059e-02,2.693379,2.536403,0.973885,0.977337,CD16 mono
234,KLF3,-0.086549,9.520034e-03,4.837944e-02,0.638398,0.724947,0.659236,0.725212,CD16 mono


In [11]:
# Stack the per-cell-type Control tables in loading order and save the result.
# Row labels are the per-table row numbers, so they repeat across cell types.
joint_DE_table_Control = pd.concat(
    [limma_DE_tables_Control[ct] for ct in limma_DE_tables_Control]
)
joint_DE_table_Control.to_csv(
    path_or_buf='/home/jovyan/COVID/NB6_CellPhoneDB/DE_Tables/Joint_DE_table_Control_vs_rest_filtered.csv'
)
joint_DE_table_Control

Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_case,AveExpr_ctrl,percentExpr_case,percentExpr_ctrl,cluster
0,MT-ND4L,0.699686,0.000000,0.000000,2.108854,1.409167,0.959475,0.892411,CD14 mono
1,MT-ND3,0.555052,0.000000,0.000000,2.405833,1.850780,0.981132,0.957673,CD14 mono
2,MT-ND4,0.502666,0.000000,0.000000,2.714005,2.211339,0.989171,0.978370,CD14 mono
3,MT-ATP6,0.498666,0.000000,0.000000,2.629306,2.130639,0.988078,0.979489,CD14 mono
4,IGLV2-23,-0.104248,0.000000,0.000000,0.002640,0.106888,0.003500,0.112996,CD14 mono
...,...,...,...,...,...,...,...,...,...
32,MT-CO2,0.101612,0.000361,0.006406,3.343557,3.241945,0.999211,0.996289,NKT
33,TYROBP,-0.173622,0.000786,0.012112,1.706277,1.879899,0.767350,0.897959,NKT
34,TOMM7,0.120765,0.000966,0.014078,1.526897,1.406132,0.889590,0.851577,NKT
35,MT-ND2,0.115930,0.002174,0.027052,1.677349,1.561419,0.914038,0.883117,NKT


End of Notebook