# Preparing CellPhoneDB input files from the AnnData objects

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import pandas as pd
import seaborn as sns
import scanpy as sc
from glob import iglob
import anndata
# requires 'pip install scrublet'
#import scrublet as scr
import os
import sklearn
from sklearn.linear_model import LogisticRegression
import matplotlib as mpl
import scipy
import matplotlib.pyplot as plt
import pickle

In [2]:
# Fix NumPy's global random seed so that any NumPy-based randomness is repeatable across runs
np.random.seed(0)

In [3]:
%%bash

# List every installed package with its pinned version, to record the environment this notebook ran in
pip freeze

aiohttp==3.6.2
airr==1.3.1
alembic==1.3.0
anndata==0.7.5
annoy==1.16.2
async-generator==1.10
async-timeout==3.0.1
attrs==19.3.0
backcall==0.1.0
bbknn==1.3.6
bleach==3.1.0
blinker==1.4
CellPhoneDB==2.1.5
certifi==2019.9.11
certipy==0.1.3
cffi==1.13.2
chardet==3.0.4
click==6.7
conda==4.7.12
conda-package-handling==1.6.0
cryptography==2.8
cycler==0.10.0
Cython==0.29.14
decorator==4.4.1
defusedxml==0.6.0
entrypoints==0.3
fbpca==1.0
geosketch==0.3
get-version==2.1
h5py==2.10.0
idna==2.7
imageio==2.9.0
importlib-metadata==0.23
intervaltree==2.1.0
ipykernel==5.1.3
ipython==7.9.0
ipython-genutils==0.2.0
jedi==0.15.1
Jinja2==2.10.3
joblib==0.14.0
json5==0.8.5
jsonschema==3.1.1
jupyter-client==5.3.3
jupyter-core==4.5.0
jupyter-rsession-proxy==1.0b6
jupyter-server-proxy==1.2.0
jupyterhub==1.0.0
jupyterlab==1.2.1
jupyterlab-server==1.0.6
kiwisolver==1.1.0
legacy-api-wrap==1.2
leidenalg==0.7.0
llvmlite==0.30.0
loompy==3.0.6
louvain==0.6.1
Mako==1.1.0
MarkupSafe==1.1.1
matplotlib==3.3.4
mistune==0.8

In [4]:
# Configure scanpy logging and figure defaults, and print the versions of the loaded packages
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=80)  # low dpi (dots per inch) yields small inline figures

-----
anndata     0.7.5
scanpy      1.6.0
sinfo       0.3.1
-----
PIL                 8.1.0
anndata             0.7.5
attr                19.3.0
backcall            0.1.0
cffi                1.13.2
constants           NA
cycler              0.10.0
cython_runtime      NA
dateutil            2.8.1
decorator           4.4.1
get_version         2.1
h5py                2.10.0
highs_wrapper       NA
igraph              0.7.1
importlib_metadata  0.23
ipykernel           5.1.3
ipython_genutils    0.2.0
jedi                0.15.1
joblib              0.14.0
kiwisolver          1.1.0
legacy_api_wrap     1.2
leidenalg           0.7.0
llvmlite            0.30.0
louvain             0.6.1
matplotlib          3.3.4
more_itertools      NA
mpl_toolkits        NA
natsort             6.2.0
numba               0.46.0
numexpr             2.7.0
numpy               1.17.4
packaging           19.2
pandas              0.25.3
parso               0.5.1
pexpect             4.7.0
pickleshare         0.7.5
pkg_resou

In [5]:
# Directory that all input objects are read from and all output tables are written to.
# NOTE(review): hard-coded absolute path; it is concatenated with file names directly, so it must keep its trailing '/'.
save_path = '/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

## Reading the fully analysed object


In [6]:
# Load the fully analysed AnnData object.
# Per the original notes: .X holds normalised, log-transformed and scaled values for the HVG subset,
# and .raw.X holds normalised, log-transformed values for all genes.
# NOTE(review): the original note named the final annotation column 'cell_type_final', but the object summary
# printed below lists no such obs column and the rest of this notebook uses obs['cell_type'] - confirm.
adata_hvg = sc.read(save_path+'adata_hvg_bbknn_by_sample_validation_cohort_final_manifold_with_annotation_20210217.h5ad')

In [7]:
# Number of variables per feature type, taken from the 'feature_types-0' column of .var
adata_hvg.var['feature_types-0'].value_counts()

Gene Expression     2476
Antibody Capture     145
nan                    6
Name: feature_types-0, dtype: int64

In [8]:
# Show the object summary (dimensions and the available obs / var columns)
adata_hvg

AnnData object with n_obs × n_vars = 95064 × 2627
    obs: 'batch', 'bh_pval', 'cell_id', 'dataset', 'n_counts', 'n_genes', 'percent_mito', 'sample', 'scrublet_cluster_score', 'scrublet_score', 'stimulation', 'technique', 'is_doublet', 'donor', 'cell_type', 'prelim_annot', 'celltype_predictions', 'probabilities', 'S_score', 'G2M_score', 'phase', 'n_counts_protein', 'n_counts_protein_lognorm', 'louvain', 'barcode', 'Age', 'Sex', 'Age of disease onset', 'Autoimmunity', 'Gastrointestinal disease', 'Lung disease (bronchiectasis)', 'Lymphoadenopathy ', 'Splenomegaly', 'History of hypogammaglobulinemia, IgA deficiency or immunodeficiency in other family members', 'CVID_status', 'GC_celltype_predictions_all_genes', 'probabilities_GC_all_genes'
    var: 'gene_ids-0', 'feature_types-0', 'genome-0', 'n_cells-0', 'gene_ids-1', 'feature_types-1', 'genome-1', 'n_cells-1', 'gene_ids-2', 'feature_types-2', 'genome-2', 'n_cells-2', 'gene_ids-3', 'feature_types-3', 'genome-3', 'n_cells-3', 'gene_ids-4'

In [9]:
# Load the raw count matrix.
# Per the original notes: lowly expressed genes and low-count cells were already filtered out upstream,
# so adata_raw is filtered but NOT subsetted to HVGs; values should be raw integer counts (worth checking).
adata_raw = sc.read(save_path+'adata_raw_filtered_validation_cohort.h5ad')

In [10]:
# Keep only the cells that are present in the final analysed object (selected by barcode);
# .copy() materialises the subset as an independent object instead of a view
adata_raw = adata_raw[adata_hvg.obs_names,:].copy()

In [12]:
# Carry the cell type annotation over from the analysed object, looked up by cell barcode
adata_raw.obs['cell_type'] = adata_hvg.obs.loc[adata_raw.obs_names,'cell_type']

In [13]:
# Normalise every cell to a total of 10,000 counts.
# This rewrites adata_raw.X and stores the pre-normalisation totals in obs['n_counts'] (see the log below),
# so from this cell onwards adata_raw no longer holds raw counts despite its name.
# NOTE(review): normalize_per_cell appears to be deprecated in scanpy in favour of sc.pp.normalize_total - confirm before upgrading.
sc.pp.normalize_per_cell(adata_raw, counts_per_cell_after=1e4)

normalizing by total count per cell
    finished (0:01:13): normalized adata.X and added    'n_counts', counts per cell before normalization (adata.obs)


In [14]:
# Wrap the normalised matrix together with its cell (obs) and gene (var) annotation tables
adata_count = anndata.AnnData(
    X=adata_raw.X,
    obs=adata_raw.obs,
    var=adata_raw.var,
)
# Densify the matrix and transpose it so that genes become rows and cells become columns
t = adata_count.X.toarray().T
# Label rows with gene names and columns with cell barcodes
df_expr_matrix = pd.DataFrame(
    t,
    index=adata_count.var_names,
    columns=adata_count.obs_names,
)

In [15]:
# Preview the genes x cells expression table
df_expr_matrix

index,TGGCGCAGTCCGAACC-1,GTATTCTTCAACACGT-1,CTACATTCATTAACCG-1,GATCAGTAGAGGACGG-1,GACTGCGCATGAACCT-1,TACTCGCGTTTGCATG-1,GGGCATCGTCCCTTGT-1-1,CGCCAAGGTAGCTCCG-1,ACATGGTAGCTGATAA-1,CTTACCGTCGATCCCT-1,...,TCAGGATAGGGTGTTG-1,CTAGTGATCGATAGAA-1,TAGCCGGTCTTGTCAT-1,GAAATGAGTGCACGAA-1,CGGAGTCGTGTGACCC-1,CAGCTAACAGGATTGG-1,TTTGTCACATCCGCGA-1,TCGGTAACAGGTCCAC-1,CACACAAAGTTACGGG-1,CAAGTTGCAGGATTGG-1
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
RP11-34P13.7,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
FO538757.2,0.0,0.589136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.968523,0.0,0.0,0.0,1.153735,0.0,0.0,0.0
AP006222.2,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
RP4-669L17.10,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
RP5-857K21.4,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTD-2541M15.3,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
THEGL,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
KIAA1644,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
RP11-132A1.3,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0


In [17]:
# Metadata table: one row per cell barcode (index 'Cell') with its cell type label prefixed by 'celltype_'
df_meta = pd.DataFrame(
    {
        'Cell': list(adata_count.obs.index),
        'cell_type': ['celltype_' + str(label) for label in adata_count.obs['cell_type']],
    }
).set_index('Cell')
df_meta

Unnamed: 0_level_0,cell_type
Cell,Unnamed: 1_level_1
TGGCGCAGTCCGAACC-1,celltype_NK_CD16_bright
GTATTCTTCAACACGT-1,celltype_Monocytes_non-classical
CTACATTCATTAACCG-1,celltype_NK_CD16_bright
GATCAGTAGAGGACGG-1,celltype_Monocytes_intermediate
GACTGCGCATGAACCT-1,celltype_Monocytes_intermediate
...,...
CAGCTAACAGGATTGG-1,celltype_T8_activated
TTTGTCACATCCGCGA-1,celltype_cDC1
TCGGTAACAGGTCCAC-1,celltype_Monocytes_intermediate
CACACAAAGTTACGGG-1,celltype_NK_CD16_bright


In [18]:
%%time

# Write the two CellPhoneDB input files into save_path:
# a tab-separated metadata table and a comma-separated counts table.
# Writing the counts table is slow (close to an hour, see the timing below).
# original run note: started at 17:37 18.01.2021

savepath_meta = save_path + '20210218_cellphonedb_meta.tsv'
df_meta.to_csv(savepath_meta, sep = '\t')

print('saved metadata, saving counts now')

savepath_counts = save_path + '20210218_cellphonedb_counts.csv'
df_expr_matrix.to_csv(savepath_counts)

saved metadata, saving counts now
CPU times: user 42min 43s, sys: 13min 5s, total: 55min 49s
Wall time: 56min 45s


In [None]:
# from here you can go and launch notebook S3

# Preparing a table of expression proportions

Matrix of genes (rows) per celltypes (columns) containing the proportion [0-1] of cells in a celltype expressing the gene

In [19]:
# Cell type labels present in the annotation and the number of cells carrying each label
np.unique(adata_hvg.obs['cell_type'], return_counts=True)

(array(['B_cells_memory', 'B_cells_memory_activated', 'B_cells_naive',
        'B_cells_naive_activated', 'MAIT_cells', 'Macrophages',
        'Monocytes_classical', 'Monocytes_intermediate',
        'Monocytes_non-classical', 'NK_CD16_bright',
        'NK_CD16_bright_activated', 'NK_CD56_bright',
        'NK_CD56_bright_activated', 'Plasma_cells', 'Precursor_cells',
        'T4_activated', 'T4_memory', 'T4_naive', 'T8_activated',
        'T8_naive', 'TCM_CD8+', 'TEM_CD8+', 'TMRA_CD8+', 'T_gd', 'T_regs',
        'cDC1', 'cDC2', 'iNKT_cells', 'pDC'], dtype=object),
 array([ 1538,  5290,  1694,  1276,  1173,  3804,  5474,  4203,  1639,
         5294,  1090,   395,  1154,   524,   412, 10030,  7047,  5360,
         4851,  3116,  3276,  4124,  1444,  1387,  7206,  5903,   591,
         5636,   133]))

In [20]:
# Split the genes x cells expression table into one sub-table per cell type, keyed by cell type label
df_expr_matrix_per_cell_type = {}

for ct in np.unique(adata_hvg.obs['cell_type']):
    print(ct)
    # Barcodes of the cells annotated with this cell type
    is_this_type = (adata_hvg.obs['cell_type'] == ct).values
    curr_subset_of_barcodes = list(adata_hvg.obs_names[is_this_type])
    # Keep all genes (rows) and only this cell type's cells (columns)
    df_expr_matrix_per_cell_type[ct] = df_expr_matrix.loc[:, curr_subset_of_barcodes]
    print(len(curr_subset_of_barcodes), 'cells of this cell type')
    print('subsetted a table of shape', df_expr_matrix_per_cell_type[ct].shape, '\n')

B_cells_memory
1538 cells of this cell type
subsetted a table of shape (21749, 1538) 

B_cells_memory_activated
5290 cells of this cell type
subsetted a table of shape (21749, 5290) 

B_cells_naive
1694 cells of this cell type
subsetted a table of shape (21749, 1694) 

B_cells_naive_activated
1276 cells of this cell type
subsetted a table of shape (21749, 1276) 

MAIT_cells
1173 cells of this cell type
subsetted a table of shape (21749, 1173) 

Macrophages
3804 cells of this cell type
subsetted a table of shape (21749, 3804) 

Monocytes_classical
5474 cells of this cell type
subsetted a table of shape (21749, 5474) 

Monocytes_intermediate
4203 cells of this cell type
subsetted a table of shape (21749, 4203) 

Monocytes_non-classical
1639 cells of this cell type
subsetted a table of shape (21749, 1639) 

NK_CD16_bright
5294 cells of this cell type
subsetted a table of shape (21749, 5294) 

NK_CD16_bright_activated
1090 cells of this cell type
subsetted a table of shape (21749, 1090) 



In [21]:
# Sanity check on one cell type: per gene, the fraction of T_regs cells with a non-zero value
t_regs_expr = df_expr_matrix_per_cell_type['T_regs']
t_regs_expr.astype(bool).sum(axis=1) / t_regs_expr.shape[1]

index
RP11-34P13.7     0.000139
FO538757.2       0.166805
AP006222.2       0.001249
RP4-669L17.10    0.001804
RP5-857K21.4     0.000278
                   ...   
CTD-2541M15.3    0.000000
THEGL            0.000000
KIAA1644         0.000000
RP11-132A1.3     0.000139
RP11-111H13.1    0.000555
Length: 21749, dtype: float64

In [22]:
# Empty placeholder table: one row per gene (same index as df_expr_matrix), one column per cell type.
# It holds no data yet; the columns are filled in by the next cell.
df_percentage_expressed = pd.DataFrame(index = df_expr_matrix.index,
                                      columns=np.unique(adata_hvg.obs['cell_type']))

In [23]:
# Fill one column per cell type with, for each gene, the fraction (0-1) of that cell type's cells
# that have a non-zero expression value.
for col in df_percentage_expressed.columns:
    print(col)
    expr_this_type = df_expr_matrix_per_cell_type[col]
    # Assign with df[col] = ... so the whole column is replaced. Writing through .loc[:, col] = ...
    # stores the values inside the empty object-dtype placeholder, leaving the column as object dtype
    # instead of a numeric float column. The values themselves are the same either way.
    df_percentage_expressed[col] = expr_this_type.astype(bool).sum(axis=1) / expr_this_type.shape[1]

B_cells_memory
B_cells_memory_activated
B_cells_naive
B_cells_naive_activated
MAIT_cells
Macrophages
Monocytes_classical
Monocytes_intermediate
Monocytes_non-classical
NK_CD16_bright
NK_CD16_bright_activated
NK_CD56_bright
NK_CD56_bright_activated
Plasma_cells
Precursor_cells
T4_activated
T4_memory
T4_naive
T8_activated
T8_naive
TCM_CD8+
TEM_CD8+
TMRA_CD8+
T_gd
T_regs
cDC1
cDC2
iNKT_cells
pDC


In [24]:
# Preview the genes x cell types table of expression proportions
df_percentage_expressed

Unnamed: 0_level_0,B_cells_memory,B_cells_memory_activated,B_cells_naive,B_cells_naive_activated,MAIT_cells,Macrophages,Monocytes_classical,Monocytes_intermediate,Monocytes_non-classical,NK_CD16_bright,...,T8_naive,TCM_CD8+,TEM_CD8+,TMRA_CD8+,T_gd,T_regs,cDC1,cDC2,iNKT_cells,pDC
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
RP11-34P13.7,0.000000,0.000378,0.000590,0.000000,0.000000,0.000526,0.000183,0.000238,0.000000,0.000000,...,0.000321,0.000305,0.000000,0.000000,0.000721,0.000139,0.000847,0.000000,0.000177,0.000000
FO538757.2,0.118336,0.242155,0.086777,0.072884,0.071611,0.256835,0.220497,0.168689,0.205003,0.065168,...,0.068678,0.102259,0.056984,0.076870,0.075703,0.166805,0.197018,0.099831,0.091022,0.067669
AP006222.2,0.001951,0.003403,0.000590,0.000784,0.000853,0.006046,0.004202,0.004759,0.002441,0.000378,...,0.000642,0.001221,0.000485,0.000693,0.000721,0.001249,0.002710,0.000000,0.000532,0.000000
RP4-669L17.10,0.001951,0.002836,0.002361,0.000784,0.000853,0.001577,0.000365,0.000714,0.002441,0.000189,...,0.000963,0.000305,0.000485,0.002078,0.002163,0.001804,0.001355,0.000000,0.001065,0.000000
RP5-857K21.4,0.000650,0.000000,0.000000,0.000000,0.000000,0.000000,0.000548,0.000000,0.000000,0.000000,...,0.000321,0.000305,0.000000,0.000000,0.000000,0.000278,0.000339,0.000000,0.000177,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTD-2541M15.3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
THEGL,0.000000,0.000000,0.000000,0.000000,0.000000,0.000263,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000169,0.000000,0.000000,0.000000
KIAA1644,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000610,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000508,0.006768,0.000000,0.000000
RP11-132A1.3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000139,0.000000,0.000000,0.000355,0.000000


In [25]:
# Display the expression table again (it has not been reassigned since it was built)
df_expr_matrix

index,TGGCGCAGTCCGAACC-1,GTATTCTTCAACACGT-1,CTACATTCATTAACCG-1,GATCAGTAGAGGACGG-1,GACTGCGCATGAACCT-1,TACTCGCGTTTGCATG-1,GGGCATCGTCCCTTGT-1-1,CGCCAAGGTAGCTCCG-1,ACATGGTAGCTGATAA-1,CTTACCGTCGATCCCT-1,...,TCAGGATAGGGTGTTG-1,CTAGTGATCGATAGAA-1,TAGCCGGTCTTGTCAT-1,GAAATGAGTGCACGAA-1,CGGAGTCGTGTGACCC-1,CAGCTAACAGGATTGG-1,TTTGTCACATCCGCGA-1,TCGGTAACAGGTCCAC-1,CACACAAAGTTACGGG-1,CAAGTTGCAGGATTGG-1
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
RP11-34P13.7,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
FO538757.2,0.0,0.589136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.968523,0.0,0.0,0.0,1.153735,0.0,0.0,0.0
AP006222.2,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
RP4-669L17.10,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
RP5-857K21.4,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTD-2541M15.3,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
THEGL,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
KIAA1644,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
RP11-132A1.3,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0


In [27]:
# Save the expression proportion table as CSV in save_path (gene names are written as the first column)
df_percentage_expressed.to_csv(save_path + 'PercentExpressed_for_cellphone_20210218.csv')

In [28]:
# Show the directory the table above was written to
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

# Creating a joint DE table out of limma outputs

In [29]:
# Load one limma DE table (CVID vs CONTROL) per cell type into a dict keyed by cell type label

limma_DE_tables = {}

for ct in np.unique(adata_hvg.obs['cell_type']):
    print(ct)

    # File name pattern: 20210217_<cell type>_CVID_vs_<cell type>_CONTROL_limma_DEGs.csv
    limma_csv_path = f'/home/jovyan/notebooks/Vento_Lab/CVID/202009_new_analysis_revision/CITE_all_samples_analysis/CVID/scTranscriptomics_CITE/limma_DEG/validation_cohort_new_20210217/20210217_{ct}_CVID_vs_{ct}_CONTROL_limma_DEGs.csv'
    de_table = pd.read_csv(limma_csv_path)
    # Tag every row with the cell type this table was produced for
    de_table['cluster'] = ct
    limma_DE_tables[ct] = de_table

B_cells_memory
B_cells_memory_activated
B_cells_naive
B_cells_naive_activated
MAIT_cells
Macrophages
Monocytes_classical
Monocytes_intermediate
Monocytes_non-classical
NK_CD16_bright
NK_CD16_bright_activated
NK_CD56_bright
NK_CD56_bright_activated
Plasma_cells
Precursor_cells
T4_activated
T4_memory
T4_naive
T8_activated
T8_naive
TCM_CD8+
TEM_CD8+
TMRA_CD8+
T_gd
T_regs
cDC1
cDC2
iNKT_cells
pDC


In [30]:
# Inspect the table of one cell type as an example
limma_DE_tables['B_cells_memory']

Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,cluster
0,CD73(Ecto-5'-nucleotidase),-0.972167,1.004392e-34,1.603813e-30,1.075824,2.047991,0.532946,0.735178,B_cells_memory
1,IgD,0.892972,2.104098e-32,1.679912e-28,2.001557,1.108585,0.783915,0.565217,B_cells_memory
2,CD32,0.858412,1.258348e-22,6.697768e-19,4.160845,3.302433,0.936047,0.849802,B_cells_memory
3,IGHG3,0.405874,3.555500e-22,1.419356e-18,0.599779,0.193905,0.371124,0.128458,B_cells_memory
4,IGHM,0.439509,9.379230e-17,2.995351e-13,1.090960,0.651451,0.611434,0.371542,B_cells_memory
...,...,...,...,...,...,...,...,...,...
15963,IGHV4-28,-0.000011,9.994302e-01,9.996120e-01,0.042565,0.042576,0.025194,0.021739,B_cells_memory
15964,MXRA7,-0.000002,9.995573e-01,9.996120e-01,0.002382,0.002383,0.001938,0.001976,B_cells_memory
15965,TJAP1,-0.000007,9.995683e-01,9.996120e-01,0.044451,0.044458,0.036822,0.041502,B_cells_memory
15966,PYURF,-0.000017,9.996014e-01,9.996120e-01,0.378141,0.378158,0.288760,0.276680,B_cells_memory


In [31]:
# Stack the per-cell-type tables into one long table, without any filtering.
# Each table keeps its own row index, so index values repeat across cell types in the result.
joint_DE_table = pd.concat(limma_DE_tables.values())

In [32]:
# Preview the combined DE table (the 'cluster' column identifies the cell type of each row)
joint_DE_table

Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,cluster
0,CD73(Ecto-5'-nucleotidase),-0.972167,1.004392e-34,1.603813e-30,1.075824,2.047991,0.532946,0.735178,B_cells_memory
1,IgD,0.892972,2.104098e-32,1.679912e-28,2.001557,1.108585,0.783915,0.565217,B_cells_memory
2,CD32,0.858412,1.258348e-22,6.697768e-19,4.160845,3.302433,0.936047,0.849802,B_cells_memory
3,IGHG3,0.405874,3.555500e-22,1.419356e-18,0.599779,0.193905,0.371124,0.128458,B_cells_memory
4,IGHM,0.439509,9.379230e-17,2.995351e-13,1.090960,0.651451,0.611434,0.371542,B_cells_memory
...,...,...,...,...,...,...,...,...,...
11554,ABCF3,-0.000026,9.993354e-01,9.996097e-01,0.024162,0.024187,0.014085,0.032258,pDC
11555,DDX31,0.000032,9.993502e-01,9.996097e-01,0.038317,0.038285,0.028169,0.032258,pDC
11556,CACUL1,0.000019,9.997470e-01,9.998807e-01,0.087572,0.087553,0.056338,0.080645,pDC
11557,NUCB1,-0.000024,9.998530e-01,9.998807e-01,0.490545,0.490569,0.323944,0.338710,pDC


In [33]:
# Show the directory the joint DE table is written to in the next cell
save_path

'/lustre/scratch117/cellgen/team292/aa22/adata_objects/202009_CVID_revision/'

In [34]:
# Save the combined DE table as CSV in save_path (the row index is written as the first column)
joint_DE_table.to_csv(save_path + 'joint_DEGs_list_all_cell_types_for_cellphone_20210218.csv')