# Enrichr Enrichment Analysis of DESeq2 DEGs

### 1. Import Required Packages
### 2. Import Deseq2 Data
### 3. Import all gene lists
### 5. Enrichr Analysis


## <br> 1. Import Required Packages

In [15]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import gseapy as gp

Set figure and pandas display parameters.

In [16]:
# Pass facecolor="w" (white) whenever an inline figure is rendered.
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
# Render inline figures in the high-resolution 'retina' format.
%config InlineBackend.figure_format='retina'
# Let pandas show up to 200 characters per cell before truncating the text.
pd.options.display.max_colwidth = 200
# Optional sans-serif font override; currently disabled.
#plt.rcParams['font.sans-serif']=['Arial']
# Save PDF text with TrueType fonts ('truetype' is matplotlib's alias for font type 42).
plt.rcParams['pdf.fonttype'] = 'truetype'

## <br> 2. Import Deseq2 Data

In [17]:
# Read the combined DESeq2 results table (tab-delimited; the first column
# of the file is used as the row index).
Deseq2_Master = pd.read_csv(
    '../04_DEGs_Mining---WALD/Results/Deseq2_Master_Wald.txt',
    sep='\t',
    index_col=0,
)

# Store Time as a categorical whose categories follow the listed order of
# time points. reorder_categories raises if the data contain any other value.
Deseq2_Master['Time'] = (
    Deseq2_Master['Time']
    .astype('category')
    .cat.reorder_categories([2, 4, 8, 12, 18, 24, 72])
)

In [18]:
# Display the combined DESeq2 table (the rendered output abbreviates the middle rows).
Deseq2_Master

Unnamed: 0,Gene,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,Celltype,Time,Fold-Change
0,Gm42418,9721.280933,0.090394,0.450800,0.200520,0.841074,0.998335,B_Cell,2,1.064661
1,Malat1,3845.002291,-0.101792,0.159584,-0.637858,0.523566,0.998335,B_Cell,2,0.931875
2,Cmss1,1590.724138,0.309259,0.446208,0.693083,0.488257,0.998335,B_Cell,2,1.239071
3,Foxp1,795.056702,-0.041252,0.180903,-0.228032,0.819622,0.998335,B_Cell,2,0.971811
4,Camk1d,746.328996,0.123394,0.308227,0.400334,0.688911,0.998335,B_Cell,2,1.089294
...,...,...,...,...,...,...,...,...,...,...
626194,Tfpi,1.631608,-0.321517,1.136329,-0.282943,0.777220,0.999076,T_Cell,72,0.800228
626195,Tmem128,1.647822,-1.621279,1.074936,-1.508257,0.131489,0.999076,T_Cell,72,0.325047
626196,Wbp1,1.682889,-0.317524,1.098354,-0.289091,0.772512,0.999076,T_Cell,72,0.802446
626197,Zfp324,1.478046,-0.332261,1.205061,-0.275721,0.782762,0.999076,T_Cell,72,0.794291


In [19]:
# List the distinct cell-type labels present in the DESeq2 table.
Deseq2_Master['Celltype'].unique()

array(['B_Cell', 'Cholangiocyte', 'Hepatocyte', 'HSC', 'EC', 'Macrophage',
       'Neutrophil', 'pDC', 'PF', 'T_Cell'], dtype=object)

## <br> 3. Import all gene lists

#### Background Gene Lists

In [20]:
def _read_background_genes(celltype):
    """Load the background gene list for one cell type.

    Reads ``<celltype>_All_Genes.txt`` from the 04b_All_Genes_In_Celltype
    results folder as headerless, tab-delimited text, names the column
    'Gene', and returns that column as a plain Python list.

    Parameters
    ----------
    celltype : str
        Cell-type label exactly as it appears in the file name (e.g. 'B_Cell').

    Returns
    -------
    list
        Values of the 'Gene' column, in file order.
    """
    path = f'../04_DEGs_Mining---WALD/Results/04b_All_Genes_In_Celltype/{celltype}_All_Genes.txt'
    return pd.read_csv(path, delimiter='\t', header=None, names=['Gene'])['Gene'].tolist()


# One background list per cell type; these are later passed to gp.enrichr as `background`.
B_Cell_All_Genes = _read_background_genes('B_Cell')
Cholangiocyte_All_Genes = _read_background_genes('Cholangiocyte')
Hepatocyte_All_Genes = _read_background_genes('Hepatocyte')
HSC_All_Genes = _read_background_genes('HSC')
# Files on disk use the label "EC"; the notebook variables use "LSEC".
LSEC_All_Genes = _read_background_genes('EC')
Macrophage_All_Genes = _read_background_genes('Macrophage')
Neutrophil_All_Genes = _read_background_genes('Neutrophil')
pDC_All_Genes = _read_background_genes('pDC')
PF_All_Genes = _read_background_genes('PF')
T_Cell_All_Genes = _read_background_genes('T_Cell')

In [21]:
# Report the number of background genes loaded for each cell type, one per line.
print("\n".join(
    f"{label} {len(genes)}"
    for label, genes in (
        ("B_Cell_All_Genes:", B_Cell_All_Genes),
        ("Cholangiocyte_All_Genes:", Cholangiocyte_All_Genes),
        ("Hepatocyte_All_Genes:", Hepatocyte_All_Genes),
        ("HSC_All_Genes:", HSC_All_Genes),
        ("EC_All_Genes:", LSEC_All_Genes),
        ("Macrophage_All_Genes:", Macrophage_All_Genes),
        ("Neutrophil_All_Genes:", Neutrophil_All_Genes),
        ("pDC_All_Genes:", pDC_All_Genes),
        ("PF_All_Genes:", PF_All_Genes),
        ("T_Cell_All_Genes:", T_Cell_All_Genes),
    )
))

B_Cell_All_Genes: 9128
Cholangiocyte_All_Genes: 5574
Hepatocyte_All_Genes: 17250
HSC_All_Genes: 12232
EC_All_Genes: 13426
Macrophage_All_Genes: 11242
Neutrophil_All_Genes: 1649
pDC_All_Genes: 3049
PF_All_Genes: 6374
T_Cell_All_Genes: 9533


#### All DEGs

In [22]:
def _read_all_degs(celltype):
    """Load the full DEG list for one cell type.

    Reads ``<celltype>.txt`` from the 04b_All_DEGs results folder as
    headerless, tab-delimited text, names the column 'Gene', and returns
    that column as a plain Python list.

    Parameters
    ----------
    celltype : str
        Cell-type label exactly as it appears in the file name (e.g. 'B_Cell').

    Returns
    -------
    list
        Values of the 'Gene' column, in file order.
    """
    path = f'../04_DEGs_Mining---WALD/Results/04b_All_DEGs/{celltype}.txt'
    return pd.read_csv(path, delimiter='\t', header=None, names=['Gene'])['Gene'].tolist()


# One DEG list per cell type; these are later passed to gp.enrichr as `gene_list`.
B_Cell_All_DEGs = _read_all_degs('B_Cell')
Cholangiocyte_All_DEGs = _read_all_degs('Cholangiocyte')
Hepatocyte_All_DEGs = _read_all_degs('Hepatocyte')
HSC_All_DEGs = _read_all_degs('HSC')
# Files on disk use the label "EC"; the notebook variables use "LSEC".
LSEC_All_DEGs = _read_all_degs('EC')
Macrophage_All_DEGs = _read_all_degs('Macrophage')
Neutrophil_All_DEGs = _read_all_degs('Neutrophil')
pDC_All_DEGs = _read_all_degs('pDC')
PF_All_DEGs = _read_all_degs('PF')
T_Cell_All_DEGs = _read_all_degs('T_Cell')

In [23]:
# Report the number of DEGs loaded for each cell type, one per line.
print("\n".join(
    f"{label} {len(genes)}"
    for label, genes in (
        ("B_Cell_All_DEGs:", B_Cell_All_DEGs),
        ("Cholangiocyte_All_DEGs:", Cholangiocyte_All_DEGs),
        ("Hepatocyte_All_DEGs:", Hepatocyte_All_DEGs),
        ("HSC_All_DEGs:", HSC_All_DEGs),
        ("EC_All_DEGs:", LSEC_All_DEGs),
        ("Macrophage_All_DEGs:", Macrophage_All_DEGs),
        ("Neutrophil_All_DEGs:", Neutrophil_All_DEGs),
        ("pDC_All_DEGs:", pDC_All_DEGs),
        ("PF_All_DEGs:", PF_All_DEGs),
        ("T_Cell_All_DEGs:", T_Cell_All_DEGs),
    )
))

B_Cell_All_DEGs: 128
Cholangiocyte_All_DEGs: 14
Hepatocyte_All_DEGs: 4785
HSC_All_DEGs: 373
EC_All_DEGs: 710
Macrophage_All_DEGs: 161
Neutrophil_All_DEGs: 0
pDC_All_DEGs: 5
PF_All_DEGs: 33
T_Cell_All_DEGs: 140


#### Unique to Celltype DEGs

In [24]:
def _read_unique_degs(celltype):
    """Load the cell-type-unique DEG list for one cell type.

    Reads ``Unique_to_<celltype>.txt`` from the 04d_Unique_to_Celltypes_DEGs
    results folder as headerless, tab-delimited text, names the column
    'Gene', and returns that column as a plain Python list.

    Parameters
    ----------
    celltype : str
        Cell-type label exactly as it appears in the file name (e.g. 'B_Cell').

    Returns
    -------
    list
        Values of the 'Gene' column, in file order.
    """
    path = f'../04_DEGs_Mining---WALD/Results/04d_Unique_to_Celltypes_DEGs/Unique_to_{celltype}.txt'
    return pd.read_csv(path, delimiter='\t', header=None, names=['Gene'])['Gene'].tolist()


Unique_to_B_Cell = _read_unique_degs('B_Cell')
Unique_to_Cholangiocyte = _read_unique_degs('Cholangiocyte')
Unique_to_Hepatocyte = _read_unique_degs('Hepatocyte')
Unique_to_HSC = _read_unique_degs('HSC')
# Files on disk use the label "EC"; the notebook variables use "LSEC".
Unique_to_LSEC = _read_unique_degs('EC')
Unique_to_Macrophage = _read_unique_degs('Macrophage')
Unique_to_Neutrophil = _read_unique_degs('Neutrophil')
Unique_to_pDC = _read_unique_degs('pDC')
Unique_to_PF = _read_unique_degs('PF')
Unique_to_T_Cell = _read_unique_degs('T_Cell')



In [25]:
# Report the number of cell-type-unique DEGs loaded for each cell type, one per line.
print("\n".join(
    f"{label} {len(genes)}"
    for label, genes in (
        ("Unique_to_B_Cell:", Unique_to_B_Cell),
        ("Unique_to_Cholangiocyte:", Unique_to_Cholangiocyte),
        ("Unique_to_Hepatocyte:", Unique_to_Hepatocyte),
        ("Unique_to_HSC:", Unique_to_HSC),
        ("Unique_to_EC:", Unique_to_LSEC),
        ("Unique_to_Macrophage:", Unique_to_Macrophage),
        ("Unique_to_Neutrophil:", Unique_to_Neutrophil),
        ("Unique_to_pDC:", Unique_to_pDC),
        ("Unique_to_PF:", Unique_to_PF),
        ("Unique_to_T_Cell:", Unique_to_T_Cell),
    )
))

Unique_to_B_Cell: 5
Unique_to_Cholangiocyte: 2
Unique_to_Hepatocyte: 4071
Unique_to_HSC: 86
Unique_to_EC: 226
Unique_to_Macrophage: 6
Unique_to_Neutrophil: 0
Unique_to_pDC: 0
Unique_to_PF: 7
Unique_to_T_Cell: 9


#### Unique to Celltype DEGs with DREs

In [26]:
def _read_unique_degs_with_dres(celltype):
    """Load the 'unique with DREs' DEG list for one cell type.

    Reads ``Unique_with_DREs_to_<celltype>.txt`` from the
    04d_Unique_to_Celltypes_DEGs_with_DREs results folder as headerless,
    tab-delimited text, names the column 'Gene', and returns that column
    as a plain Python list.

    Parameters
    ----------
    celltype : str
        Cell-type label exactly as it appears in the file name (e.g. 'B_Cell').

    Returns
    -------
    list
        Values of the 'Gene' column, in file order.
    """
    path = (
        '../04_DEGs_Mining---WALD/Results/04d_Unique_to_Celltypes_DEGs_with_DREs/'
        f'Unique_with_DREs_to_{celltype}.txt'
    )
    return pd.read_csv(path, delimiter='\t', header=None, names=['Gene'])['Gene'].tolist()


Unique_with_DREs_to_B_Cell = _read_unique_degs_with_dres('B_Cell')
Unique_with_DREs_to_Cholangiocyte = _read_unique_degs_with_dres('Cholangiocyte')
Unique_with_DREs_to_Hepatocyte = _read_unique_degs_with_dres('Hepatocyte')
Unique_with_DREs_to_HSC = _read_unique_degs_with_dres('HSC')
# Files on disk use the label "EC"; the notebook variables use "LSEC".
Unique_with_DREs_to_LSEC = _read_unique_degs_with_dres('EC')
Unique_with_DREs_to_Macrophage = _read_unique_degs_with_dres('Macrophage')
Unique_with_DREs_to_Neutrophil = _read_unique_degs_with_dres('Neutrophil')
Unique_with_DREs_to_pDC = _read_unique_degs_with_dres('pDC')
Unique_with_DREs_to_PF = _read_unique_degs_with_dres('PF')
Unique_with_DREs_to_T_Cell = _read_unique_degs_with_dres('T_Cell')



In [27]:
# Report the number of 'unique with DREs' DEGs loaded for each cell type, one per line.
print("\n".join(
    f"{label} {len(genes)}"
    for label, genes in (
        ("Unique_with_DREs_to_B_Cell:", Unique_with_DREs_to_B_Cell),
        ("Unique_with_DREs_to_Cholangiocyte:", Unique_with_DREs_to_Cholangiocyte),
        ("Unique_with_DREs_to_Hepatocyte:", Unique_with_DREs_to_Hepatocyte),
        ("Unique_with_DREs_to_HSC:", Unique_with_DREs_to_HSC),
        ("Unique_with_DREs_to_EC:", Unique_with_DREs_to_LSEC),
        ("Unique_with_DREs_to_Macrophage:", Unique_with_DREs_to_Macrophage),
        ("Unique_with_DREs_to_Neutrophil:", Unique_with_DREs_to_Neutrophil),
        ("Unique_with_DREs_to_pDC:", Unique_with_DREs_to_pDC),
        ("Unique_with_DREs_to_PF:", Unique_with_DREs_to_PF),
        ("Unique_with_DREs_to_T_Cell:", Unique_with_DREs_to_T_Cell),
    )
))

Unique_with_DREs_to_B_Cell: 5
Unique_with_DREs_to_Cholangiocyte: 2
Unique_with_DREs_to_Hepatocyte: 2916
Unique_with_DREs_to_HSC: 79
Unique_with_DREs_to_EC: 183
Unique_with_DREs_to_Macrophage: 6
Unique_with_DREs_to_Neutrophil: 0
Unique_with_DREs_to_pDC: 0
Unique_with_DREs_to_PF: 4
Unique_with_DREs_to_T_Cell: 9


#### Unique to Celltype DEGs with AHR

In [28]:
def _read_unique_degs_with_ahr(celltype):
    """Load the 'unique with AHR binding' DEG list for one cell type.

    Reads ``Unique_with_AHR_Binding_to_<celltype>.txt`` from the
    04d_Unique_to_Celltypes_DEGs_with_AHR results folder as headerless,
    tab-delimited text, names the column 'Gene', and returns that column
    as a plain Python list.

    Parameters
    ----------
    celltype : str
        Cell-type label exactly as it appears in the file name (e.g. 'B_Cell').

    Returns
    -------
    list
        Values of the 'Gene' column, in file order.
    """
    path = (
        '../04_DEGs_Mining---WALD/Results/04d_Unique_to_Celltypes_DEGs_with_AHR/'
        f'Unique_with_AHR_Binding_to_{celltype}.txt'
    )
    return pd.read_csv(path, delimiter='\t', header=None, names=['Gene'])['Gene'].tolist()


Unique_with_AHR_to_B_Cell = _read_unique_degs_with_ahr('B_Cell')
Unique_with_AHR_to_Cholangiocyte = _read_unique_degs_with_ahr('Cholangiocyte')
Unique_with_AHR_to_Hepatocyte = _read_unique_degs_with_ahr('Hepatocyte')
Unique_with_AHR_to_HSC = _read_unique_degs_with_ahr('HSC')
# Files on disk use the label "EC"; the notebook variables use "LSEC".
Unique_with_AHR_to_LSEC = _read_unique_degs_with_ahr('EC')
Unique_with_AHR_to_Macrophage = _read_unique_degs_with_ahr('Macrophage')
Unique_with_AHR_to_Neutrophil = _read_unique_degs_with_ahr('Neutrophil')
Unique_with_AHR_to_pDC = _read_unique_degs_with_ahr('pDC')
Unique_with_AHR_to_PF = _read_unique_degs_with_ahr('PF')
Unique_with_AHR_to_T_Cell = _read_unique_degs_with_ahr('T_Cell')



In [29]:
# Report the number of 'unique with AHR' DEGs loaded for each cell type, one per line.
print("\n".join(
    f"{label} {len(genes)}"
    for label, genes in (
        ("Unique_with_AHR_to_B_Cell:", Unique_with_AHR_to_B_Cell),
        ("Unique_with_AHR_to_Cholangiocyte:", Unique_with_AHR_to_Cholangiocyte),
        ("Unique_with_AHR_to_Hepatocyte:", Unique_with_AHR_to_Hepatocyte),
        ("Unique_with_AHR_to_HSC:", Unique_with_AHR_to_HSC),
        ("Unique_with_AHR_to_EC:", Unique_with_AHR_to_LSEC),
        ("Unique_with_AHR_to_Macrophage:", Unique_with_AHR_to_Macrophage),
        ("Unique_with_AHR_to_Neutrophil:", Unique_with_AHR_to_Neutrophil),
        ("Unique_with_AHR_to_pDC:", Unique_with_AHR_to_pDC),
        ("Unique_with_AHR_to_PF:", Unique_with_AHR_to_PF),
        ("Unique_with_AHR_to_T_Cell:", Unique_with_AHR_to_T_Cell),
    )
))

Unique_with_AHR_to_B_Cell: 5
Unique_with_AHR_to_Cholangiocyte: 1
Unique_with_AHR_to_Hepatocyte: 1550
Unique_with_AHR_to_HSC: 40
Unique_with_AHR_to_EC: 102
Unique_with_AHR_to_Macrophage: 2
Unique_with_AHR_to_Neutrophil: 0
Unique_with_AHR_to_pDC: 0
Unique_with_AHR_to_PF: 3
Unique_with_AHR_to_T_Cell: 5


## <br> 5. Enrichr Analysis

### 5.1 All DEGs

In [30]:
def _enrichr_all_degs(celltype, gene_list, background):
    """Run gp.enrichr for one cell type's full DEG list.

    Every run shares the same local GMT file and organism; only the query
    genes, the background and the output folder differ between cell types.

    Parameters
    ----------
    celltype : str
        Label used in the output folder name
        (``./Results/All_DEGs/<celltype>_Wald_Enrichr``).
    gene_list : list
        Query genes (the cell type's DEGs).
    background : list
        Background genes for the same cell type.

    Returns
    -------
    The object returned by ``gp.enrichr``; its ``res2d`` attribute is read
    by the following cell.
    """
    return gp.enrichr(
        gene_list=gene_list,
        background=background,
        gene_sets="TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt",
        organism='mouse',
        outdir=f"./Results/All_DEGs/{celltype}_Wald_Enrichr",
        verbose=True,  # log each step of the run
    )


Enrichr_All_DEGs_B_Cell = _enrichr_all_degs('B_Cell', B_Cell_All_DEGs, B_Cell_All_Genes)
Enrichr_All_DEGs_Cholangiocyte = _enrichr_all_degs('Cholangiocyte', Cholangiocyte_All_DEGs, Cholangiocyte_All_Genes)
Enrichr_All_DEGs_Hepatocyte = _enrichr_all_degs('Hepatocyte', Hepatocyte_All_DEGs, Hepatocyte_All_Genes)
Enrichr_All_DEGs_HSC = _enrichr_all_degs('HSC', HSC_All_DEGs, HSC_All_Genes)
# Output folders use the label "EC"; the notebook variables use "LSEC".
Enrichr_All_DEGs_LSEC = _enrichr_all_degs('EC', LSEC_All_DEGs, LSEC_All_Genes)
Enrichr_All_DEGs_Macrophage = _enrichr_all_degs('Macrophage', Macrophage_All_DEGs, Macrophage_All_Genes)

# Neutrophil is deliberately not run (the call was already disabled). Its DEG
# list is empty in the recorded counts, which is presumably the reason.
# Enrichr_All_DEGs_Neutrophil = _enrichr_all_degs('Neutrophil', Neutrophil_All_DEGs, Neutrophil_All_Genes)

Enrichr_All_DEGs_pDC = _enrichr_all_degs('pDC', pDC_All_DEGs, pDC_All_Genes)
Enrichr_All_DEGs_PF = _enrichr_all_degs('PF', PF_All_DEGs, PF_All_Genes)
Enrichr_All_DEGs_T_Cell = _enrichr_all_degs('T_Cell', T_Cell_All_DEGs, T_Cell_All_Genes)

2023-09-21 11:53:28,257 [INFO] User defined gene sets is given: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt
2023-09-21 11:53:28,392 [INFO] Run: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:53:30,269 [INFO] Save enrichment results for TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:53:31,509 [INFO] Done.
2023-09-21 11:53:31,511 [INFO] User defined gene sets is given: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt
2023-09-21 11:53:31,699 [INFO] Run: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:53:32,151 [INFO] Save enrichment results for TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:53:33,308 [INFO] Done.
2023-09-21 11:53:33,320 [INFO] User defined gene sets is given: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt
2023-09-21 11:53:33,415 [INFO] Run: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:53:47,535 [INFO] Save enrichment results for TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:53:48,318 [INFO] Done.
2023-09-21 11:53:48,321 [INFO] User d

In [31]:
# Bind each run's result table to its *_RESULTS name.
# NOTE(review): presumably `.res2d` hands back the stored table rather than a
# copy, so the 'Celltype' column added below also shows up on each Enrichr
# object's own `.res2d` (same as before this refactor) — confirm if that matters.
Enrichr_All_DEGs_B_Cell_RESULTS = Enrichr_All_DEGs_B_Cell.res2d
Enrichr_All_DEGs_Cholangiocyte_RESULTS = Enrichr_All_DEGs_Cholangiocyte.res2d
Enrichr_All_DEGs_Hepatocyte_RESULTS = Enrichr_All_DEGs_Hepatocyte.res2d
Enrichr_All_DEGs_HSC_RESULTS = Enrichr_All_DEGs_HSC.res2d
Enrichr_All_DEGs_LSEC_RESULTS = Enrichr_All_DEGs_LSEC.res2d
Enrichr_All_DEGs_Macrophage_RESULTS = Enrichr_All_DEGs_Macrophage.res2d
Enrichr_All_DEGs_pDC_RESULTS = Enrichr_All_DEGs_pDC.res2d
Enrichr_All_DEGs_PF_RESULTS = Enrichr_All_DEGs_PF.res2d
Enrichr_All_DEGs_T_Cell_RESULTS = Enrichr_All_DEGs_T_Cell.res2d

# Single source of truth for both the cell-type label and the stacking order
# (dicts keep insertion order). There is no Neutrophil entry because no
# Enrichr_All_DEGs_Neutrophil object is created.
_all_degs_tables = {
    'B_Cell': Enrichr_All_DEGs_B_Cell_RESULTS,
    'Cholangiocyte': Enrichr_All_DEGs_Cholangiocyte_RESULTS,
    'Hepatocyte': Enrichr_All_DEGs_Hepatocyte_RESULTS,
    'HSC': Enrichr_All_DEGs_HSC_RESULTS,
    'EC': Enrichr_All_DEGs_LSEC_RESULTS,
    'Macrophage': Enrichr_All_DEGs_Macrophage_RESULTS,
    'pDC': Enrichr_All_DEGs_pDC_RESULTS,
    'PF': Enrichr_All_DEGs_PF_RESULTS,
    'T_Cell': Enrichr_All_DEGs_T_Cell_RESULTS,
}

# Tag every table with its cell type so rows stay identifiable after stacking.
for _label, _table in _all_degs_tables.items():
    _table['Celltype'] = _label

Enrichr_All_DEGs_MASTER_Wald = (
    # Stack all per-cell-type tables under a fresh 0..n-1 index.
    pd.concat(list(_all_degs_tables.values()), ignore_index=True)
    # Drop the "Gene_set" column from the combined table.
    .drop(columns="Gene_set")
    # Strip the literal text "_Ensembl" from term names. regex=False pins plain
    # substring matching regardless of the installed pandas version's default.
    .assign(Term=lambda df: df['Term'].str.replace('_Ensembl', '', regex=False))
)

# Export the combined table (tab-delimited, with the row index as first column).
output_file_path = './Results/Enrichr_All_DEGs_MASTER.txt'
Enrichr_All_DEGs_MASTER_Wald.to_csv(output_file_path, sep='\t', index=True, header=True)

# Display the combined table.
Enrichr_All_DEGs_MASTER_Wald

Unnamed: 0,Term,Overlap,P-value,Adjusted P-value,Odds Ratio,Combined Score,Genes,Celltype
0,BIOCARTA_MM_ACUTE_MYOCARDIAL_INFARCTION,1/8,0.106867,0.239933,12.623529,28.228344,Fga,B_Cell
1,BIOCARTA_MM_AKT_SIGNALING_PATHWAY,1/15,0.191031,0.317764,6.917268,11.450299,Ghr,B_Cell
2,BIOCARTA_MM_CARM1_AND_REGULATION_OF_THE_ESTROGEN_RECEPTOR,1/22,0.267319,0.383303,4.761569,6.281992,Esr1,B_Cell
3,BIOCARTA_MM_CD40L_SIGNALING_PATHWAY,1/12,0.155970,0.285197,8.580235,15.942858,Traf3,B_Cell
4,BIOCARTA_MM_DOWNREGULATED_OF_MTA-3_IN_ER-NEGATIVE_BREAST_TUMORS,1/9,0.119402,0.252348,11.293498,24.001581,Esr1,B_Cell
...,...,...,...,...,...,...,...,...
44528,WIKIPATHWAYS_MM_TNF_ALPHA_SIGNALING_PATHWAY-WP231,1/70,0.646342,0.689262,1.443377,0.629927,Ikbkg,T_Cell
44529,WIKIPATHWAYS_MM_TOLL-LIKE_RECEPTOR_SIGNALING_PATHWAY-WP75,1/51,0.530709,0.592917,1.979852,1.254319,Ikbkg,T_Cell
44530,WIKIPATHWAYS_MM_TRIACYLGLYCERIDE_SYNTHESIS-WP325,1/17,0.222540,0.363021,5.847312,8.786449,Plpp3,T_Cell
44531,WIKIPATHWAYS_MM_TRYPTOPHAN_METABOLISM-WP465,4/34,0.001468,0.045283,9.077401,59.220959,Tdo2;Aox1;Cyp1a2;Cyp1a1,T_Cell


### 5.2 Unique to Celltype DEGs

In [32]:
def _enrichr_unique_to_celltype(celltype, gene_list, background):
    """Run gp.enrichr for one cell type's cell-type-unique DEG list.

    Every run shares the same local GMT file and organism; only the query
    genes, the background and the output folder differ between cell types.

    Parameters
    ----------
    celltype : str
        Label used in the output folder name
        (``./Results/Unique_to_Celltype/<celltype>_Wald_Enrichr``).
    gene_list : list
        Query genes (DEGs unique to the cell type).
    background : list
        Background genes for the same cell type.

    Returns
    -------
    The object returned by ``gp.enrichr``; its ``res2d`` attribute is read
    by the following cell.
    """
    return gp.enrichr(
        gene_list=gene_list,
        background=background,
        gene_sets="TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt",
        organism='mouse',
        outdir=f"./Results/Unique_to_Celltype/{celltype}_Wald_Enrichr",
        verbose=True,  # log each step of the run
    )


Enrichr_Unique_to_B_Cell = _enrichr_unique_to_celltype('B_Cell', Unique_to_B_Cell, B_Cell_All_Genes)
Enrichr_Unique_to_Cholangiocyte = _enrichr_unique_to_celltype('Cholangiocyte', Unique_to_Cholangiocyte, Cholangiocyte_All_Genes)
Enrichr_Unique_to_Hepatocyte = _enrichr_unique_to_celltype('Hepatocyte', Unique_to_Hepatocyte, Hepatocyte_All_Genes)
Enrichr_Unique_to_HSC = _enrichr_unique_to_celltype('HSC', Unique_to_HSC, HSC_All_Genes)
# Output folders use the label "EC"; the notebook variables use "LSEC".
Enrichr_Unique_to_LSEC = _enrichr_unique_to_celltype('EC', Unique_to_LSEC, LSEC_All_Genes)
Enrichr_Unique_to_Macrophage = _enrichr_unique_to_celltype('Macrophage', Unique_to_Macrophage, Macrophage_All_Genes)

# The runs below are deliberately disabled (they were already commented out).
# Neutrophil and pDC have empty unique-DEG lists in the recorded counts, which
# is presumably why. PF and T_Cell have short, non-empty lists; the reason
# they are disabled is not recorded here — TODO confirm before re-enabling.
# Enrichr_Unique_to_Neutrophil = _enrichr_unique_to_celltype('Neutrophil', Unique_to_Neutrophil, Neutrophil_All_Genes)
# Enrichr_Unique_to_pDC = _enrichr_unique_to_celltype('pDC', Unique_to_pDC, pDC_All_Genes)
# Enrichr_Unique_to_PF = _enrichr_unique_to_celltype('PF', Unique_to_PF, PF_All_Genes)
# Enrichr_Unique_to_T_Cell = _enrichr_unique_to_celltype('T_Cell', Unique_to_T_Cell, T_Cell_All_Genes)

2023-09-21 11:54:11,733 [INFO] User defined gene sets is given: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt
2023-09-21 11:54:11,826 [INFO] Run: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:54:12,223 [INFO] Save enrichment results for TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:54:13,414 [INFO] Done.
2023-09-21 11:54:13,427 [INFO] User defined gene sets is given: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt
2023-09-21 11:54:13,597 [INFO] Run: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:54:13,696 [INFO] Save enrichment results for TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:54:14,755 [INFO] Done.
2023-09-21 11:54:14,757 [INFO] User defined gene sets is given: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt
2023-09-21 11:54:14,849 [INFO] Run: TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:54:27,891 [INFO] Save enrichment results for TZ_mGSKB-parsed_Hep-Secretome_Symbol.gmt 
2023-09-21 11:54:28,886 [INFO] Done.
2023-09-21 11:54:28,888 [INFO] User d

In [33]:
# Bind each run's result table to its *_RESULTS name.
# NOTE(review): presumably `.res2d` hands back the stored table rather than a
# copy, so the 'Celltype' column added below also shows up on each Enrichr
# object's own `.res2d` (same as before this refactor) — confirm if that matters.
Enrichr_Unique_to_B_Cell_RESULTS = Enrichr_Unique_to_B_Cell.res2d
Enrichr_Unique_to_Cholangiocyte_RESULTS = Enrichr_Unique_to_Cholangiocyte.res2d
Enrichr_Unique_to_Hepatocyte_RESULTS = Enrichr_Unique_to_Hepatocyte.res2d
Enrichr_Unique_to_HSC_RESULTS = Enrichr_Unique_to_HSC.res2d
Enrichr_Unique_to_LSEC_RESULTS = Enrichr_Unique_to_LSEC.res2d
Enrichr_Unique_to_Macrophage_RESULTS = Enrichr_Unique_to_Macrophage.res2d

# Single source of truth for both the cell-type label and the stacking order
# (dicts keep insertion order). Neutrophil, pDC, PF and T_Cell have no entry
# because no Enrichr_Unique_to_* result object is created for them.
_unique_tables = {
    'B_Cell': Enrichr_Unique_to_B_Cell_RESULTS,
    'Cholangiocyte': Enrichr_Unique_to_Cholangiocyte_RESULTS,
    'Hepatocyte': Enrichr_Unique_to_Hepatocyte_RESULTS,
    'HSC': Enrichr_Unique_to_HSC_RESULTS,
    'EC': Enrichr_Unique_to_LSEC_RESULTS,
    'Macrophage': Enrichr_Unique_to_Macrophage_RESULTS,
}

# Tag every table with its cell type so rows stay identifiable after stacking.
for _label, _table in _unique_tables.items():
    _table['Celltype'] = _label

Enrichr_Unique_to_Celltype_MASTER_Wald = (
    # Stack all per-cell-type tables under a fresh 0..n-1 index.
    pd.concat(list(_unique_tables.values()), ignore_index=True)
    # Drop the "Gene_set" column from the combined table.
    .drop(columns="Gene_set")
    # Strip the literal text "_Ensembl" from term names. regex=False pins plain
    # substring matching regardless of the installed pandas version's default.
    .assign(Term=lambda df: df['Term'].str.replace('_Ensembl', '', regex=False))
)

# Export the combined table (tab-delimited, with the row index as first column).
output_file_path = './Results/Enrichr_Unique_to_Celltype_MASTER.txt'
Enrichr_Unique_to_Celltype_MASTER_Wald.to_csv(output_file_path, sep='\t', index=True, header=True)

# Display the combined table.
Enrichr_Unique_to_Celltype_MASTER_Wald

Unnamed: 0,Term,Overlap,P-value,Adjusted P-value,Odds Ratio,Combined Score,Genes,Celltype
0,BIOCARTA_MM_CARM1_AND_REGULATION_OF_THE_ESTROGEN_RECEPTOR,1/22,0.011995,0.023200,134.911111,596.742032,Esr1,B_Cell
1,BIOCARTA_MM_DOWNREGULATED_OF_MTA-3_IN_ER-NEGATIVE_BREAST_TUMORS,1/9,0.004921,0.017719,319.982456,1700.448411,Esr1,B_Cell
2,BIOCARTA_MM_ROLE_OF_ERBB2_IN_SIGNAL_TRANSDUCTION_AND_ONCOLOGY,1/17,0.009279,0.020510,173.552381,812.217697,Esr1,B_Cell
3,EHMN_MM_BILE_ACID_BIOSYNTHESIS,1/30,0.016329,0.026594,99.437158,409.166213,Scp2,B_Cell
4,EHMN_MM_GLYCOSPHINGOLIPID_BIOSYNTHESIS_GANGLIOSERIES,1/9,0.004921,0.017719,319.982456,1700.448411,St3gal5,B_Cell
...,...,...,...,...,...,...,...,...
22638,TF_MM_FRIARD_MEIS1,1/192,0.098217,0.146378,15.655962,36.330872,St18,Macrophage
22639,TZ_DIURNAL_GENES,1/4551,0.955589,0.955589,0.400957,0.018214,Usp12,Macrophage
22640,TZ_SCS_MACROPHAGES,4/896,0.000528,0.025885,20.773787,156.780397,Amz1;Usp12;Mgat4a;Tbxas1,Macrophage
22641,WIKIPATHWAYS_MM_EICOSANOID_SYNTHESIS-WP167,1/13,0.006920,0.058022,226.858586,1128.252161,Tbxas1,Macrophage
