# Determine DEGs in Deseq2 LRT Data

### 1. Import Required Packages
### 2. Import Data
### 3. Concat All Data Into One Dataframe
### 4. Find significant genes for each celltype


## <br> 1. Import Required Packages

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

## <br> 2. Import Deseq2 Data

In [2]:
# All per-cell-type LRT result tables are read from the same directory and
# share the "<stem>_LRT.txt" naming pattern.
_LRT_RESULTS_DIR = '../03_Differential_Gene_Expression_Analysis---Deseq2/Results/'


def _read_lrt(file_stem):
    """Read one tab-separated LRT result table.

    `file_stem` is the part of the file name before "_LRT.txt".
    The row index produced by `read_csv` is turned into a regular column via
    `reset_index()`; later cells refer to that column as 'index'.
    """
    table = pd.read_csv(_LRT_RESULTS_DIR + file_stem + '_LRT.txt', delimiter='\t')
    return table.reset_index()


B_Cell_LRT = _read_lrt('B_Cells')
Cholangiocyte_LRT = _read_lrt('Cholangiocytes')
Hepatocyte_LRT = _read_lrt('Hepatocytes')
HSC_LRT = _read_lrt('HSCs')
LSEC_LRT = _read_lrt('ECs')  # the LSEC table is stored on disk under the "ECs" name
Macrophage_LRT = _read_lrt('Macrophages')
Neutrophil_LRT = _read_lrt('Neutrophils')
pDC_LRT = _read_lrt('pDCs')
PF_LRT = _read_lrt('PFs')
T_Cell_LRT = _read_lrt('T_Cells')

In [3]:
# Give every table's "padj" column a cell-type-specific name ("padj_<Celltype>")
# so the columns stay distinguishable once the tables are merged.
# The rename is done in place on each already-loaded DataFrame.
for _celltype, _lrt_table in (
    ('B_Cell', B_Cell_LRT),
    ('Cholangiocyte', Cholangiocyte_LRT),
    ('Hepatocyte', Hepatocyte_LRT),
    ('HSC', HSC_LRT),
    ('LSEC', LSEC_LRT),
    ('Macrophage', Macrophage_LRT),
    ('Neutrophil', Neutrophil_LRT),
    ('pDC', pDC_LRT),
    ('PF', PF_LRT),
    ('T_Cell', T_Cell_LRT),
):
    _lrt_table.rename(columns={'padj': 'padj_' + _celltype}, inplace=True)

In [4]:
# Outer-join every cell-type table on the shared "index" column.
# how='outer' keeps rows whose key appears in only some of the tables.
# The first join suffixes overlapping columns on both sides; each later join
# leaves the accumulated columns untouched ('') and suffixes only the
# overlapping columns of the newly added table.
merged_df = B_Cell_LRT.merge(
    Cholangiocyte_LRT,
    how='outer',
    on='index',
    suffixes=('_B_Cell', '_Cholangiocyte'),
)
for _right_suffix, _lrt_table in (
    ('_Hepatocyte', Hepatocyte_LRT),
    ('_HSC', HSC_LRT),
    ('_LSEC', LSEC_LRT),
    ('_Macrophage', Macrophage_LRT),
    ('_Neutrophil', Neutrophil_LRT),
    ('_pDC', pDC_LRT),
    ('_PF', PF_LRT),
    ('_T_Cell', T_Cell_LRT),
):
    merged_df = merged_df.merge(
        _lrt_table,
        how='outer',
        on='index',
        suffixes=('', _right_suffix),
    )


In [5]:
# Reduce the merged table to the "index" key plus one "padj_<Celltype>" column
# per cell type, in a fixed cell-type order.
_padj_columns = [
    'padj_' + celltype
    for celltype in (
        'B_Cell', 'Cholangiocyte', 'Hepatocyte', 'HSC', 'LSEC',
        'Macrophage', 'Neutrophil', 'pDC', 'PF', 'T_Cell',
    )
]
merged_df = merged_df[['index'] + _padj_columns]


In [6]:
# Show the merged padj table as this cell's output (one row per "index" value,
# one padj column per cell type).
merged_df

Unnamed: 0,index,padj_B_Cell,padj_Cholangiocyte,padj_Hepatocyte,padj_HSC,padj_LSEC,padj_Macrophage,padj_Neutrophil,padj_pDC,padj_PF,padj_T_Cell
0,Gm42418,0.541213,0.458789,0.013423,0.713926,0.294861,0.224956,0.999469,0.999823,0.856809,0.880532
1,Malat1,0.993333,0.999986,0.174310,0.771468,0.825937,0.999991,0.999469,0.999823,0.999968,0.999592
2,Cmss1,0.999953,0.999986,0.104654,0.999946,0.682680,0.999991,0.999469,0.999823,0.999968,0.999592
3,Foxp1,0.999953,0.999986,0.000830,0.810051,0.951791,0.999991,0.999469,0.999823,0.999968,0.999592
4,Camk1d,0.946550,0.999986,0.026259,0.819480,0.580589,0.409247,0.999469,0.999823,0.999968,0.999592
...,...,...,...,...,...,...,...,...,...,...,...
18057,Klrc3,,,,,,,,,,
18058,Cnga1,,,,,,,,,,
18059,Klra4,,,,,,,,,,
18060,Klra9,,,,,,,,,,


In [7]:
# Write the combined padj table to disk as a tab-separated file, keeping both
# the header row and the DataFrame's row index.
_master_table_path = './Results/Master_Deseq2_LRT.txt'
merged_df.to_csv(_master_table_path, sep='\t', header=True, index=True)


## <br> 4. Find significant genes for each celltype

In [8]:
def _significant_genes(lrt_table, padj_column, alpha=0.05):
    """Return the rows of `lrt_table` whose adjusted p-value is <= `alpha`.

    Parameters
    ----------
    lrt_table : pandas.DataFrame
        A table containing an 'index' column and `padj_column`.
    padj_column : str
        Name of the adjusted p-value column to filter and sort on.
    alpha : float, optional
        Inclusive significance cut-off (default 0.05).

    Returns
    -------
    pandas.DataFrame
        Two columns, 'Symbol' (the former 'index' column) and `padj_column`,
        sorted by `padj_column` in ascending order, with a fresh 0..n-1 row
        index that serves as the rank when the table is exported.

    Notes
    -----
    Rows whose `padj_column` is NaN are dropped, because a NaN comparison
    evaluates to False.
    """
    is_significant = lrt_table[padj_column] <= alpha
    return (
        lrt_table.loc[is_significant, ['index', padj_column]]
        .rename(columns={'index': 'Symbol'})
        .sort_values(padj_column)
        .reset_index(drop=True)
    )


# Filter rows where "padj" <= 0.05, rename "index" to "Symbol", sort by "padj"
# in ascending order and reset the row index, once per cell type.
B_Cell_genes = _significant_genes(B_Cell_LRT, 'padj_B_Cell')
Cholangiocyte_genes = _significant_genes(Cholangiocyte_LRT, 'padj_Cholangiocyte')
Hepatocyte_genes = _significant_genes(Hepatocyte_LRT, 'padj_Hepatocyte')
HSC_genes = _significant_genes(HSC_LRT, 'padj_HSC')
LSEC_genes = _significant_genes(LSEC_LRT, 'padj_LSEC')
Macrophage_genes = _significant_genes(Macrophage_LRT, 'padj_Macrophage')
Neutrophil_genes = _significant_genes(Neutrophil_LRT, 'padj_Neutrophil')
pDC_genes = _significant_genes(pDC_LRT, 'padj_pDC')
PF_genes = _significant_genes(PF_LRT, 'padj_PF')
T_Cell_genes = _significant_genes(T_Cell_LRT, 'padj_T_Cell')

# Export each DataFrame with its row index written out as the ranking column.
# NOTE: the LSEC gene list is written under the "EC" file name.
for _file_stem, _gene_table in (
    ('B_Cell', B_Cell_genes),
    ('Cholangiocyte', Cholangiocyte_genes),
    ('Hepatocyte', Hepatocyte_genes),
    ('HSC', HSC_genes),
    ('EC', LSEC_genes),
    ('Macrophage', Macrophage_genes),
    ('Neutrophil', Neutrophil_genes),
    ('pDC', pDC_genes),
    ('PF', PF_genes),
    ('T_Cell', T_Cell_genes),
):
    _gene_table.to_csv(
        './Results/' + _file_stem + '_LRT_0.05_genes.txt',
        sep='\t',
        header=True,
        index=True,
    )
