# Concatonate All Deseq2 Data with AHR, DRE, RefGenome Metadata

## <br> 1. Import Required Packages

In [1]:
import numpy as np
import pandas as pd

## <br> 2. Import Deseq2 Data and All Other Metadata

In [3]:
# Load DRE count data and clean it
pDREs_Master = pd.read_csv('../../99_mm39_DREs/02_CleanUp_DRE_BED_File/DRE_Counts_by_Gene.txt', 
                            delimiter = '\t')

# Drop rows with missing values
pDREs_Master = pDREs_Master.dropna()

# Rename columns for consistency
pDREs_Master = pDREs_Master.rename(columns={'gene_name': 'Gene', 'count': 'pDRE'})
pDREs_Master

Unnamed: 0,Gene,pDRE
1,Spire1,75
2,Wwox,47
3,Apbb2,43
4,Sdk1,38
5,Apoc2,37
...,...,...
23597,lnc14044,1
23598,lnc30181,1
23599,lnc30180,1
23600,Akain1,1


In [4]:
# Load AHR binding count data and rename columns
AHR_Master = pd.read_csv('../../99_mm39_AHR_Enrichment/02_CleanUp_BED_File/AHR_Binding_Counts.txt', 
                            delimiter = '\t')

AHR_Master = AHR_Master.rename(columns={'gene_name': 'Gene', 'count': 'AHR'})

AHR_Master

Unnamed: 0,Gene,AHR
0,,2652
1,lnc10851,153
2,lnc7308,51
3,Hjurp,44
4,lnc3293,43
...,...,...
21275,Ralbp1,1
21276,lnc30141,1
21277,lnc30142,1
21278,lnc30143,1


In [16]:
# Load and rename columns in gene annotation file for consistency
Gene_Type = pd.read_csv('../../00_Karri_et_al_GTF/MGI_and_Karri_Annotations_Restructured_mm39.txt', 
                           delimiter = '\t')


Gene_Type = Gene_Type.rename(columns={'chrom': 'Chr', 
                                        'source': 'Genebody',
                                        'feature': 'Exon',
                                        'start': 'Start',
                                        'end': 'End',
                                        'score': 'Score',
                                        'strand': 'Strand',
                                        'frame': 'Frame',
                                        'gene_name': 'Gene',
                                        'gene_biotype': 'Type',
                                        'NR_annotation': 'NR_Annotation',
                                        'mgi_symbol': 'MGI_Symbol'
                                       })


Gene_Type.columns

  Gene_Type = pd.read_csv('../../00_Karri_et_al_GTF/MGI_and_Karri_Annotations_Restructured_mm39.txt',


Index(['Chr', 'Start', 'End', 'blank', 'Score', 'Strand', 'Genebody', 'Exon',
       'Frame', 'mgi_chr', 'mgi_start', 'mgi_end', 'MGI_Symbol', 'mgi_score',
       'mgi_strand', 'gene_id', 'transcript_id', 'Gene', 'Type',
       'NR_Annotation', 'NR_annotation2'],
      dtype='object')

In [5]:
# Load DESeq2 output files for all cell types across multiple dose comparisons (e.g., 0.01 vs 0, 0.03 vs 0, etc.)
# Each file is read into a separate DataFrame and index is reset for consistency
directory = './Results/03b_Deseq2_Output/'

Deseq2_B_Cell_0_01_v_0 = pd.read_csv(directory + 'B_Cells_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_B_Cell_0_03_v_0 = pd.read_csv(directory + 'B_Cells_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_B_Cell_0_1_v_0 = pd.read_csv(directory + 'B_Cells_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_B_Cell_0_3_v_0 = pd.read_csv(directory + 'B_Cells_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_B_Cell_1_v_0 = pd.read_csv(directory + 'B_Cells_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_B_Cell_3_v_0 = pd.read_csv(directory + 'B_Cells_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_B_Cell_10_v_0 = pd.read_csv(directory + 'B_Cells_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_B_Cell_30_v_0 = pd.read_csv(directory + 'B_Cells_30_v_0.txt', delimiter = '\t').reset_index()

Deseq2_Centrilobular_Hepatocyte_0_01_v_0 = pd.read_csv(directory + 'Centrilobular_Hepatocytes_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Centrilobular_Hepatocyte_0_03_v_0 = pd.read_csv(directory + 'Centrilobular_Hepatocytes_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Centrilobular_Hepatocyte_0_1_v_0 = pd.read_csv(directory + 'Centrilobular_Hepatocytes_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Centrilobular_Hepatocyte_0_3_v_0 = pd.read_csv(directory + 'Centrilobular_Hepatocytes_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Centrilobular_Hepatocyte_1_v_0 = pd.read_csv(directory + 'Centrilobular_Hepatocytes_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Centrilobular_Hepatocyte_3_v_0 = pd.read_csv(directory + 'Centrilobular_Hepatocytes_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Centrilobular_Hepatocyte_10_v_0 = pd.read_csv(directory + 'Centrilobular_Hepatocytes_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Centrilobular_Hepatocyte_30_v_0 = pd.read_csv(directory + 'Centrilobular_Hepatocytes_30_v_0.txt', delimiter = '\t').reset_index()

Deseq2_Cholangiocyte_0_01_v_0 = pd.read_csv(directory + 'Cholangiocytes_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Cholangiocyte_0_03_v_0 = pd.read_csv(directory + 'Cholangiocytes_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Cholangiocyte_0_1_v_0 = pd.read_csv(directory + 'Cholangiocytes_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Cholangiocyte_0_3_v_0 = pd.read_csv(directory + 'Cholangiocytes_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Cholangiocyte_1_v_0 = pd.read_csv(directory + 'Cholangiocytes_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Cholangiocyte_3_v_0 = pd.read_csv(directory + 'Cholangiocytes_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Cholangiocyte_10_v_0 = pd.read_csv(directory + 'Cholangiocytes_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Cholangiocyte_30_v_0 = pd.read_csv(directory + 'Cholangiocytes_30_v_0.txt', delimiter = '\t').reset_index()

Deseq2_HSC_0_01_v_0 = pd.read_csv(directory + 'HSCs_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_HSC_0_03_v_0 = pd.read_csv(directory + 'HSCs_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_HSC_0_1_v_0 = pd.read_csv(directory + 'HSCs_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_HSC_0_3_v_0 = pd.read_csv(directory + 'HSCs_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_HSC_1_v_0 = pd.read_csv(directory + 'HSCs_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_HSC_3_v_0 = pd.read_csv(directory + 'HSCs_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_HSC_10_v_0 = pd.read_csv(directory + 'HSCs_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_HSC_30_v_0 = pd.read_csv(directory + 'HSCs_30_v_0.txt', delimiter = '\t').reset_index()

Deseq2_LSEC_0_01_v_0 = pd.read_csv(directory + 'LSECs_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_LSEC_0_03_v_0 = pd.read_csv(directory + 'LSECs_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_LSEC_0_1_v_0 = pd.read_csv(directory + 'LSECs_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_LSEC_0_3_v_0 = pd.read_csv(directory + 'LSECs_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_LSEC_1_v_0 = pd.read_csv(directory + 'LSECs_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_LSEC_3_v_0 = pd.read_csv(directory + 'LSECs_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_LSEC_10_v_0 = pd.read_csv(directory + 'LSECs_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_LSEC_30_v_0 = pd.read_csv(directory + 'LSECs_30_v_0.txt', delimiter = '\t').reset_index()

Deseq2_Macrophage_0_01_v_0 = pd.read_csv(directory + 'Macrophages_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Macrophage_0_03_v_0 = pd.read_csv(directory + 'Macrophages_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Macrophage_0_1_v_0 = pd.read_csv(directory + 'Macrophages_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Macrophage_0_3_v_0 = pd.read_csv(directory + 'Macrophages_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Macrophage_1_v_0 = pd.read_csv(directory + 'Macrophages_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Macrophage_3_v_0 = pd.read_csv(directory + 'Macrophages_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Macrophage_10_v_0 = pd.read_csv(directory + 'Macrophages_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Macrophage_30_v_0 = pd.read_csv(directory + 'Macrophages_30_v_0.txt', delimiter = '\t').reset_index()

Deseq2_Neutrophil_0_01_v_0 = pd.read_csv(directory + 'Neutrophils_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Neutrophil_0_03_v_0 = pd.read_csv(directory + 'Neutrophils_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Neutrophil_0_1_v_0 = pd.read_csv(directory + 'Neutrophils_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Neutrophil_0_3_v_0 = pd.read_csv(directory + 'Neutrophils_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Neutrophil_1_v_0 = pd.read_csv(directory + 'Neutrophils_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Neutrophil_3_v_0 = pd.read_csv(directory + 'Neutrophils_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Neutrophil_10_v_0 = pd.read_csv(directory + 'Neutrophils_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Neutrophil_30_v_0 = pd.read_csv(directory + 'Neutrophils_30_v_0.txt', delimiter = '\t').reset_index()

Deseq2_PF_0_01_v_0 = pd.read_csv(directory + 'PFs_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_PF_0_03_v_0 = pd.read_csv(directory + 'PFs_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_PF_0_1_v_0 = pd.read_csv(directory + 'PFs_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_PF_0_3_v_0 = pd.read_csv(directory + 'PFs_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_PF_1_v_0 = pd.read_csv(directory + 'PFs_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_PF_3_v_0 = pd.read_csv(directory + 'PFs_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_PF_10_v_0 = pd.read_csv(directory + 'PFs_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_PF_30_v_0 = pd.read_csv(directory + 'PFs_30_v_0.txt', delimiter = '\t').reset_index()

Deseq2_Periportal_Hepatocyte_0_01_v_0 = pd.read_csv(directory + 'Periportal_Hepatocytes_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Periportal_Hepatocyte_0_03_v_0 = pd.read_csv(directory + 'Periportal_Hepatocytes_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Periportal_Hepatocyte_0_1_v_0 = pd.read_csv(directory + 'Periportal_Hepatocytes_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Periportal_Hepatocyte_0_3_v_0 = pd.read_csv(directory + 'Periportal_Hepatocytes_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Periportal_Hepatocyte_1_v_0 = pd.read_csv(directory + 'Periportal_Hepatocytes_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Periportal_Hepatocyte_3_v_0 = pd.read_csv(directory + 'Periportal_Hepatocytes_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Periportal_Hepatocyte_10_v_0 = pd.read_csv(directory + 'Periportal_Hepatocytes_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_Periportal_Hepatocyte_30_v_0 = pd.read_csv(directory + 'Periportal_Hepatocytes_30_v_0.txt', delimiter = '\t').reset_index()

Deseq2_T_Cell_0_01_v_0 = pd.read_csv(directory + 'T_Cells_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_T_Cell_0_03_v_0 = pd.read_csv(directory + 'T_Cells_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_T_Cell_0_1_v_0 = pd.read_csv(directory + 'T_Cells_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_T_Cell_0_3_v_0 = pd.read_csv(directory + 'T_Cells_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_T_Cell_1_v_0 = pd.read_csv(directory + 'T_Cells_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_T_Cell_3_v_0 = pd.read_csv(directory + 'T_Cells_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_T_Cell_10_v_0 = pd.read_csv(directory + 'T_Cells_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_T_Cell_30_v_0 = pd.read_csv(directory + 'T_Cells_30_v_0.txt', delimiter = '\t').reset_index()

Deseq2_pDC_0_01_v_0 = pd.read_csv(directory + 'pDCs_0.01_v_0.txt', delimiter = '\t').reset_index()
Deseq2_pDC_0_03_v_0 = pd.read_csv(directory + 'pDCs_0.03_v_0.txt', delimiter = '\t').reset_index()
Deseq2_pDC_0_1_v_0 = pd.read_csv(directory + 'pDCs_0.1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_pDC_0_3_v_0 = pd.read_csv(directory + 'pDCs_0.3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_pDC_1_v_0 = pd.read_csv(directory + 'pDCs_1_v_0.txt', delimiter = '\t').reset_index()
Deseq2_pDC_3_v_0 = pd.read_csv(directory + 'pDCs_3_v_0.txt', delimiter = '\t').reset_index()
Deseq2_pDC_10_v_0 = pd.read_csv(directory + 'pDCs_10_v_0.txt', delimiter = '\t').reset_index()
Deseq2_pDC_30_v_0 = pd.read_csv(directory + 'pDCs_30_v_0.txt', delimiter = '\t').reset_index()



In [6]:
# Add 'Celltype' column to each DESeq2 result DataFrame to label the corresponding cell type for downstream analysis
Deseq2_B_Cell_0_01_v_0['Celltype'] = 'B Cells'
Deseq2_B_Cell_0_03_v_0['Celltype'] = 'B Cells'
Deseq2_B_Cell_0_1_v_0['Celltype'] = 'B Cells'
Deseq2_B_Cell_0_3_v_0['Celltype'] = 'B Cells'
Deseq2_B_Cell_1_v_0['Celltype'] = 'B Cells'
Deseq2_B_Cell_3_v_0['Celltype'] = 'B Cells'
Deseq2_B_Cell_10_v_0['Celltype'] = 'B Cells'
Deseq2_B_Cell_30_v_0['Celltype'] = 'B Cells'

Deseq2_Centrilobular_Hepatocyte_0_01_v_0['Celltype'] = 'Centrilobular Hepatocytes'
Deseq2_Centrilobular_Hepatocyte_0_03_v_0['Celltype'] = 'Centrilobular Hepatocytes'
Deseq2_Centrilobular_Hepatocyte_0_1_v_0['Celltype'] = 'Centrilobular Hepatocytes'
Deseq2_Centrilobular_Hepatocyte_0_3_v_0['Celltype'] = 'Centrilobular Hepatocytes'
Deseq2_Centrilobular_Hepatocyte_1_v_0['Celltype'] = 'Centrilobular Hepatocytes'
Deseq2_Centrilobular_Hepatocyte_3_v_0['Celltype'] = 'Centrilobular Hepatocytes'
Deseq2_Centrilobular_Hepatocyte_10_v_0['Celltype'] = 'Centrilobular Hepatocytes'
Deseq2_Centrilobular_Hepatocyte_30_v_0['Celltype'] = 'Centrilobular Hepatocytes'

Deseq2_Cholangiocyte_0_01_v_0['Celltype'] = 'Cholangiocytes'
Deseq2_Cholangiocyte_0_03_v_0['Celltype'] = 'Cholangiocytes'
Deseq2_Cholangiocyte_0_1_v_0['Celltype'] = 'Cholangiocytes'
Deseq2_Cholangiocyte_0_3_v_0['Celltype'] = 'Cholangiocytes'
Deseq2_Cholangiocyte_1_v_0['Celltype'] = 'Cholangiocytes'
Deseq2_Cholangiocyte_3_v_0['Celltype'] = 'Cholangiocytes'
Deseq2_Cholangiocyte_10_v_0['Celltype'] = 'Cholangiocytes'
Deseq2_Cholangiocyte_30_v_0['Celltype'] = 'Cholangiocytes'

Deseq2_HSC_0_01_v_0['Celltype'] = 'HSCs'
Deseq2_HSC_0_03_v_0['Celltype'] = 'HSCs'
Deseq2_HSC_0_1_v_0['Celltype'] = 'HSCs'
Deseq2_HSC_0_3_v_0['Celltype'] = 'HSCs'
Deseq2_HSC_1_v_0['Celltype'] = 'HSCs'
Deseq2_HSC_3_v_0['Celltype'] = 'HSCs'
Deseq2_HSC_10_v_0['Celltype'] = 'HSCs'
Deseq2_HSC_30_v_0['Celltype'] = 'HSCs'

Deseq2_LSEC_0_01_v_0['Celltype'] = 'LSECs'
Deseq2_LSEC_0_03_v_0['Celltype'] = 'LSECs'
Deseq2_LSEC_0_1_v_0['Celltype'] = 'LSECs'
Deseq2_LSEC_0_3_v_0['Celltype'] = 'LSECs'
Deseq2_LSEC_1_v_0['Celltype'] = 'LSECs'
Deseq2_LSEC_3_v_0['Celltype'] = 'LSECs'
Deseq2_LSEC_10_v_0['Celltype'] = 'LSECs'
Deseq2_LSEC_30_v_0['Celltype'] = 'LSECs'

Deseq2_Macrophage_0_01_v_0['Celltype'] = 'Macrophages'
Deseq2_Macrophage_0_03_v_0['Celltype'] = 'Macrophages'
Deseq2_Macrophage_0_1_v_0['Celltype'] = 'Macrophages'
Deseq2_Macrophage_0_3_v_0['Celltype'] = 'Macrophages'
Deseq2_Macrophage_1_v_0['Celltype'] = 'Macrophages'
Deseq2_Macrophage_3_v_0['Celltype'] = 'Macrophages'
Deseq2_Macrophage_10_v_0['Celltype'] = 'Macrophages'
Deseq2_Macrophage_30_v_0['Celltype'] = 'Macrophages'

Deseq2_Neutrophil_0_01_v_0['Celltype'] = 'Neutrophils'
Deseq2_Neutrophil_0_03_v_0['Celltype'] = 'Neutrophils'
Deseq2_Neutrophil_0_1_v_0['Celltype'] = 'Neutrophils'
Deseq2_Neutrophil_0_3_v_0['Celltype'] = 'Neutrophils'
Deseq2_Neutrophil_1_v_0['Celltype'] = 'Neutrophils'
Deseq2_Neutrophil_3_v_0['Celltype'] = 'Neutrophils'
Deseq2_Neutrophil_10_v_0['Celltype'] = 'Neutrophils'
Deseq2_Neutrophil_30_v_0['Celltype'] = 'Neutrophils'

Deseq2_PF_0_01_v_0['Celltype'] = 'PFs'
Deseq2_PF_0_03_v_0['Celltype'] = 'PFs'
Deseq2_PF_0_1_v_0['Celltype'] = 'PFs'
Deseq2_PF_0_3_v_0['Celltype'] = 'PFs'
Deseq2_PF_1_v_0['Celltype'] = 'PFs'
Deseq2_PF_3_v_0['Celltype'] = 'PFs'
Deseq2_PF_10_v_0['Celltype'] = 'PFs'
Deseq2_PF_30_v_0['Celltype'] = 'PFs'

Deseq2_Periportal_Hepatocyte_0_01_v_0['Celltype'] = 'Periportal Hepatocytes'
Deseq2_Periportal_Hepatocyte_0_03_v_0['Celltype'] = 'Periportal Hepatocytes'
Deseq2_Periportal_Hepatocyte_0_1_v_0['Celltype'] = 'Periportal Hepatocytes'
Deseq2_Periportal_Hepatocyte_0_3_v_0['Celltype'] = 'Periportal Hepatocytes'
Deseq2_Periportal_Hepatocyte_1_v_0['Celltype'] = 'Periportal Hepatocytes'
Deseq2_Periportal_Hepatocyte_3_v_0['Celltype'] = 'Periportal Hepatocytes'
Deseq2_Periportal_Hepatocyte_10_v_0['Celltype'] = 'Periportal Hepatocytes'
Deseq2_Periportal_Hepatocyte_30_v_0['Celltype'] = 'Periportal Hepatocytes'

Deseq2_T_Cell_0_01_v_0['Celltype'] = 'T Cells'
Deseq2_T_Cell_0_03_v_0['Celltype'] = 'T Cells'
Deseq2_T_Cell_0_1_v_0['Celltype'] = 'T Cells'
Deseq2_T_Cell_0_3_v_0['Celltype'] = 'T Cells'
Deseq2_T_Cell_1_v_0['Celltype'] = 'T Cells'
Deseq2_T_Cell_3_v_0['Celltype'] = 'T Cells'
Deseq2_T_Cell_10_v_0['Celltype'] = 'T Cells'
Deseq2_T_Cell_30_v_0['Celltype'] = 'T Cells'

Deseq2_pDC_0_01_v_0['Celltype'] = 'pDCs'
Deseq2_pDC_0_03_v_0['Celltype'] = 'pDCs'
Deseq2_pDC_0_1_v_0['Celltype'] = 'pDCs'
Deseq2_pDC_0_3_v_0['Celltype'] = 'pDCs'
Deseq2_pDC_1_v_0['Celltype'] = 'pDCs'
Deseq2_pDC_3_v_0['Celltype'] = 'pDCs'
Deseq2_pDC_10_v_0['Celltype'] = 'pDCs'
Deseq2_pDC_30_v_0['Celltype'] = 'pDCs'



In [7]:
# Assign 'Dose' labels to each DESeq2 result DataFrame to indicate the treatment concentration compared to control
Deseq2_B_Cell_0_01_v_0['Dose'] = '0.01'
Deseq2_B_Cell_0_03_v_0['Dose'] = '0.03'
Deseq2_B_Cell_0_1_v_0['Dose'] = '0.1'
Deseq2_B_Cell_0_3_v_0['Dose'] = '0.3'
Deseq2_B_Cell_1_v_0['Dose'] = '1'
Deseq2_B_Cell_3_v_0['Dose'] = '3'
Deseq2_B_Cell_10_v_0['Dose'] = '10'
Deseq2_B_Cell_30_v_0['Dose'] = '30'

Deseq2_Centrilobular_Hepatocyte_0_01_v_0['Dose'] = '0.01'
Deseq2_Centrilobular_Hepatocyte_0_03_v_0['Dose'] = '0.03'
Deseq2_Centrilobular_Hepatocyte_0_1_v_0['Dose'] = '0.1'
Deseq2_Centrilobular_Hepatocyte_0_3_v_0['Dose'] = '0.3'
Deseq2_Centrilobular_Hepatocyte_1_v_0['Dose'] = '1'
Deseq2_Centrilobular_Hepatocyte_3_v_0['Dose'] = '3'
Deseq2_Centrilobular_Hepatocyte_10_v_0['Dose'] = '10'
Deseq2_Centrilobular_Hepatocyte_30_v_0['Dose'] = '30'

Deseq2_Cholangiocyte_0_01_v_0['Dose'] = '0.01'
Deseq2_Cholangiocyte_0_03_v_0['Dose'] = '0.03'
Deseq2_Cholangiocyte_0_1_v_0['Dose'] = '0.1'
Deseq2_Cholangiocyte_0_3_v_0['Dose'] = '0.3'
Deseq2_Cholangiocyte_1_v_0['Dose'] = '1'
Deseq2_Cholangiocyte_3_v_0['Dose'] = '3'
Deseq2_Cholangiocyte_10_v_0['Dose'] = '10'
Deseq2_Cholangiocyte_30_v_0['Dose'] = '30'

Deseq2_HSC_0_01_v_0['Dose'] = '0.01'
Deseq2_HSC_0_03_v_0['Dose'] = '0.03'
Deseq2_HSC_0_1_v_0['Dose'] = '0.1'
Deseq2_HSC_0_3_v_0['Dose'] = '0.3'
Deseq2_HSC_1_v_0['Dose'] = '1'
Deseq2_HSC_3_v_0['Dose'] = '3'
Deseq2_HSC_10_v_0['Dose'] = '10'
Deseq2_HSC_30_v_0['Dose'] = '30'

Deseq2_LSEC_0_01_v_0['Dose'] = '0.01'
Deseq2_LSEC_0_03_v_0['Dose'] = '0.03'
Deseq2_LSEC_0_1_v_0['Dose'] = '0.1'
Deseq2_LSEC_0_3_v_0['Dose'] = '0.3'
Deseq2_LSEC_1_v_0['Dose'] = '1'
Deseq2_LSEC_3_v_0['Dose'] = '3'
Deseq2_LSEC_10_v_0['Dose'] = '10'
Deseq2_LSEC_30_v_0['Dose'] = '30'

Deseq2_Macrophage_0_01_v_0['Dose'] = '0.01'
Deseq2_Macrophage_0_03_v_0['Dose'] = '0.03'
Deseq2_Macrophage_0_1_v_0['Dose'] = '0.1'
Deseq2_Macrophage_0_3_v_0['Dose'] = '0.3'
Deseq2_Macrophage_1_v_0['Dose'] = '1'
Deseq2_Macrophage_3_v_0['Dose'] = '3'
Deseq2_Macrophage_10_v_0['Dose'] = '10'
Deseq2_Macrophage_30_v_0['Dose'] = '30'

Deseq2_Neutrophil_0_01_v_0['Dose'] = '0.01'
Deseq2_Neutrophil_0_03_v_0['Dose'] = '0.03'
Deseq2_Neutrophil_0_1_v_0['Dose'] = '0.1'
Deseq2_Neutrophil_0_3_v_0['Dose'] = '0.3'
Deseq2_Neutrophil_1_v_0['Dose'] = '1'
Deseq2_Neutrophil_3_v_0['Dose'] = '3'
Deseq2_Neutrophil_10_v_0['Dose'] = '10'
Deseq2_Neutrophil_30_v_0['Dose'] = '30'

Deseq2_PF_0_01_v_0['Dose'] = '0.01'
Deseq2_PF_0_03_v_0['Dose'] = '0.03'
Deseq2_PF_0_1_v_0['Dose'] = '0.1'
Deseq2_PF_0_3_v_0['Dose'] = '0.3'
Deseq2_PF_1_v_0['Dose'] = '1'
Deseq2_PF_3_v_0['Dose'] = '3'
Deseq2_PF_10_v_0['Dose'] = '10'
Deseq2_PF_30_v_0['Dose'] = '30'

Deseq2_Periportal_Hepatocyte_0_01_v_0['Dose'] = '0.01'
Deseq2_Periportal_Hepatocyte_0_03_v_0['Dose'] = '0.03'
Deseq2_Periportal_Hepatocyte_0_1_v_0['Dose'] = '0.1'
Deseq2_Periportal_Hepatocyte_0_3_v_0['Dose'] = '0.3'
Deseq2_Periportal_Hepatocyte_1_v_0['Dose'] = '1'
Deseq2_Periportal_Hepatocyte_3_v_0['Dose'] = '3'
Deseq2_Periportal_Hepatocyte_10_v_0['Dose'] = '10'
Deseq2_Periportal_Hepatocyte_30_v_0['Dose'] = '30'

Deseq2_T_Cell_0_01_v_0['Dose'] = '0.01'
Deseq2_T_Cell_0_03_v_0['Dose'] = '0.03'
Deseq2_T_Cell_0_1_v_0['Dose'] = '0.1'
Deseq2_T_Cell_0_3_v_0['Dose'] = '0.3'
Deseq2_T_Cell_1_v_0['Dose'] = '1'
Deseq2_T_Cell_3_v_0['Dose'] = '3'
Deseq2_T_Cell_10_v_0['Dose'] = '10'
Deseq2_T_Cell_30_v_0['Dose'] = '30'

Deseq2_pDC_0_01_v_0['Dose'] = '0.01'
Deseq2_pDC_0_03_v_0['Dose'] = '0.03'
Deseq2_pDC_0_1_v_0['Dose'] = '0.1'
Deseq2_pDC_0_3_v_0['Dose'] = '0.3'
Deseq2_pDC_1_v_0['Dose'] = '1'
Deseq2_pDC_3_v_0['Dose'] = '3'
Deseq2_pDC_10_v_0['Dose'] = '10'
Deseq2_pDC_30_v_0['Dose'] = '30'

In [8]:
Deseq2_Periportal_Hepatocyte_0_01_v_0[Deseq2_Periportal_Hepatocyte_0_01_v_0['index'] == 'Xlr4a']

Unnamed: 0,index,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,Celltype,Dose
23217,Xlr4a,5.349154,-2.648294,2.64747,-1.000311,0.31716,1.0,Periportal Hepatocytes,0.01


## <br> 3. Concat All Data Into One Dataframe

In [9]:
#Concat into one dataframe
Deseq2_Master = pd.concat([Deseq2_B_Cell_0_01_v_0, 
                            Deseq2_B_Cell_0_03_v_0, 
                            Deseq2_B_Cell_0_1_v_0, 
                            Deseq2_B_Cell_0_3_v_0, 
                            Deseq2_B_Cell_1_v_0, 
                            Deseq2_B_Cell_3_v_0, 
                            Deseq2_B_Cell_10_v_0, 
                            Deseq2_B_Cell_30_v_0, 
                            Deseq2_Centrilobular_Hepatocyte_0_01_v_0, 
                            Deseq2_Centrilobular_Hepatocyte_0_03_v_0, 
                            Deseq2_Centrilobular_Hepatocyte_0_1_v_0, 
                            Deseq2_Centrilobular_Hepatocyte_0_3_v_0, 
                            Deseq2_Centrilobular_Hepatocyte_1_v_0, 
                            Deseq2_Centrilobular_Hepatocyte_3_v_0, 
                            Deseq2_Centrilobular_Hepatocyte_10_v_0, 
                            Deseq2_Centrilobular_Hepatocyte_30_v_0, 
                            Deseq2_Cholangiocyte_0_01_v_0, 
                            Deseq2_Cholangiocyte_0_03_v_0, 
                            Deseq2_Cholangiocyte_0_1_v_0, 
                            Deseq2_Cholangiocyte_0_3_v_0, 
                            Deseq2_Cholangiocyte_1_v_0, 
                            Deseq2_Cholangiocyte_3_v_0, 
                            Deseq2_Cholangiocyte_10_v_0, 
                            Deseq2_Cholangiocyte_30_v_0, 
                            Deseq2_HSC_0_01_v_0, 
                            Deseq2_HSC_0_03_v_0, 
                            Deseq2_HSC_0_1_v_0, 
                            Deseq2_HSC_0_3_v_0, 
                            Deseq2_HSC_1_v_0, 
                            Deseq2_HSC_3_v_0, 
                            Deseq2_HSC_10_v_0, 
                            Deseq2_HSC_30_v_0, 
                            Deseq2_LSEC_0_01_v_0, 
                            Deseq2_LSEC_0_03_v_0, 
                            Deseq2_LSEC_0_1_v_0, 
                            Deseq2_LSEC_0_3_v_0, 
                            Deseq2_LSEC_1_v_0, 
                            Deseq2_LSEC_3_v_0, 
                            Deseq2_LSEC_10_v_0, 
                            Deseq2_LSEC_30_v_0, 
                            Deseq2_Macrophage_0_01_v_0, 
                            Deseq2_Macrophage_0_03_v_0, 
                            Deseq2_Macrophage_0_1_v_0, 
                            Deseq2_Macrophage_0_3_v_0, 
                            Deseq2_Macrophage_1_v_0, 
                            Deseq2_Macrophage_3_v_0, 
                            Deseq2_Macrophage_10_v_0, 
                            Deseq2_Macrophage_30_v_0, 
                            Deseq2_Neutrophil_0_01_v_0, 
                            Deseq2_Neutrophil_0_03_v_0, 
                            Deseq2_Neutrophil_0_1_v_0, 
                            Deseq2_Neutrophil_0_3_v_0, 
                            Deseq2_Neutrophil_1_v_0, 
                            Deseq2_Neutrophil_3_v_0, 
                            Deseq2_Neutrophil_10_v_0, 
                            Deseq2_Neutrophil_30_v_0, 
                            Deseq2_PF_0_01_v_0, 
                            Deseq2_PF_0_03_v_0, 
                            Deseq2_PF_0_1_v_0, 
                            Deseq2_PF_0_3_v_0, 
                            Deseq2_PF_1_v_0, 
                            Deseq2_PF_3_v_0, 
                            Deseq2_PF_10_v_0, 
                            Deseq2_PF_30_v_0, 
                            Deseq2_Periportal_Hepatocyte_0_01_v_0, 
                            Deseq2_Periportal_Hepatocyte_0_03_v_0, 
                            Deseq2_Periportal_Hepatocyte_0_1_v_0, 
                            Deseq2_Periportal_Hepatocyte_0_3_v_0, 
                            Deseq2_Periportal_Hepatocyte_1_v_0, 
                            Deseq2_Periportal_Hepatocyte_3_v_0, 
                            Deseq2_Periportal_Hepatocyte_10_v_0, 
                            Deseq2_Periportal_Hepatocyte_30_v_0, 
                            Deseq2_T_Cell_0_01_v_0, 
                            Deseq2_T_Cell_0_03_v_0, 
                            Deseq2_T_Cell_0_1_v_0, 
                            Deseq2_T_Cell_0_3_v_0, 
                            Deseq2_T_Cell_1_v_0, 
                            Deseq2_T_Cell_3_v_0, 
                            Deseq2_T_Cell_10_v_0, 
                            Deseq2_T_Cell_30_v_0, 
                            Deseq2_pDC_0_01_v_0, 
                            Deseq2_pDC_0_03_v_0, 
                            Deseq2_pDC_0_1_v_0, 
                            Deseq2_pDC_0_3_v_0, 
                            Deseq2_pDC_1_v_0, 
                            Deseq2_pDC_3_v_0, 
                            Deseq2_pDC_10_v_0, 
                            Deseq2_pDC_30_v_0 ],
                          ignore_index=True)


In [10]:
# Rename 'index' to 'Gene' and compute fold change from log2FoldChange
Deseq2_Master = Deseq2_Master.rename(columns={'index': 'Gene'})
Deseq2_Master['Fold-Change'] = pow(2, Deseq2_Master['log2FoldChange'])


Deseq2_Master

Unnamed: 0,Gene,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,Celltype,Dose,Fold-Change
0,lnc31752,5632.003911,0.060929,0.161392,0.377519,0.705788,1.000000,B Cells,0.01,1.043137
1,lnc4822,3146.387654,-0.368362,0.385441,-0.955691,0.339228,1.000000,B Cells,0.01,0.774662
2,Foxp1,941.129495,-0.079404,0.359311,-0.220991,0.825099,1.000000,B Cells,0.01,0.946448
3,Ebf1,616.758548,-0.548443,0.564479,-0.971590,0.331254,1.000000,B Cells,0.01,0.683758
4,Dpyd,924.659365,0.007061,0.575075,0.012278,0.990204,1.000000,B Cells,0.01,1.004906
...,...,...,...,...,...,...,...,...,...,...
1124699,lnc1191,1.467205,0.158104,1.308503,0.120828,0.903827,0.955543,pDCs,30,1.115820
1124700,lnc1440,2.409755,-4.176886,1.226191,-3.406392,0.000658,0.004671,pDCs,30,0.055288
1124701,lnc1686,0.496732,3.818752,4.487664,0.850944,0.394800,0.614526,pDCs,30,14.111033
1124702,lnc2311,2.592510,-4.979600,1.133711,-4.392302,0.000011,0.000141,pDCs,30,0.031695


In [12]:
Gene_Type

Unnamed: 0,Chr,Start,End,blank,Score,Strand,Genebody,Exon,Frame,mgi_chr,...,mgi_end,mgi_symbol,mgi_score,mgi_strand,gene_id,transcript_id,Gene,Type,NR_Annotation,NR_annotation2
0,chr1,3224554,3224816,.,1000,-,Genebody_mm10_lnc48261,exon,.,.,...,-1,.,-1,.,lnc_inter_chr1_15560,lnc_inter_chr1_15560,lnc15560,lncRNA,lnc15560,
1,chr1,3273138,3274559,.,1000,-,Genebody_mm10_lnc48261,exon,.,.,...,-1,.,-1,.,lnc_inter_chr1_15561,lnc_inter_chr1_15561,lnc15561,lncRNA,lnc15561,
2,chr1,3275710,3276991,.,1000,-,Genebody_mm10_lnc48261,exon,.,.,...,-1,.,-1,.,lnc_inter_chr1_15562,lnc_inter_chr1_15562,lnc15562,lncRNA,lnc15562,
3,chr1,3284704,3741721,.,1000,-,GB_RefseqNM19801,exon,.,chr1,...,3729127,Gm19938,1000,-,Xkr4,Xkr4,Xkr4,NM,,
4,chr1,3536809,3583776,.,1000,+,EnsmblNR4698,exon,.,chr1,...,3583776,Gm1992,1000,+,Gm1992,Gm1992,Gm1992,antisense,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84244,chrY,90614769,90617133,.,1000,-,EnsmblNR4698,exon,.,chrY,...,90617133,Gm28300,1000,-,Gm28300,Gm28300,Gm28300,lincRNA,,
84245,chrY,90676614,90678894,.,1000,-,EnsmblNR4698,exon,.,chrY,...,90678894,Gm28301,1000,-,Gm28301,Gm28301,Gm28301,lincRNA,,
84246,chrY,90762408,90766319,.,1000,-,GB_RefseqNR2077,exon,.,chrY,...,90766736,G530011O06Riky,1000,-,G530011O06Rik@chrY(-),G530011O06Rik@chrY(-),G530011O06Rik@chrY(-),NR,,
84247,chrY,90796710,90827734,.,1000,+,GB_RefseqNM#GB_RefseqNR1159,exon,.,chrY,...,90827734,Erdr1y,1000,+,Erdr1,Erdr1,Erdr1,NM#NR,,


In [17]:
# Merge Deseq2_Master with all metadata tables
Deseq2_Master2 = pd.merge(Deseq2_Master, Gene_Type, left_on='Gene', right_on='Gene', how='left').fillna("NA")
Deseq2_Master3 = pd.merge(Deseq2_Master2, AHR_Master, left_on='Gene', right_on='Gene', how='left').fillna(0)
Deseq2_Master4 = pd.merge(Deseq2_Master3, pDREs_Master, left_on='Gene', right_on='Gene', how='left').fillna(0)

Deseq2_Master4[['pDRE', 'AHR']] = Deseq2_Master4[['pDRE', 'AHR']].astype(int)

Deseq2_Master_Working = Deseq2_Master4[['Gene', 'MGI_Symbol', 'Celltype', 'Dose','Type', 'pDRE', 'AHR',  'baseMean',
                                        'Chr','Start','End', 'Strand',  'NR_Annotation',
                                      'log2FoldChange', 'Fold-Change', 'lfcSE', 'stat', 'pvalue', 'padj']]
Deseq2_Master_Working


Unnamed: 0,Gene,MGI_Symbol,Celltype,Dose,Type,pDRE,AHR,baseMean,Chr,Start,End,Strand,NR_Annotation,log2FoldChange,Fold-Change,lfcSE,stat,pvalue,padj
0,lnc31752,Malat1,B Cells,0.01,lncRNA,0,2,5632.003911,chr19,5842642,5852983,-,lnc31752_Mascrna#Malat1,0.060929,1.043137,0.161392,0.377519,0.705788,1.0
1,lnc4822,.,B Cells,0.01,lncRNA,0,1,3146.387654,chr6,3181101,3202501,+,lnc4822,-0.368362,0.774662,0.385441,-0.955691,0.339228,1.0
2,Foxp1,9130401L11Rik,B Cells,0.01,NM,13,19,941.129495,chr6,98902302,99161922,-,,-0.079404,0.946448,0.359311,-0.220991,0.825099,1.0
3,Foxp1,6030492E11Rik,B Cells,0.01,NM,13,19,941.129495,chr6,98902302,99161922,-,,-0.079404,0.946448,0.359311,-0.220991,0.825099,1.0
4,Foxp1,Gm38865,B Cells,0.01,NM,13,19,941.129495,chr6,98902302,99161922,-,,-0.079404,0.946448,0.359311,-0.220991,0.825099,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1443131,lnc1440,.,pDCs,30,lncRNA,0,0,2.409755,chr2,69058214,69067863,-,lnc1440,-4.176886,0.055288,1.226191,-3.406392,0.000658,0.004671
1443132,lnc1686,Gm34484,pDCs,30,lncRNA,0,1,0.496732,chr2,124519234,124553367,-,lnc1686,3.818752,14.111033,4.487664,0.850944,0.3948,0.614526
1443133,lnc2311,Gm40040,pDCs,30,lncRNA,3,3,2.592510,chr3,41395983,41447665,-,lnc2311_Platr4Gm40040,-4.979600,0.031695,1.133711,-4.392302,0.000011,0.000141
1443134,lnc2311,Platr4,pDCs,30,lncRNA,3,3,2.592510,chr3,41395983,41447665,-,lnc2311_Platr4Gm40040,-4.979600,0.031695,1.133711,-4.392302,0.000011,0.000141


In [20]:
Deseq2_Master_Working.to_csv('./Results/03d_MASTER_Deseq2_Concatonated/RDDR_Deseq2_Master_Wald_with_MGI.txt', sep='\t')