# Determine DEGs in Deseq2 Data

### 1. Import Required Packages
### 2. Import Data
### 3. Determine DEGs for each celltype and time


## <br> 1. Import Required Packages

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

Set figure parameters.

In [2]:
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
%config InlineBackend.figure_format='retina'
pd.options.display.max_colwidth = 200
#plt.rcParams['font.sans-serif']=['Arial']
plt.rcParams['pdf.fonttype'] = 'truetype'

## <br> 2. Import Deseq2 Data

In [3]:
Deseq2_Master = pd.read_csv('./Results/Deseq2_Master_Wald-EC_CELLS_ONLY.txt', delimiter = '\t', index_col=0)

Deseq2_Master['Time'] = Deseq2_Master['Time'].astype('category')
Deseq2_Master['Time'] = Deseq2_Master['Time'].cat.reorder_categories([2,4,8,12,18,24,72])

In [4]:
Deseq2_Master

Unnamed: 0,Gene,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,Celltype,Time,Fold-Change
0,Gm42418,4648.083478,0.364905,0.483332,0.754978,0.450262,0.999125,LyECs,2,1.287797
1,Malat1,2629.622767,-0.212570,0.354544,-0.599557,0.548801,0.999125,LyECs,2,0.862999
2,Cmss1,774.181280,0.902527,0.577271,1.563438,0.117950,0.999125,LyECs,2,1.869337
3,Neat1,598.588051,0.155406,0.404867,0.383844,0.701094,0.999125,LyECs,2,1.113735
4,Dpyd,533.282076,-0.335522,0.366885,-0.914513,0.360447,0.999125,LyECs,2,0.792498
...,...,...,...,...,...,...,...,...,...,...
421647,Ubxn2a,1.071625,0.379140,1.356764,0.279444,0.779904,0.997212,Unknown,72,1.300566
421648,Utp11,0.892990,-3.091406,1.708405,-1.809527,0.070369,0.997212,Unknown,72,0.117326
421649,Vdac1,0.842426,1.551787,1.599392,0.970236,0.331929,0.997212,Unknown,72,2.931801
421650,Wdfy1,0.964786,0.870753,1.503533,0.579138,0.562496,0.997212,Unknown,72,1.828617


## <br> 3. Determine DEGs for each celltype and time

In [5]:
##############
# All DEGs
##############
LyECs_DEGs_All_Unique = Deseq2_Master[(Deseq2_Master['Celltype'] == 'LyECs') &
                                           ((Deseq2_Master['Fold-Change'] >= 1.5) | (Deseq2_Master['Fold-Change'] <= 1/1.5)) &
                                           (Deseq2_Master['padj'] <= 0.05)]

LyECs_DEGs_All_Unique_List = LyECs_DEGs_All_Unique[['Gene']].drop_duplicates(keep='first').copy()
LyECs_DEGs_All_Unique_List.sort_values(by='Gene', inplace=True)
LyECs_DEGs_All_Unique_List.to_csv('./Results/04b_All_DEGs/LyECs.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced Only
############################
LyECs_DEGs_Induced = Deseq2_Master[(Deseq2_Master['Celltype'] == 'LyECs') &
                               (Deseq2_Master['Fold-Change'] >= 1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]

LyECs_DEGs_Induced_Counts = LyECs_DEGs_Induced.groupby('Time').size().reset_index(name='Induced')
LyECs_Induced_DEGs_list = LyECs_DEGs_Induced['Gene'].drop_duplicates(keep='first').sort_values()
LyECs_Induced_DEGs_list.to_csv('./Results/04b_All_DEGs/LyECs_Induced.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced in All 7 time points
############################
LyECs_Induced_All_counts = LyECs_DEGs_Induced['Gene'].value_counts()
LyECs_Induced_All_Results = LyECs_Induced_All_counts[LyECs_Induced_All_counts == 7]
LyECs_Induced_All_Results_List = pd.DataFrame(sorted(LyECs_Induced_All_Results.reset_index()['Gene']))
LyECs_Induced_All_Results_List.to_csv('./Results/04b_All_DEGs/LyECs_Induced_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed Only
############################
LyECs_DEGs_Repressed = Deseq2_Master[(Deseq2_Master['Celltype'] == 'LyECs') &
                               (Deseq2_Master['Fold-Change'] <= 1/1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]                                                                      

LyECs_DEGs_Repressed_Counts = LyECs_DEGs_Repressed.groupby('Time').size().reset_index(name='Repressed')   
LyECs_DEGs_Repressed_list = LyECs_DEGs_Repressed['Gene'].drop_duplicates(keep='first').sort_values()
LyECs_DEGs_Repressed_list.to_csv('./Results/04b_All_DEGs/LyECs_Repressed.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed in All 7 time points
############################
LyECs_Repressed_All_counts = LyECs_DEGs_Repressed['Gene'].value_counts()
LyECs_Repressed_All_Results = LyECs_Repressed_All_counts[LyECs_Repressed_All_counts == 7]
LyECs_Repressed_All_Results_List = pd.DataFrame(sorted(LyECs_Repressed_All_Results.reset_index()['Gene']))
LyECs_Repressed_All_Results_List.to_csv('./Results/04b_All_DEGs/LyECs_Repressed_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)



############################
## Make All DEG count Table
############################
LyECs_Counts = pd.concat([LyECs_DEGs_Induced_Counts, LyECs_DEGs_Repressed_Counts], axis=1)
LyECs_Counts = LyECs_Counts[['Time','Induced', 'Repressed']]


############################
#All Genes in Celltype(Background)
############################
LyECs_All_Genes = Deseq2_Master[(Deseq2_Master['Celltype'] == 'LyECs')]

LyECs_All_All_Genes = pd.DataFrame(LyECs_All_Genes['Gene'].drop_duplicates(keep='first').tolist())
LyECs_All_All_Genes.to_csv('./Results/04b_All_Genes_In_Celltype/LyECs_All_Genes.txt', sep='\t', index=False, header=False)



print("LyECs")
print("----------------------------")
print("Induced in all time points:", len(LyECs_Induced_All_Results))
print()
print("Repressed in all time points:", len(LyECs_Repressed_All_Results))
print()
print("Total number of DEGs:", len(LyECs_DEGs_All_Unique_List))
print("--- Induced:", len(LyECs_Induced_DEGs_list))
print("--- Repressed:", len(LyECs_DEGs_Repressed_list))
print()
print("All Genes in Celltype:",len(LyECs_All_All_Genes))
print()
print(LyECs_Counts)

LyECs
----------------------------
Induced in all time points: 3

Repressed in all time points: 0

Total number of DEGs: 89
--- Induced: 62
--- Repressed: 27

All Genes in Celltype: 9195

  Time Time  Induced  Repressed
0    2    2       21          3
1    4    4       37         14
2    8    8        7          0
3   12   12       17          2
4   18   18       11          1
5   24   24        9          0
6   72   72       14          7


In [9]:
##############
# All DEGs
##############
Midzonal_LSECs_DEGs_All_Unique = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Midzonal LSECs') &
                                           ((Deseq2_Master['Fold-Change'] >= 1.5) | (Deseq2_Master['Fold-Change'] <= 1/1.5)) &
                                           (Deseq2_Master['padj'] <= 0.05)]

Midzonal_LSECs_DEGs_All_Unique_List = Midzonal_LSECs_DEGs_All_Unique[['Gene']].drop_duplicates(keep='first').copy()
Midzonal_LSECs_DEGs_All_Unique_List.sort_values(by='Gene', inplace=True)
Midzonal_LSECs_DEGs_All_Unique_List.to_csv('./Results/04b_All_DEGs/Midzonal_LSECs.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced Only
############################
Midzonal_LSECs_DEGs_Induced = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Midzonal LSECs') &
                               (Deseq2_Master['Fold-Change'] >= 1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]

Midzonal_LSECs_DEGs_Induced_Counts = Midzonal_LSECs_DEGs_Induced.groupby('Time').size().reset_index(name='Induced')
Midzonal_LSECs_Induced_DEGs_list = Midzonal_LSECs_DEGs_Induced['Gene'].drop_duplicates(keep='first').sort_values()
Midzonal_LSECs_Induced_DEGs_list.to_csv('./Results/04b_All_DEGs/Midzonal_LSECs_Induced.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced in All 7 time points
############################
Midzonal_LSECs_Induced_All_counts = Midzonal_LSECs_DEGs_Induced['Gene'].value_counts()
Midzonal_LSECs_Induced_All_Results = Midzonal_LSECs_Induced_All_counts[Midzonal_LSECs_Induced_All_counts == 7]
Midzonal_LSECs_Induced_All_Results_List = pd.DataFrame(sorted(Midzonal_LSECs_Induced_All_Results.reset_index()['Gene']))
Midzonal_LSECs_Induced_All_Results_List.to_csv('./Results/04b_All_DEGs/Midzonal_LSECs_Induced_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed Only
############################
Midzonal_LSECs_DEGs_Repressed = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Midzonal LSECs') &
                               (Deseq2_Master['Fold-Change'] <= 1/1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]                                                                      

Midzonal_LSECs_DEGs_Repressed_Counts = Midzonal_LSECs_DEGs_Repressed.groupby('Time').size().reset_index(name='Repressed')   
Midzonal_LSECs_DEGs_Repressed_list = Midzonal_LSECs_DEGs_Repressed['Gene'].drop_duplicates(keep='first').sort_values()
Midzonal_LSECs_DEGs_Repressed_list.to_csv('./Results/04b_All_DEGs/Midzonal_LSECs_Repressed.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed in All 7 time points
############################
Midzonal_LSECs_Repressed_All_counts = Midzonal_LSECs_DEGs_Repressed['Gene'].value_counts()
Midzonal_LSECs_Repressed_All_Results = Midzonal_LSECs_Repressed_All_counts[Midzonal_LSECs_Repressed_All_counts == 7]
Midzonal_LSECs_Repressed_All_Results_List = pd.DataFrame(sorted(Midzonal_LSECs_Repressed_All_Results.reset_index()['Gene']))
Midzonal_LSECs_Repressed_All_Results_List.to_csv('./Results/04b_All_DEGs/Midzonal_LSECs_Repressed_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)



############################
## Make All DEG count Table
############################
Midzonal_LSECs_Counts = pd.concat([Midzonal_LSECs_DEGs_Induced_Counts, Midzonal_LSECs_DEGs_Repressed_Counts], axis=1)
Midzonal_LSECs_Counts = Midzonal_LSECs_Counts[['Time','Induced', 'Repressed']]


############################
#All Genes in Celltype(Background)
############################
Midzonal_LSECs_All_Genes = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Midzonal LSECs')]

Midzonal_LSECs_All_All_Genes = pd.DataFrame(Midzonal_LSECs_All_Genes['Gene'].drop_duplicates(keep='first').tolist())
Midzonal_LSECs_All_All_Genes.to_csv('./Results/04b_All_Genes_In_Celltype/Midzonal_LSECs_All_Genes.txt', sep='\t', index=False, header=False)



print("Midzonal_LSECs")
print("----------------------------")
print("Induced in all time points:", len(Midzonal_LSECs_Induced_All_Results))
print()
print("Repressed in all time points:", len(Midzonal_LSECs_Repressed_All_Results))
print()
print("Total number of DEGs:", len(Midzonal_LSECs_DEGs_All_Unique_List))
print("--- Induced:", len(Midzonal_LSECs_Induced_DEGs_list))
print("--- Repressed:", len(Midzonal_LSECs_DEGs_Repressed_list))
print()
print("All Genes in Celltype:",len(Midzonal_LSECs_All_All_Genes))
print()
print(Midzonal_LSECs_Counts)

Midzonal_LSECs
----------------------------
Induced in all time points: 9

Repressed in all time points: 0

Total number of DEGs: 380
--- Induced: 240
--- Repressed: 141

All Genes in Celltype: 10583

  Time Time  Induced  Repressed
0    2    2      145         50
1    4    4      115         44
2    8    8       30          2
3   12   12       53         22
4   18   18       45          6
5   24   24       30          4
6   72   72       42         34


In [10]:
##############
# All DEGs
##############
PECs_DEGs_All_Unique = Deseq2_Master[(Deseq2_Master['Celltype'] == 'PECs') &
                                           ((Deseq2_Master['Fold-Change'] >= 1.5) | (Deseq2_Master['Fold-Change'] <= 1/1.5)) &
                                           (Deseq2_Master['padj'] <= 0.05)]

PECs_DEGs_All_Unique_List = PECs_DEGs_All_Unique[['Gene']].drop_duplicates(keep='first').copy()
PECs_DEGs_All_Unique_List.sort_values(by='Gene', inplace=True)
PECs_DEGs_All_Unique_List.to_csv('./Results/04b_All_DEGs/PECs.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced Only
############################
PECs_DEGs_Induced = Deseq2_Master[(Deseq2_Master['Celltype'] == 'PECs') &
                               (Deseq2_Master['Fold-Change'] >= 1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]

PECs_DEGs_Induced_Counts = PECs_DEGs_Induced.groupby('Time').size().reset_index(name='Induced')
PECs_Induced_DEGs_list = PECs_DEGs_Induced['Gene'].drop_duplicates(keep='first').sort_values()
PECs_Induced_DEGs_list.to_csv('./Results/04b_All_DEGs/PECs_Induced.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced in All 7 time points
############################
PECs_Induced_All_counts = PECs_DEGs_Induced['Gene'].value_counts()
PECs_Induced_All_Results = PECs_Induced_All_counts[PECs_Induced_All_counts == 7]
PECs_Induced_All_Results_List = pd.DataFrame(sorted(PECs_Induced_All_Results.reset_index()['Gene']))
PECs_Induced_All_Results_List.to_csv('./Results/04b_All_DEGs/PECs_Induced_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed Only
############################
PECs_DEGs_Repressed = Deseq2_Master[(Deseq2_Master['Celltype'] == 'PECs') &
                               (Deseq2_Master['Fold-Change'] <= 1/1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]                                                                      

PECs_DEGs_Repressed_Counts = PECs_DEGs_Repressed.groupby('Time').size().reset_index(name='Repressed')   
PECs_DEGs_Repressed_list = PECs_DEGs_Repressed['Gene'].drop_duplicates(keep='first').sort_values()
PECs_DEGs_Repressed_list.to_csv('./Results/04b_All_DEGs/PECs_Repressed.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed in All 7 time points
############################
PECs_Repressed_All_counts = PECs_DEGs_Repressed['Gene'].value_counts()
PECs_Repressed_All_Results = PECs_Repressed_All_counts[PECs_Repressed_All_counts == 7]
PECs_Repressed_All_Results_List = pd.DataFrame(sorted(PECs_Repressed_All_Results.reset_index()['Gene']))
PECs_Repressed_All_Results_List.to_csv('./Results/04b_All_DEGs/PECs_Repressed_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)



############################
## Make All DEG count Table
############################
PECs_Counts = pd.concat([PECs_DEGs_Induced_Counts, PECs_DEGs_Repressed_Counts], axis=1)
PECs_Counts = PECs_Counts[['Time','Induced', 'Repressed']]


############################
#All Genes in Celltype(Background)
############################
PECs_All_Genes = Deseq2_Master[(Deseq2_Master['Celltype'] == 'PECs')]

PECs_All_All_Genes = pd.DataFrame(PECs_All_Genes['Gene'].drop_duplicates(keep='first').tolist())
PECs_All_All_Genes.to_csv('./Results/04b_All_Genes_In_Celltype/PECs_All_Genes.txt', sep='\t', index=False, header=False)



print("PECs")
print("----------------------------")
print("Induced in all time points:", len(PECs_Induced_All_Results))
print()
print("Repressed in all time points:", len(PECs_Repressed_All_Results))
print()
print("Total number of DEGs:", len(PECs_DEGs_All_Unique_List))
print("--- Induced:", len(PECs_Induced_DEGs_list))
print("--- Repressed:", len(PECs_DEGs_Repressed_list))
print()
print("All Genes in Celltype:",len(PECs_All_All_Genes))
print()
print(PECs_Counts)

PECs
----------------------------
Induced in all time points: 2

Repressed in all time points: 0

Total number of DEGs: 45
--- Induced: 39
--- Repressed: 6

All Genes in Celltype: 8407

  Time Time  Induced  Repressed
0    2    2       30          3
1    4    4       11          0
2    8    8        2          0
3   12   12        5          0
4   18   18        4          0
5   24   24        2          0
6   72   72        4          3


In [11]:
##############
# All DEGs
##############
Pericentral_LSECs_DEGs_All_Unique = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Pericentral LSECs') &
                                           ((Deseq2_Master['Fold-Change'] >= 1.5) | (Deseq2_Master['Fold-Change'] <= 1/1.5)) &
                                           (Deseq2_Master['padj'] <= 0.05)]

Pericentral_LSECs_DEGs_All_Unique_List = Pericentral_LSECs_DEGs_All_Unique[['Gene']].drop_duplicates(keep='first').copy()
Pericentral_LSECs_DEGs_All_Unique_List.sort_values(by='Gene', inplace=True)
Pericentral_LSECs_DEGs_All_Unique_List.to_csv('./Results/04b_All_DEGs/Pericentral_LSECs.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced Only
############################
Pericentral_LSECs_DEGs_Induced = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Pericentral LSECs') &
                               (Deseq2_Master['Fold-Change'] >= 1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]

Pericentral_LSECs_DEGs_Induced_Counts = Pericentral_LSECs_DEGs_Induced.groupby('Time').size().reset_index(name='Induced')
Pericentral_LSECs_Induced_DEGs_list = Pericentral_LSECs_DEGs_Induced['Gene'].drop_duplicates(keep='first').sort_values()
Pericentral_LSECs_Induced_DEGs_list.to_csv('./Results/04b_All_DEGs/Pericentral_LSECs_Induced.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced in All 7 time points
############################
Pericentral_LSECs_Induced_All_counts = Pericentral_LSECs_DEGs_Induced['Gene'].value_counts()
Pericentral_LSECs_Induced_All_Results = Pericentral_LSECs_Induced_All_counts[Pericentral_LSECs_Induced_All_counts == 7]
Pericentral_LSECs_Induced_All_Results_List = pd.DataFrame(sorted(Pericentral_LSECs_Induced_All_Results.reset_index()['Gene']))
Pericentral_LSECs_Induced_All_Results_List.to_csv('./Results/04b_All_DEGs/Pericentral_LSECs_Induced_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed Only
############################
Pericentral_LSECs_DEGs_Repressed = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Pericentral LSECs') &
                               (Deseq2_Master['Fold-Change'] <= 1/1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]                                                                      

Pericentral_LSECs_DEGs_Repressed_Counts = Pericentral_LSECs_DEGs_Repressed.groupby('Time').size().reset_index(name='Repressed')   
Pericentral_LSECs_DEGs_Repressed_list = Pericentral_LSECs_DEGs_Repressed['Gene'].drop_duplicates(keep='first').sort_values()
Pericentral_LSECs_DEGs_Repressed_list.to_csv('./Results/04b_All_DEGs/Pericentral_LSECs_Repressed.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed in All 7 time points
############################
Pericentral_LSECs_Repressed_All_counts = Pericentral_LSECs_DEGs_Repressed['Gene'].value_counts()
Pericentral_LSECs_Repressed_All_Results = Pericentral_LSECs_Repressed_All_counts[Pericentral_LSECs_Repressed_All_counts == 7]
Pericentral_LSECs_Repressed_All_Results_List = pd.DataFrame(sorted(Pericentral_LSECs_Repressed_All_Results.reset_index()['Gene']))
Pericentral_LSECs_Repressed_All_Results_List.to_csv('./Results/04b_All_DEGs/Pericentral_LSECs_Repressed_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)



############################
## Make All DEG count Table
############################
Pericentral_LSECs_Counts = pd.concat([Pericentral_LSECs_DEGs_Induced_Counts, Pericentral_LSECs_DEGs_Repressed_Counts], axis=1)
Pericentral_LSECs_Counts = Pericentral_LSECs_Counts[['Time','Induced', 'Repressed']]


############################
#All Genes in Celltype(Background)
############################
Pericentral_LSECs_All_Genes = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Pericentral LSECs')]

Pericentral_LSECs_All_All_Genes = pd.DataFrame(Pericentral_LSECs_All_Genes['Gene'].drop_duplicates(keep='first').tolist())
Pericentral_LSECs_All_All_Genes.to_csv('./Results/04b_All_Genes_In_Celltype/Pericentral_LSECs_All_Genes.txt', sep='\t', index=False, header=False)



print("Pericentral_LSECs")
print("----------------------------")
print("Induced in all time points:", len(Pericentral_LSECs_Induced_All_Results))
print()
print("Repressed in all time points:", len(Pericentral_LSECs_Repressed_All_Results))
print()
print("Total number of DEGs:", len(Pericentral_LSECs_DEGs_All_Unique_List))
print("--- Induced:", len(Pericentral_LSECs_Induced_DEGs_list))
print("--- Repressed:", len(Pericentral_LSECs_DEGs_Repressed_list))
print()
print("All Genes in Celltype:",len(Pericentral_LSECs_All_All_Genes))
print()
print(Pericentral_LSECs_Counts)

Pericentral_LSECs
----------------------------
Induced in all time points: 11

Repressed in all time points: 0

Total number of DEGs: 619
--- Induced: 375
--- Repressed: 247

All Genes in Celltype: 11012

  Time Time  Induced  Repressed
0    2    2      282        127
1    4    4      142         76
2    8    8       46          8
3   12   12       62         33
4   18   18       50         11
5   24   24       32         10
6   72   72       47         39


In [12]:
##############
# All DEGs
##############
Pericentral_VECs_DEGs_All_Unique = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Pericentral VECs') &
                                           ((Deseq2_Master['Fold-Change'] >= 1.5) | (Deseq2_Master['Fold-Change'] <= 1/1.5)) &
                                           (Deseq2_Master['padj'] <= 0.05)]

Pericentral_VECs_DEGs_All_Unique_List = Pericentral_VECs_DEGs_All_Unique[['Gene']].drop_duplicates(keep='first').copy()
Pericentral_VECs_DEGs_All_Unique_List.sort_values(by='Gene', inplace=True)
Pericentral_VECs_DEGs_All_Unique_List.to_csv('./Results/04b_All_DEGs/Pericentral_VECs.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced Only
############################
Pericentral_VECs_DEGs_Induced = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Pericentral VECs') &
                               (Deseq2_Master['Fold-Change'] >= 1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]

Pericentral_VECs_DEGs_Induced_Counts = Pericentral_VECs_DEGs_Induced.groupby('Time').size().reset_index(name='Induced')
Pericentral_VECs_Induced_DEGs_list = Pericentral_VECs_DEGs_Induced['Gene'].drop_duplicates(keep='first').sort_values()
Pericentral_VECs_Induced_DEGs_list.to_csv('./Results/04b_All_DEGs/Pericentral_VECs_Induced.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced in All 7 time points
############################
Pericentral_VECs_Induced_All_counts = Pericentral_VECs_DEGs_Induced['Gene'].value_counts()
Pericentral_VECs_Induced_All_Results = Pericentral_VECs_Induced_All_counts[Pericentral_VECs_Induced_All_counts == 7]
Pericentral_VECs_Induced_All_Results_List = pd.DataFrame(sorted(Pericentral_VECs_Induced_All_Results.reset_index()['Gene']))
Pericentral_VECs_Induced_All_Results_List.to_csv('./Results/04b_All_DEGs/Pericentral_VECs_Induced_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed Only
############################
Pericentral_VECs_DEGs_Repressed = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Pericentral VECs') &
                               (Deseq2_Master['Fold-Change'] <= 1/1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]                                                                      

Pericentral_VECs_DEGs_Repressed_Counts = Pericentral_VECs_DEGs_Repressed.groupby('Time').size().reset_index(name='Repressed')   
Pericentral_VECs_DEGs_Repressed_list = Pericentral_VECs_DEGs_Repressed['Gene'].drop_duplicates(keep='first').sort_values()
Pericentral_VECs_DEGs_Repressed_list.to_csv('./Results/04b_All_DEGs/Pericentral_VECs_Repressed.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed in All 7 time points
############################
Pericentral_VECs_Repressed_All_counts = Pericentral_VECs_DEGs_Repressed['Gene'].value_counts()
Pericentral_VECs_Repressed_All_Results = Pericentral_VECs_Repressed_All_counts[Pericentral_VECs_Repressed_All_counts == 7]
Pericentral_VECs_Repressed_All_Results_List = pd.DataFrame(sorted(Pericentral_VECs_Repressed_All_Results.reset_index()['Gene']))
Pericentral_VECs_Repressed_All_Results_List.to_csv('./Results/04b_All_DEGs/Pericentral_VECs_Repressed_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)



############################
## Make All DEG count Table
############################
Pericentral_VECs_Counts = pd.concat([Pericentral_VECs_DEGs_Induced_Counts, Pericentral_VECs_DEGs_Repressed_Counts], axis=1)
Pericentral_VECs_Counts = Pericentral_VECs_Counts[['Time','Induced', 'Repressed']]


############################
#All Genes in Celltype(Background)
############################
Pericentral_VECs_All_Genes = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Pericentral VECs')]

Pericentral_VECs_All_All_Genes = pd.DataFrame(Pericentral_VECs_All_Genes['Gene'].drop_duplicates(keep='first').tolist())
Pericentral_VECs_All_All_Genes.to_csv('./Results/04b_All_Genes_In_Celltype/Pericentral_VECs_All_Genes.txt', sep='\t', index=False, header=False)



print("Pericentral_VECs")
print("----------------------------")
print("Induced in all time points:", len(Pericentral_VECs_Induced_All_Results))
print()
print("Repressed in all time points:", len(Pericentral_VECs_Repressed_All_Results))
print()
print("Total number of DEGs:", len(Pericentral_VECs_DEGs_All_Unique_List))
print("--- Induced:", len(Pericentral_VECs_Induced_DEGs_list))
print("--- Repressed:", len(Pericentral_VECs_DEGs_Repressed_list))
print()
print("All Genes in Celltype:",len(Pericentral_VECs_All_All_Genes))
print()
print(Pericentral_VECs_Counts)

Pericentral_VECs
----------------------------
Induced in all time points: 0

Repressed in all time points: 0

Total number of DEGs: 1
--- Induced: 1
--- Repressed: 0

All Genes in Celltype: 2018

  Time Time  Induced  Repressed
0    2    2        0          0
1    4    4        1          0
2    8    8        1          0
3   12   12        1          0
4   18   18        0          0
5   24   24        1          0
6   72   72        1          0


In [14]:
##############
# All DEGs
##############
Periportal_LSECs_DEGs_All_Unique = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Periportal LSECs') &
                                           ((Deseq2_Master['Fold-Change'] >= 1.5) | (Deseq2_Master['Fold-Change'] <= 1/1.5)) &
                                           (Deseq2_Master['padj'] <= 0.05)]

Periportal_LSECs_DEGs_All_Unique_List = Periportal_LSECs_DEGs_All_Unique[['Gene']].drop_duplicates(keep='first').copy()
Periportal_LSECs_DEGs_All_Unique_List.sort_values(by='Gene', inplace=True)
Periportal_LSECs_DEGs_All_Unique_List.to_csv('./Results/04b_All_DEGs/Periportal_LSECs.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced Only
############################
Periportal_LSECs_DEGs_Induced = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Periportal LSECs') &
                               (Deseq2_Master['Fold-Change'] >= 1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]

Periportal_LSECs_DEGs_Induced_Counts = Periportal_LSECs_DEGs_Induced.groupby('Time').size().reset_index(name='Induced')
Periportal_LSECs_Induced_DEGs_list = Periportal_LSECs_DEGs_Induced['Gene'].drop_duplicates(keep='first').sort_values()
Periportal_LSECs_Induced_DEGs_list.to_csv('./Results/04b_All_DEGs/Periportal_LSECs_Induced.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced in All 7 time points
############################
Periportal_LSECs_Induced_All_counts = Periportal_LSECs_DEGs_Induced['Gene'].value_counts()
Periportal_LSECs_Induced_All_Results = Periportal_LSECs_Induced_All_counts[Periportal_LSECs_Induced_All_counts == 7]
Periportal_LSECs_Induced_All_Results_List = pd.DataFrame(sorted(Periportal_LSECs_Induced_All_Results.reset_index()['Gene']))
Periportal_LSECs_Induced_All_Results_List.to_csv('./Results/04b_All_DEGs/Periportal_LSECs_Induced_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed Only
############################
Periportal_LSECs_DEGs_Repressed = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Periportal LSECs') &
                               (Deseq2_Master['Fold-Change'] <= 1/1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]                                                                      

Periportal_LSECs_DEGs_Repressed_Counts = Periportal_LSECs_DEGs_Repressed.groupby('Time').size().reset_index(name='Repressed')   
Periportal_LSECs_DEGs_Repressed_list = Periportal_LSECs_DEGs_Repressed['Gene'].drop_duplicates(keep='first').sort_values()
Periportal_LSECs_DEGs_Repressed_list.to_csv('./Results/04b_All_DEGs/Periportal_LSECs_Repressed.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed in All 7 time points
############################
Periportal_LSECs_Repressed_All_counts = Periportal_LSECs_DEGs_Repressed['Gene'].value_counts()
Periportal_LSECs_Repressed_All_Results = Periportal_LSECs_Repressed_All_counts[Periportal_LSECs_Repressed_All_counts == 7]
Periportal_LSECs_Repressed_All_Results_List = pd.DataFrame(sorted(Periportal_LSECs_Repressed_All_Results.reset_index()['Gene']))
Periportal_LSECs_Repressed_All_Results_List.to_csv('./Results/04b_All_DEGs/Periportal_LSECs_Repressed_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)



############################
## Make All DEG count Table
############################
Periportal_LSECs_Counts = pd.concat([Periportal_LSECs_DEGs_Induced_Counts, Periportal_LSECs_DEGs_Repressed_Counts], axis=1)
Periportal_LSECs_Counts = Periportal_LSECs_Counts[['Time','Induced', 'Repressed']]


############################
#All Genes in Celltype(Background)
############################
Periportal_LSECs_All_Genes = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Periportal LSECs')]

Periportal_LSECs_All_All_Genes = pd.DataFrame(Periportal_LSECs_All_Genes['Gene'].drop_duplicates(keep='first').tolist())
Periportal_LSECs_All_All_Genes.to_csv('./Results/04b_All_Genes_In_Celltype/Periportal_LSECs_All_Genes.txt', sep='\t', index=False, header=False)



print("Periportal_LSECs")
print("----------------------------")
print("Induced in all time points:", len(Periportal_LSECs_Induced_All_Results))
print()
print("Repressed in all time points:", len(Periportal_LSECs_Repressed_All_Results))
print()
print("Total number of DEGs:", len(Periportal_LSECs_DEGs_All_Unique_List))
print("--- Induced:", len(Periportal_LSECs_Induced_DEGs_list))
print("--- Repressed:", len(Periportal_LSECs_DEGs_Repressed_list))
print()
print("All Genes in Celltype:",len(Periportal_LSECs_All_All_Genes))
print()
print(Periportal_LSECs_Counts)

Periportal_LSECs
----------------------------
Induced in all time points: 6

Repressed in all time points: 0

Total number of DEGs: 370
--- Induced: 239
--- Repressed: 133

All Genes in Celltype: 10636

  Time Time  Induced  Repressed
0    2    2      154         44
1    4    4      104         34
2    8    8       21          3
3   12   12       50         27
4   18   18       25          7
5   24   24       20          2
6   72   72       36         34


In [15]:
##############
# All DEGs
##############
Periportal_VECs_DEGs_All_Unique = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Periportal VECs') &
                                           ((Deseq2_Master['Fold-Change'] >= 1.5) | (Deseq2_Master['Fold-Change'] <= 1/1.5)) &
                                           (Deseq2_Master['padj'] <= 0.05)]

Periportal_VECs_DEGs_All_Unique_List = Periportal_VECs_DEGs_All_Unique[['Gene']].drop_duplicates(keep='first').copy()
Periportal_VECs_DEGs_All_Unique_List.sort_values(by='Gene', inplace=True)
Periportal_VECs_DEGs_All_Unique_List.to_csv('./Results/04b_All_DEGs/Periportal_VECs.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced Only
############################
Periportal_VECs_DEGs_Induced = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Periportal VECs') &
                               (Deseq2_Master['Fold-Change'] >= 1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]

Periportal_VECs_DEGs_Induced_Counts = Periportal_VECs_DEGs_Induced.groupby('Time').size().reset_index(name='Induced')
Periportal_VECs_Induced_DEGs_list = Periportal_VECs_DEGs_Induced['Gene'].drop_duplicates(keep='first').sort_values()
Periportal_VECs_Induced_DEGs_list.to_csv('./Results/04b_All_DEGs/Periportal_VECs_Induced.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced in All 7 time points
############################
Periportal_VECs_Induced_All_counts = Periportal_VECs_DEGs_Induced['Gene'].value_counts()
Periportal_VECs_Induced_All_Results = Periportal_VECs_Induced_All_counts[Periportal_VECs_Induced_All_counts == 7]
Periportal_VECs_Induced_All_Results_List = pd.DataFrame(sorted(Periportal_VECs_Induced_All_Results.reset_index()['Gene']))
Periportal_VECs_Induced_All_Results_List.to_csv('./Results/04b_All_DEGs/Periportal_VECs_Induced_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed Only
############################
Periportal_VECs_DEGs_Repressed = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Periportal VECs') &
                               (Deseq2_Master['Fold-Change'] <= 1/1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]                                                                      

Periportal_VECs_DEGs_Repressed_Counts = Periportal_VECs_DEGs_Repressed.groupby('Time').size().reset_index(name='Repressed')   
Periportal_VECs_DEGs_Repressed_list = Periportal_VECs_DEGs_Repressed['Gene'].drop_duplicates(keep='first').sort_values()
Periportal_VECs_DEGs_Repressed_list.to_csv('./Results/04b_All_DEGs/Periportal_VECs_Repressed.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed in All 7 time points
############################
Periportal_VECs_Repressed_All_counts = Periportal_VECs_DEGs_Repressed['Gene'].value_counts()
Periportal_VECs_Repressed_All_Results = Periportal_VECs_Repressed_All_counts[Periportal_VECs_Repressed_All_counts == 7]
Periportal_VECs_Repressed_All_Results_List = pd.DataFrame(sorted(Periportal_VECs_Repressed_All_Results.reset_index()['Gene']))
Periportal_VECs_Repressed_All_Results_List.to_csv('./Results/04b_All_DEGs/Periportal_VECs_Repressed_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)



############################
## Make All DEG count Table
############################
Periportal_VECs_Counts = pd.concat([Periportal_VECs_DEGs_Induced_Counts, Periportal_VECs_DEGs_Repressed_Counts], axis=1)
Periportal_VECs_Counts = Periportal_VECs_Counts[['Time','Induced', 'Repressed']]


############################
#All Genes in Celltype(Background)
############################
Periportal_VECs_All_Genes = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Periportal VECs')]

Periportal_VECs_All_All_Genes = pd.DataFrame(Periportal_VECs_All_Genes['Gene'].drop_duplicates(keep='first').tolist())
Periportal_VECs_All_All_Genes.to_csv('./Results/04b_All_Genes_In_Celltype/Periportal_VECs_All_Genes.txt', sep='\t', index=False, header=False)



print("Periportal_VECs")
print("----------------------------")
print("Induced in all time points:", len(Periportal_VECs_Induced_All_Results))
print()
print("Repressed in all time points:", len(Periportal_VECs_Repressed_All_Results))
print()
print("Total number of DEGs:", len(Periportal_VECs_DEGs_All_Unique_List))
print("--- Induced:", len(Periportal_VECs_Induced_DEGs_list))
print("--- Repressed:", len(Periportal_VECs_DEGs_Repressed_list))
print()
print("All Genes in Celltype:",len(Periportal_VECs_All_All_Genes))
print()
print(Periportal_VECs_Counts)

Periportal_VECs
----------------------------
Induced in all time points: 0

Repressed in all time points: 0

Total number of DEGs: 5
--- Induced: 5
--- Repressed: 0

All Genes in Celltype: 3941

  Time Time  Induced  Repressed
0    2    2        3          0
1    4    4        0          0
2    8    8        1          0
3   12   12        1          0
4   18   18        1          0
5   24   24        0          0
6   72   72        1          0


In [16]:
##############
# All DEGs
##############
Unknown_DEGs_All_Unique = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Unknown') &
                                           ((Deseq2_Master['Fold-Change'] >= 1.5) | (Deseq2_Master['Fold-Change'] <= 1/1.5)) &
                                           (Deseq2_Master['padj'] <= 0.05)]

Unknown_DEGs_All_Unique_List = Unknown_DEGs_All_Unique[['Gene']].drop_duplicates(keep='first').copy()
Unknown_DEGs_All_Unique_List.sort_values(by='Gene', inplace=True)
Unknown_DEGs_All_Unique_List.to_csv('./Results/04b_All_DEGs/Unknown.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced Only
############################
Unknown_DEGs_Induced = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Unknown') &
                               (Deseq2_Master['Fold-Change'] >= 1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]

Unknown_DEGs_Induced_Counts = Unknown_DEGs_Induced.groupby('Time').size().reset_index(name='Induced')
Unknown_Induced_DEGs_list = Unknown_DEGs_Induced['Gene'].drop_duplicates(keep='first').sort_values()
Unknown_Induced_DEGs_list.to_csv('./Results/04b_All_DEGs/Unknown_Induced.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Induced in All 7 time points
############################
Unknown_Induced_All_counts = Unknown_DEGs_Induced['Gene'].value_counts()
Unknown_Induced_All_Results = Unknown_Induced_All_counts[Unknown_Induced_All_counts == 7]
Unknown_Induced_All_Results_List = pd.DataFrame(sorted(Unknown_Induced_All_Results.reset_index()['Gene']))
Unknown_Induced_All_Results_List.to_csv('./Results/04b_All_DEGs/Unknown_Induced_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed Only
############################
Unknown_DEGs_Repressed = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Unknown') &
                               (Deseq2_Master['Fold-Change'] <= 1/1.5) &
                               (Deseq2_Master['padj'] <= 0.05)]                                                                      

Unknown_DEGs_Repressed_Counts = Unknown_DEGs_Repressed.groupby('Time').size().reset_index(name='Repressed')   
Unknown_DEGs_Repressed_list = Unknown_DEGs_Repressed['Gene'].drop_duplicates(keep='first').sort_values()
Unknown_DEGs_Repressed_list.to_csv('./Results/04b_All_DEGs/Unknown_Repressed.txt', sep='\t', index=False, header=False)


############################
## All DEGs - Repressed in All 7 time points
############################
Unknown_Repressed_All_counts = Unknown_DEGs_Repressed['Gene'].value_counts()
Unknown_Repressed_All_Results = Unknown_Repressed_All_counts[Unknown_Repressed_All_counts == 7]
Unknown_Repressed_All_Results_List = pd.DataFrame(sorted(Unknown_Repressed_All_Results.reset_index()['Gene']))
Unknown_Repressed_All_Results_List.to_csv('./Results/04b_All_DEGs/Unknown_Repressed_In_All_7_Timepoints.txt', sep='\t', index=False, header=False)



############################
## Make All DEG count Table
############################
Unknown_Counts = pd.concat([Unknown_DEGs_Induced_Counts, Unknown_DEGs_Repressed_Counts], axis=1)
Unknown_Counts = Unknown_Counts[['Time','Induced', 'Repressed']]


############################
#All Genes in Celltype(Background)
############################
Unknown_All_Genes = Deseq2_Master[(Deseq2_Master['Celltype'] == 'Unknown')]

Unknown_All_All_Genes = pd.DataFrame(Unknown_All_Genes['Gene'].drop_duplicates(keep='first').tolist())
Unknown_All_All_Genes.to_csv('./Results/04b_All_Genes_In_Celltype/Unknown_All_Genes.txt', sep='\t', index=False, header=False)



print("Unknown")
print("----------------------------")
print("Induced in all time points:", len(Unknown_Induced_All_Results))
print()
print("Repressed in all time points:", len(Unknown_Repressed_All_Results))
print()
print("Total number of DEGs:", len(Unknown_DEGs_All_Unique_List))
print("--- Induced:", len(Unknown_Induced_DEGs_list))
print("--- Repressed:", len(Unknown_DEGs_Repressed_list))
print()
print("All Genes in Celltype:",len(Unknown_All_All_Genes))
print()
print(Unknown_Counts)

Unknown
----------------------------
Induced in all time points: 0

Repressed in all time points: 0

Total number of DEGs: 3
--- Induced: 3
--- Repressed: 0

All Genes in Celltype: 4444

  Time Time  Induced  Repressed
0    2    2        0          0
1    4    4        1          0
2    8    8        0          0
3   12   12        0          0
4   18   18        3          0
5   24   24        0          0
6   72   72        0          0
