# Determine Maximum Pseudobulk Counts and Count Percentiles per Cell Type

### 1. Import Required Packages
### 2. Import Data
### 3. Subset Data by Celltype



## <br> 1. Import Required Packages

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

## <br> 2. Import DESeq2 Data

In [2]:
# Load the tab-separated pseudobulk count table: the first line supplies the
# column labels and the first column becomes the row index.
Pseudobulk_MASTER_Counts = pd.read_csv(
    './Results/pseudobulk_sums.txt',
    sep='\t',
    header=0,
    index_col=0,
)

In [3]:
# Preview the loaded table (pandas truncates the display). In the captured
# output the row labels look like gene symbols and the columns are named
# "<sample>_<celltype>", e.g. "L001_LyECs".
Pseudobulk_MASTER_Counts

Unnamed: 0,L001_LyECs,L001_Midzonal_LSECs,L001_PECs,L001_Pericentral_LSECs,L001_Pericentral_VECs,L001_Periportal_LSECs,L001_Periportal_VECs,L001_Unknown,L002_Midzonal_LSECs,L002_PECs,...,L137_Periportal_VECs,L137_Unknown,L138_LyECs,L138_Midzonal_LSECs,L138_PECs,L138_Pericentral_LSECs,L138_Pericentral_VECs,L138_Periportal_LSECs,L138_Periportal_VECs,L138_Unknown
0610005C13Rik,157,13,6,18,0,16,0,1,26,20,...,3,2,16,42,17,26,1,25,0,2
0610006L08Rik,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0610009B22Rik,2,0,0,0,0,1,0,0,2,0,...,0,0,0,1,0,0,0,2,0,0
0610009E02Rik,4,4,0,2,0,4,0,0,4,1,...,0,0,2,5,1,6,0,2,0,0
0610009L18Rik,0,1,0,0,0,0,0,0,3,1,...,0,1,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
mt-Nd3,0,0,0,1,0,1,0,0,1,0,...,0,0,0,4,0,1,0,3,0,1
mt-Nd4,2,0,0,2,0,2,0,0,6,2,...,1,5,1,10,3,11,0,10,2,3
mt-Nd4l,1,0,0,0,0,0,0,0,0,0,...,0,2,0,1,1,0,0,0,0,0
mt-Nd5,0,1,0,0,0,0,0,0,4,0,...,0,0,0,4,2,6,0,3,0,0


## <br> 3. Subset Data by Celltype

In [12]:
# Split the master count table into one DataFrame per cell type.
#
# Column labels follow "<sample>_<celltype>" (e.g. "L001_PECs"), so each subset
# is selected with a regex anchored on the "_<celltype>" suffix. A bare
# substring match such as filter(like="PECs") is only correct while no other
# label happens to contain that text; the anchored pattern keeps every subset
# limited to exactly one cell type even if new labels are introduced later.
Pericentral_LSECs_Counts = Pseudobulk_MASTER_Counts.filter(regex=r"_Pericentral_LSECs$")
Midzonal_LSECs_Counts = Pseudobulk_MASTER_Counts.filter(regex=r"_Midzonal_LSECs$")
Periportal_LSECs_Counts = Pseudobulk_MASTER_Counts.filter(regex=r"_Periportal_LSECs$")
PECs_Counts = Pseudobulk_MASTER_Counts.filter(regex=r"_PECs$")
LyECs_Counts = Pseudobulk_MASTER_Counts.filter(regex=r"_LyECs$")
Pericentral_VECs_Counts = Pseudobulk_MASTER_Counts.filter(regex=r"_Pericentral_VECs$")
Unknown_Counts = Pseudobulk_MASTER_Counts.filter(regex=r"_Unknown$")
Periportal_VECs_Counts = Pseudobulk_MASTER_Counts.filter(regex=r"_Periportal_VECs$")


In [13]:
# Spot-check one subset: show only the Midzonal LSEC columns of the master table.
Pseudobulk_MASTER_Counts.filter(like="Midzonal_LSECs", axis="columns")

Unnamed: 0,L001_Midzonal_LSECs,L002_Midzonal_LSECs,L003_Midzonal_LSECs,L016_Midzonal_LSECs,L018_Midzonal_LSECs,L019_Midzonal_LSECs,L021_Midzonal_LSECs,L022_Midzonal_LSECs,L023_Midzonal_LSECs,L036_Midzonal_LSECs,...,L103_Midzonal_LSECs,L116_Midzonal_LSECs,L117_Midzonal_LSECs,L118_Midzonal_LSECs,L121_Midzonal_LSECs,L122_Midzonal_LSECs,L123_Midzonal_LSECs,L136_Midzonal_LSECs,L137_Midzonal_LSECs,L138_Midzonal_LSECs
0610005C13Rik,13,26,29,26,29,51,10,33,58,57,...,38,25,24,47,32,21,54,31,60,42
0610006L08Rik,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0610009B22Rik,0,2,0,2,2,0,0,0,5,0,...,0,0,0,1,0,0,2,3,1,1
0610009E02Rik,4,4,5,3,10,15,5,1,4,4,...,2,2,1,5,2,0,1,3,3,5
0610009L18Rik,1,3,1,1,0,0,0,2,2,0,...,0,0,0,1,1,1,3,1,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
mt-Nd3,0,1,3,0,3,7,0,1,1,1,...,9,3,0,8,0,3,4,0,8,4
mt-Nd4,0,6,16,5,15,16,2,9,12,0,...,50,6,3,60,3,3,15,10,29,10
mt-Nd4l,0,0,3,0,3,5,0,1,1,0,...,8,2,1,4,0,3,4,0,4,1
mt-Nd5,1,4,7,3,5,10,0,4,10,0,...,15,8,1,16,2,2,7,4,11,4


In [14]:
# Per-cell-type count tables, in the order they should appear as output columns
dataframes = [Pericentral_LSECs_Counts, Midzonal_LSECs_Counts, Periportal_LSECs_Counts, PECs_Counts, LyECs_Counts,
              Pericentral_VECs_Counts, Unknown_Counts, Periportal_VECs_Counts]

# Output column label for each table above (same order)
cell_names = ["Pericentral LSECs", "Midzonal LSECs", "Periportal LSECs", "PECs", "LyECs",
              "Pericentral VECs", "Unknown", "Periportal VECs"]

# zip() silently stops at the shorter list, so fail loudly if the two drift apart
assert len(dataframes) == len(cell_names), "dataframes and cell_names must have the same length"

# For every row (gene), keep the largest count found in any column of that cell type
max_values_dict = {
    cell_name: cell_df.max(axis=1)
    for cell_name, cell_df in zip(cell_names, dataframes)
}

# Assemble the per-cell-type maxima into one table: one column per cell type
max_values_df = pd.DataFrame(max_values_dict)

max_values_df.to_csv("./Results/Max_Count_Per_Celltype.txt", index=True, sep='\t')

# Rich notebook display (truncated by pandas) instead of print(), which wraps
# wide frames into hard-to-read text chunks
max_values_df

               Pericentral LSECs  Midzonal LSECs  Periportal LSECs  PECs   
0610005C13Rik                 93              64                86    40  \
0610006L08Rik                  0               0                 0     0   
0610009B22Rik                  5               6                 3     5   
0610009E02Rik                 13              15                14     6   
0610009L18Rik                  4               6                 4     2   
...                          ...             ...               ...   ...   
mt-Nd3                        17               9                12     4   
mt-Nd4                        73              60                65    16   
mt-Nd4l                       14               8                13     4   
mt-Nd5                        23              16                22     3   
mt-Nd6                         5               4                 5     2   

               LyECs  Pericentral VECs  Unknown  Periportal VECs  
0610005C13Rik    276

In [15]:
# Percentile rank (0-100) of each gene's maximum count within its cell-type column.
# method="average" is the pandas default, spelled out here because it matters:
# genes with equal maxima share the mean of their ranks, so the many zero-count
# genes land on a common non-zero percentile rather than on 0.
percentile_df = max_values_df.rank(axis=0, method="average", pct=True) * 100

percentile_df.to_csv("./Results/Max_Count_Gene_Percentile_Per_Celltype.txt", index=True, sep='\t')

# Rich notebook display instead of print(), which wraps wide frames into text chunks
percentile_df

               Pericentral LSECs  Midzonal LSECs  Periportal LSECs       PECs   
0610005C13Rik          86.241655       86.289654         88.353624  84.086050  \
0610006L08Rik           6.763538        7.293712          7.271894  11.118384   
0610009B22Rik          45.592791       50.914169         40.175416  54.141031   
0610009E02Rik          58.578784       64.857966         62.392547  56.663176   
0610009L18Rik          42.614653       50.914169         44.327355  41.264127   
...                          ...             ...               ...        ...   
mt-Nd3                 62.368547       56.695903         60.012218  51.127984   
mt-Nd4                 83.411878       85.543483         85.056945  71.268054   
mt-Nd4l                59.658769       54.917747         61.227473  51.127984   
mt-Nd5                 67.022298       66.016494         69.553170  47.137496   
mt-Nd6                 45.592791       45.051708         47.429856  41.264127   

                   LyECs  P

In [16]:
# Spot check: pull the "Kit" row out of both result tables
max_count, percentile = max_values_df.loc["Kit"], percentile_df.loc["Kit"]

# Show the raw maxima, a blank separator line, then the percentiles
print(max_count, "", percentile, sep="\n")

Pericentral LSECs    1487
Midzonal LSECs        664
Periportal LSECs      264
PECs                  340
LyECs                 187
Pericentral VECs       16
Unknown                43
Periportal VECs         4
Name: Kit, dtype: int64

Pericentral LSECs    99.533098
Midzonal LSECs       99.277829
Periportal LSECs     96.618231
PECs                 98.752018
LyECs                94.593533
Pericentral VECs     97.408038
Unknown              98.341842
Periportal VECs      73.524021
Name: Kit, dtype: float64
