# GSEApy Analysis

### 1. Import Required Packages
### 2. Import GSEA Data
### 3. Concatonate All Data into One Master File


## <br> 1. Import Required Packages

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import seaborn as sns
import gseapy as gp

from scipy import sparse
from anndata import AnnData
from anndata.experimental.multi_files import AnnCollection

Set figure parameters.

In [2]:
sc.set_figure_params(figsize=(6,6))
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
%config InlineBackend.figure_format='retina'
pd.options.display.max_colwidth = 200
#plt.rcParams['font.sans-serif']=['Arial']
plt.rcParams['pdf.fonttype'] = 'truetype'

## <br> 2. Import GSEA Data

In [4]:
GSEA_2_BCell = pd.read_csv('./GSEApy_Data/2_BCell.csv')
GSEA_2_Cholangiocyte = pd.read_csv('./GSEApy_Data/2_Cholangiocyte.csv')
GSEA_2_Hepatocyte = pd.read_csv('./GSEApy_Data/2_Hepatocyte.csv')
GSEA_2_HSC = pd.read_csv('./GSEApy_Data/2_HSC.csv')
GSEA_2_KC = pd.read_csv('./GSEApy_Data/2_KC.csv')
GSEA_2_EC = pd.read_csv('./GSEApy_Data/2_EC.csv')
GSEA_2_Neutrophil = pd.read_csv('./GSEApy_Data/2_Neutrophil.csv')
GSEA_2_pDC = pd.read_csv('./GSEApy_Data/2_pDC.csv')
GSEA_2_PF = pd.read_csv('./GSEApy_Data/2_PF.csv')
GSEA_2_TCell = pd.read_csv('./GSEApy_Data/2_TCell.csv')
GSEA_4_BCell = pd.read_csv('./GSEApy_Data/4_BCell.csv')
GSEA_4_Cholangiocyte = pd.read_csv('./GSEApy_Data/4_Cholangiocyte.csv')
GSEA_4_Hepatocyte = pd.read_csv('./GSEApy_Data/4_Hepatocyte.csv')
GSEA_4_HSC = pd.read_csv('./GSEApy_Data/4_HSC.csv')
GSEA_4_KC = pd.read_csv('./GSEApy_Data/4_KC.csv')
GSEA_4_EC = pd.read_csv('./GSEApy_Data/4_EC.csv')
GSEA_4_Neutrophil = pd.read_csv('./GSEApy_Data/4_Neutrophil.csv')
GSEA_4_pDC = pd.read_csv('./GSEApy_Data/4_pDC.csv')
GSEA_4_PF = pd.read_csv('./GSEApy_Data/4_PF.csv')
GSEA_4_TCell = pd.read_csv('./GSEApy_Data/4_TCell.csv')
GSEA_8_BCell = pd.read_csv('./GSEApy_Data/8_BCell.csv')
GSEA_8_Cholangiocyte = pd.read_csv('./GSEApy_Data/8_Cholangiocyte.csv')
GSEA_8_Hepatocyte = pd.read_csv('./GSEApy_Data/8_Hepatocyte.csv')
GSEA_8_HSC = pd.read_csv('./GSEApy_Data/8_HSC.csv')
GSEA_8_KC = pd.read_csv('./GSEApy_Data/8_KC.csv')
GSEA_8_EC = pd.read_csv('./GSEApy_Data/8_EC.csv')
GSEA_8_Neutrophil = pd.read_csv('./GSEApy_Data/8_Neutrophil.csv')
GSEA_8_pDC = pd.read_csv('./GSEApy_Data/8_pDC.csv')
GSEA_8_PF = pd.read_csv('./GSEApy_Data/8_PF.csv')
GSEA_8_TCell = pd.read_csv('./GSEApy_Data/8_TCell.csv')
GSEA_12_BCell = pd.read_csv('./GSEApy_Data/12_BCell.csv')
GSEA_12_Cholangiocyte = pd.read_csv('./GSEApy_Data/12_Cholangiocyte.csv')
GSEA_12_Hepatocyte = pd.read_csv('./GSEApy_Data/12_Hepatocyte.csv')
GSEA_12_HSC = pd.read_csv('./GSEApy_Data/12_HSC.csv')
GSEA_12_KC = pd.read_csv('./GSEApy_Data/12_KC.csv')
GSEA_12_EC = pd.read_csv('./GSEApy_Data/12_EC.csv')
GSEA_12_Neutrophil = pd.read_csv('./GSEApy_Data/12_Neutrophil.csv')
GSEA_12_pDC = pd.read_csv('./GSEApy_Data/12_pDC.csv')
GSEA_12_PF = pd.read_csv('./GSEApy_Data/12_PF.csv')
GSEA_12_TCell = pd.read_csv('./GSEApy_Data/12_TCell.csv')
GSEA_18_BCell = pd.read_csv('./GSEApy_Data/18_BCell.csv')
GSEA_18_Cholangiocyte = pd.read_csv('./GSEApy_Data/18_Cholangiocyte.csv')
GSEA_18_Hepatocyte = pd.read_csv('./GSEApy_Data/18_Hepatocyte.csv')
GSEA_18_HSC = pd.read_csv('./GSEApy_Data/18_HSC.csv')
GSEA_18_KC = pd.read_csv('./GSEApy_Data/18_KC.csv')
GSEA_18_EC = pd.read_csv('./GSEApy_Data/18_EC.csv')
GSEA_18_Neutrophil = pd.read_csv('./GSEApy_Data/18_Neutrophil.csv')
GSEA_18_pDC = pd.read_csv('./GSEApy_Data/18_pDC.csv')
GSEA_18_PF = pd.read_csv('./GSEApy_Data/18_PF.csv')
GSEA_18_TCell = pd.read_csv('./GSEApy_Data/18_TCell.csv')
GSEA_24_BCell = pd.read_csv('./GSEApy_Data/24_BCell.csv')
GSEA_24_Cholangiocyte = pd.read_csv('./GSEApy_Data/24_Cholangiocyte.csv')
GSEA_24_Hepatocyte = pd.read_csv('./GSEApy_Data/24_Hepatocyte.csv')
GSEA_24_HSC = pd.read_csv('./GSEApy_Data/24_HSC.csv')
GSEA_24_KC = pd.read_csv('./GSEApy_Data/24_KC.csv')
GSEA_24_EC = pd.read_csv('./GSEApy_Data/24_EC.csv')
GSEA_24_Neutrophil = pd.read_csv('./GSEApy_Data/24_Neutrophil.csv')
GSEA_24_pDC = pd.read_csv('./GSEApy_Data/24_pDC.csv')
GSEA_24_PF = pd.read_csv('./GSEApy_Data/24_PF.csv')
GSEA_24_TCell = pd.read_csv('./GSEApy_Data/24_TCell.csv')
GSEA_72_BCell = pd.read_csv('./GSEApy_Data/72_BCell.csv')
GSEA_72_Cholangiocyte = pd.read_csv('./GSEApy_Data/72_Cholangiocyte.csv')
GSEA_72_Hepatocyte = pd.read_csv('./GSEApy_Data/72_Hepatocyte.csv')
GSEA_72_HSC = pd.read_csv('./GSEApy_Data/72_HSC.csv')
GSEA_72_KC = pd.read_csv('./GSEApy_Data/72_KC.csv')
GSEA_72_EC = pd.read_csv('./GSEApy_Data/72_EC.csv')
GSEA_72_Neutrophil = pd.read_csv('./GSEApy_Data/72_Neutrophil.csv')
GSEA_72_pDC = pd.read_csv('./GSEApy_Data/72_pDC.csv')
GSEA_72_PF = pd.read_csv('./GSEApy_Data/72_PF.csv')
GSEA_72_TCell = pd.read_csv('./GSEApy_Data/72_TCell.csv')

FileNotFoundError: [Errno 2] No such file or directory: './GSEApy_Data/72_BCell.csv'

In [4]:
GSEA_2_BCell['Time'] = '2'
GSEA_2_Cholangiocyte['Time'] = '2'
GSEA_2_Hepatocyte['Time'] = '2'
GSEA_2_HSC['Time'] = '2'
GSEA_2_KC['Time'] = '2'
GSEA_2_EC['Time'] = '2'
GSEA_2_Neutrophil['Time'] = '2'
GSEA_2_pDC['Time'] = '2'
GSEA_2_PF['Time'] = '2'
GSEA_2_TCell['Time'] = '2'
GSEA_4_BCell['Time'] = '4'
GSEA_4_Cholangiocyte['Time'] = '4'
GSEA_4_Hepatocyte['Time'] = '4'
GSEA_4_HSC['Time'] = '4'
GSEA_4_KC['Time'] = '4'
GSEA_4_EC['Time'] = '4'
GSEA_4_Neutrophil['Time'] = '4'
GSEA_4_pDC['Time'] = '4'
GSEA_4_PF['Time'] = '4'
GSEA_4_TCell['Time'] = '4'
GSEA_8_BCell['Time'] = '8'
GSEA_8_Cholangiocyte['Time'] = '8'
GSEA_8_Hepatocyte['Time'] = '8'
GSEA_8_HSC['Time'] = '8'
GSEA_8_KC['Time'] = '8'
GSEA_8_EC['Time'] = '8'
GSEA_8_Neutrophil['Time'] = '8'
GSEA_8_pDC['Time'] = '8'
GSEA_8_PF['Time'] = '8'
GSEA_8_TCell['Time'] = '8'
GSEA_12_BCell['Time'] = '12'
GSEA_12_Cholangiocyte['Time'] = '12'
GSEA_12_Hepatocyte['Time'] = '12'
GSEA_12_HSC['Time'] = '12'
GSEA_12_KC['Time'] = '12'
GSEA_12_EC['Time'] = '12'
GSEA_12_Neutrophil['Time'] = '12'
GSEA_12_pDC['Time'] = '12'
GSEA_12_PF['Time'] = '12'
GSEA_12_TCell['Time'] = '12'
GSEA_18_BCell['Time'] = '18'
GSEA_18_Cholangiocyte['Time'] = '18'
GSEA_18_Hepatocyte['Time'] = '18'
GSEA_18_HSC['Time'] = '18'
GSEA_18_KC['Time'] = '18'
GSEA_18_EC['Time'] = '18'
GSEA_18_Neutrophil['Time'] = '18'
GSEA_18_pDC['Time'] = '18'
GSEA_18_PF['Time'] = '18'
GSEA_18_TCell['Time'] = '18'
GSEA_24_BCell['Time'] = '24'
GSEA_24_Cholangiocyte['Time'] = '24'
GSEA_24_Hepatocyte['Time'] = '24'
GSEA_24_HSC['Time'] = '24'
GSEA_24_KC['Time'] = '24'
GSEA_24_EC['Time'] = '24'
GSEA_24_Neutrophil['Time'] = '24'
GSEA_24_pDC['Time'] = '24'
GSEA_24_PF['Time'] = '24'
GSEA_24_TCell['Time'] = '24'
GSEA_72_BCell['Time'] = '72'
GSEA_72_Cholangiocyte['Time'] = '72'
GSEA_72_Hepatocyte['Time'] = '72'
GSEA_72_HSC['Time'] = '72'
GSEA_72_KC['Time'] = '72'
GSEA_72_EC['Time'] = '72'
GSEA_72_Neutrophil['Time'] = '72'
GSEA_72_pDC['Time'] = '72'
GSEA_72_PF['Time'] = '72'
GSEA_72_TCell['Time'] = '72'

In [5]:
GSEA_2_BCell['Celltype'] = 'B_Cell'
GSEA_2_Cholangiocyte['Celltype'] = 'Cholangiocyte'
GSEA_2_Hepatocyte['Celltype'] = 'Hepatocyte'
GSEA_2_HSC['Celltype'] = 'HSC'
GSEA_2_KC['Celltype'] = 'Macrophage'
GSEA_2_EC['Celltype'] = 'EC'
GSEA_2_Neutrophil['Celltype'] = 'Neutrophil'
GSEA_2_pDC['Celltype'] = 'pDC'
GSEA_2_PF['Celltype'] = 'PF'
GSEA_2_TCell['Celltype'] = 'T_Cell'
GSEA_4_BCell['Celltype'] = 'B_Cell'
GSEA_4_Cholangiocyte['Celltype'] = 'Cholangiocyte'
GSEA_4_Hepatocyte['Celltype'] = 'Hepatocyte'
GSEA_4_HSC['Celltype'] = 'HSC'
GSEA_4_KC['Celltype'] = 'Macrophage'
GSEA_4_EC['Celltype'] = 'EC'
GSEA_4_Neutrophil['Celltype'] = 'Neutrophil'
GSEA_4_pDC['Celltype'] = 'pDC'
GSEA_4_PF['Celltype'] = 'PF'
GSEA_4_TCell['Celltype'] = 'T_Cell'
GSEA_8_BCell['Celltype'] = 'B_Cell'
GSEA_8_Cholangiocyte['Celltype'] = 'Cholangiocyte'
GSEA_8_Hepatocyte['Celltype'] = 'Hepatocyte'
GSEA_8_HSC['Celltype'] = 'HSC'
GSEA_8_KC['Celltype'] = 'Macrophage'
GSEA_8_EC['Celltype'] = 'EC'
GSEA_8_Neutrophil['Celltype'] = 'Neutrophil'
GSEA_8_pDC['Celltype'] = 'pDC'
GSEA_8_PF['Celltype'] = 'PF'
GSEA_8_TCell['Celltype'] = 'T_Cell'
GSEA_12_BCell['Celltype'] = 'B_Cell'
GSEA_12_Cholangiocyte['Celltype'] = 'Cholangiocyte'
GSEA_12_Hepatocyte['Celltype'] = 'Hepatocyte'
GSEA_12_HSC['Celltype'] = 'HSC'
GSEA_12_KC['Celltype'] = 'Macrophage'
GSEA_12_EC['Celltype'] = 'EC'
GSEA_12_Neutrophil['Celltype'] = 'Neutrophil'
GSEA_12_pDC['Celltype'] = 'pDC'
GSEA_12_PF['Celltype'] = 'PF'
GSEA_12_TCell['Celltype'] = 'T_Cell'
GSEA_18_BCell['Celltype'] = 'B_Cell'
GSEA_18_Cholangiocyte['Celltype'] = 'Cholangiocyte'
GSEA_18_Hepatocyte['Celltype'] = 'Hepatocyte'
GSEA_18_HSC['Celltype'] = 'HSC'
GSEA_18_KC['Celltype'] = 'Macrophage'
GSEA_18_EC['Celltype'] = 'EC'
GSEA_18_Neutrophil['Celltype'] = 'Neutrophil'
GSEA_18_pDC['Celltype'] = 'pDC'
GSEA_18_PF['Celltype'] = 'PF'
GSEA_18_TCell['Celltype'] = 'T_Cell'
GSEA_24_BCell['Celltype'] = 'B_Cell'
GSEA_24_Cholangiocyte['Celltype'] = 'Cholangiocyte'
GSEA_24_Hepatocyte['Celltype'] = 'Hepatocyte'
GSEA_24_HSC['Celltype'] = 'HSC'
GSEA_24_KC['Celltype'] = 'Macrophage'
GSEA_24_EC['Celltype'] = 'EC'
GSEA_24_Neutrophil['Celltype'] = 'Neutrophil'
GSEA_24_pDC['Celltype'] = 'pDC'
GSEA_24_PF['Celltype'] = 'PF'
GSEA_24_TCell['Celltype'] = 'T_Cell'
GSEA_72_BCell['Celltype'] = 'B_Cell'
GSEA_72_Cholangiocyte['Celltype'] = 'Cholangiocyte'
GSEA_72_Hepatocyte['Celltype'] = 'Hepatocyte'
GSEA_72_HSC['Celltype'] = 'HSC'
GSEA_72_KC['Celltype'] = 'Macrophage'
GSEA_72_EC['Celltype'] = 'EC'
GSEA_72_Neutrophil['Celltype'] = 'Neutrophil'
GSEA_72_pDC['Celltype'] = 'pDC'
GSEA_72_PF['Celltype'] = 'PF'
GSEA_72_TCell['Celltype'] = 'T_Cell'

## <br> 3. Concat All CSVs Into One Object

In [6]:
GSEA_Master = pd.concat([GSEA_2_BCell,
                            GSEA_2_Cholangiocyte,
                            GSEA_2_Hepatocyte,
                            GSEA_2_HSC,
                            GSEA_2_KC,
                            GSEA_2_EC,
                            GSEA_2_Neutrophil,
                            GSEA_2_pDC,
                            GSEA_2_PF,
                            GSEA_2_TCell,
                            GSEA_4_BCell,
                            GSEA_4_Cholangiocyte,
                            GSEA_4_Hepatocyte,
                            GSEA_4_HSC,
                            GSEA_4_KC,
                            GSEA_4_EC,
                            GSEA_4_Neutrophil,
                            GSEA_4_pDC,
                            GSEA_4_PF,
                            GSEA_4_TCell,
                            GSEA_8_BCell,
                            GSEA_8_Cholangiocyte,
                            GSEA_8_Hepatocyte,
                            GSEA_8_HSC,
                            GSEA_8_KC,
                            GSEA_8_EC,
                            GSEA_8_Neutrophil,
                            GSEA_8_pDC,
                            GSEA_8_PF,
                            GSEA_8_TCell,
                            GSEA_12_BCell,
                            GSEA_12_Cholangiocyte,
                            GSEA_12_Hepatocyte,
                            GSEA_12_HSC,
                            GSEA_12_KC,
                            GSEA_12_EC,
                            GSEA_12_Neutrophil,
                            GSEA_12_pDC,
                            GSEA_12_PF,
                            GSEA_12_TCell,
                            GSEA_18_BCell,
                            GSEA_18_Cholangiocyte,
                            GSEA_18_Hepatocyte,
                            GSEA_18_HSC,
                            GSEA_18_KC,
                            GSEA_18_EC,
                            GSEA_18_Neutrophil,
                            GSEA_18_pDC,
                            GSEA_18_PF,
                            GSEA_18_TCell,
                            GSEA_24_BCell,
                            GSEA_24_Cholangiocyte,
                            GSEA_24_Hepatocyte,
                            GSEA_24_HSC,
                            GSEA_24_KC,
                            GSEA_24_EC,
                            GSEA_24_Neutrophil,
                            GSEA_24_pDC,
                            GSEA_24_PF,
                            GSEA_24_TCell,
                            GSEA_72_BCell,
                            GSEA_72_Cholangiocyte,
                            GSEA_72_Hepatocyte,
                            GSEA_72_HSC,
                            GSEA_72_KC,
                            GSEA_72_EC,
                            GSEA_72_Neutrophil,
                            GSEA_72_pDC,
                            GSEA_72_PF,
                            GSEA_72_TCell],
                        ignore_index=True)

In [7]:
GSEA_Master

Unnamed: 0,Name,Term,ES,NES,NOM p-val,FDR q-val,FWER p-val,Tag %,Gene %,Lead_genes,Time,Celltype
0,gsea,KEGG_MM_TRYPTOPHAN_METABOLISM_Ensembl,-0.633982,-2.501622,0.000000,0.002273,0.003,5/32,1.94%,Cyp1a2;Cyp1a1;Tdo2;Kyat3;Aox3,2,B_Cell
1,gsea,GO_BP_MM_CHOLESTEROL_BIOSYNTHETIC_PROCESS_Ensembl,0.695538,2.389591,0.000000,0.006162,0.005,12/23,14.41%,Insig2;Apoa1;Lss;Hmgcs1;Dhcr7;Cyp51;Nsdhl;Ebp;Prkaa1;Hmgcs2;Dhcr24;Fdps,2,B_Cell
2,gsea,WIKIPATHWAYS_MM_NUCLEAR_RECEPTORS_IN_LIPID_METABOLISM_AND_TOXICITY-WP299_Ensembl,-0.681056,-2.363876,0.000000,0.001768,0.007,9/26,6.85%,Cyp1a2;Nr1i3;Ppara;Abcd2;Abcb4;Nr1h4;Rarg;Abcb1a;Abcc2,2,B_Cell
3,gsea,BIOCARTA_MM_NUCLEAR_RECEPTORS_IN_LIPID_METABOLISM_AND_TOXICITY_Ensembl,-0.681056,-2.363876,0.000000,0.001768,0.007,9/26,6.85%,Cyp1a2;Nr1i3;Ppara;Abcd2;Abcb4;Nr1h4;Rarg;Abcb1a;Abcc2,2,B_Cell
4,gsea,TZ_SCS_CENTRALHEP,-0.477989,-2.346232,0.000000,0.001326,0.007,219/460,27.28%,Cyp1a2;Tiparp;Lmo7;Selenbp1;Lrp4;Cyp1a1;Fmo3;Nfe2l2;Reln;Fbp1;Wipf3;Cyb5a;Htatip2;Stard13;Tnfaip8l1;Cpeb4;Hpgd;Nr1i3;Myo1b;Gclc;Deptor;Il1r1;Esrrg;Cped1;2900026A02Rik;Ces1c;Ugdh;Etnk2;Ppara;Rnf217...,2,B_Cell
...,...,...,...,...,...,...,...,...,...,...,...,...
413113,gsea,GO_BP_MM_POSITIVE_REGULATION_OF_JUN_KINASE_ACTIVITY_Ensembl,0.139702,0.463578,0.995633,1.000000,1.000,23/23,94.83%,Il1rn;Magi3;Wnt5a;Mapk8ip3;Dvl2;Zeb2;Pak1;Dvl3;Tnfrsf11a;Map3k10;Map3k1;Vangl2;Traf2;Irak1;Map3k11;Map3k7;Traf6;Axin1;Fzd8;Dab2ip;Taok3;Fzd4;Ptk2b,72,T_Cell
413114,gsea,GO_CC_MM_RECYCLING_ENDOSOME_MEMBRANE_Ensembl,-0.127424,-0.441874,0.997980,0.999139,1.000,11/21,61.00%,Scamp1;Ndrg1;Washc1;Rab11fip4;Ldlr;Pigr;Scamp2;Slc9a7;Rap2b;Zfyve27;Ehd1,72,T_Cell
413115,gsea,MPO_MM_ABNORMAL_PRIMITIVE_STREAK_FORMATION_Ensembl,0.125241,0.434810,1.000000,1.000000,1.000,22/22,96.50%,Tcf7l1;Amn;Rac1;Wnt5a;Pcsk6;Smad3;Acvr2b;Lrp5;Foxa2;Ldb1;Drap1;Furin;Nckap1;Acvr2a;Acvr1;Frs2;Hnf4a;Acvr1b;Smad2;Brca1;Dhx9;Lrp6,72,T_Cell
413116,gsea,GO_MF_MM_POTASSIUM_CHANNEL_ACTIVITY_Ensembl,0.135616,0.399939,1.000000,1.000000,1.000,18/18,93.20%,Kcnd2;Kcnq3;Kcnq1;Kcnk6;Kcnk13;Kcnip4;Kcnn4;Pkd2;Aqp1;Tmem38b;Kcnd3;Kcnk5;Kcnip2;Kcnip3;Kcnj10;Kcnk1;Kcnmb2;Kcnip1,72,T_Cell


In [8]:
GSEA_Master.to_csv('./GSEApy_Data/GSEA_Master.txt', sep='\t', index=True, header=True)