# gProfiler gene set enrichment analysis of classic DGE analyses performed with pseudobulk of replicates containing 10 cells 

In [1]:
library(gprofiler2)
library(ggplot2)
library(ggsci)
library(parallel)
library(readxl)
library(stringi)

Loading the data

In [2]:
JHOS2 <- readRDS(file = "JHOS2_PBRs_diffexpress_subsampled.RDS")
PDC2 <- readRDS(file = "PDC2_PBRs_diffexpress_subsampled.RDS")
PDC3 <- readRDS(file = "PDC3_PBRs_diffexpress_subsampled.RDS")

In [3]:
high_anno <- as.data.frame(read_xlsx(path = "mechanisms_of_actions.xlsx", sheet = 1))
high_anno <- high_anno[, c(1:3)]
rownames(high_anno) <- high_anno$`Preferred name`

[1m[22mNew names:
[36m•[39m `` -> `...4`
[36m•[39m `` -> `...5`


In [4]:
gProfiler_wrapper <- function(genes_list){
    gostres <- gost(query = genes_list, 
                organism = "hsapiens", 
                ordered_query = FALSE, 
                multi_query = FALSE, 
                significant = TRUE, 
                exclude_iea = FALSE, 
                measure_underrepresentation = FALSE, 
                evcodes = TRUE, 
                sources = "REAC", # Focusing only on Reactome
                user_threshold = 0.05, # More relaxed 
                correction_method = "fdr", 
                domain_scope = "annotated", custom_bg = NULL, 
                numeric_ns = "", as_short_link = FALSE)    
                                         
    # Saving as generic enrichment map and removing large terms (accounting for < 300 genes)
    gem <- gostres$result[gostres$result$term_size < 300, ]
    gem <- gem[,c("term_id", "term_name", "p_value", "intersection")]
    colnames(gem) <- c("GO.ID", "Description", "p.Val", "Genes")
    gem$FDR <- gem$p.Val
    gem$Phenotype = "UP"
    gem <- gem[, c("GO.ID", "Description", "p.Val", "FDR", "Phenotype", "Genes")]
    return(gem)
}

In [5]:
for(mech in unique(high_anno$`Higher level classification`[-which(high_anno$`Higher level classification` == "Control")])){
    message(mech)
    # Extracting drugs
    drugs_mech <- high_anno[high_anno$`Higher level classification` == mech, "Preferred name"]
    
    # Filtering the original PBRs' objects for these drugs
    JHOS2_drugs <- JHOS2[drugs_mech]
    PDC2_drugs <- PDC2[drugs_mech]
    PDC3_drugs <- PDC3[drugs_mech]
    
    # Filtering for FDR < 0.01
    JHOS2_filtered <- lapply(JHOS2_drugs, function(x) x[x$FDR < 0.01, ])
    PDC2_filtered <- lapply(PDC2_drugs, function(x) x[x$FDR < 0.01, ])
    PDC3_filtered <- lapply(PDC3_drugs, function(x) x[x$FDR < 0.01, ])
                           
    # Filtering for log2FC > 0.25 or < -0.25
    JHOS2_filtered_lfc_up <- lapply(JHOS2_filtered, function(x) x[x$logFC > 0.25, ])
    JHOS2_filtered_lfc_down <- lapply(JHOS2_filtered, function(x) x[x$logFC < -0.25, ])

    PDC2_filtered_lfc_up <- lapply(PDC2_filtered, function(x) x[x$logFC > 0.25, ])
    PDC2_filtered_lfc_down <- lapply(PDC2_filtered, function(x) x[x$logFC < -0.25, ])

    PDC3_filtered_lfc_up <- lapply(PDC3_filtered, function(x) x[x$logFC > 0.25, ])
    PDC3_filtered_lfc_down <- lapply(PDC3_filtered, function(x) x[x$logFC < -0.25, ])
                                    
    # Getting the union of the row names for all the drugs included in such mechanism of action
    JHOS2_filtered_lfc_up_genes <- Reduce(f = union, x = (sapply(JHOS2_filtered_lfc_up, function(x) rownames(x))))
    JHOS2_filtered_lfc_down_genes <- Reduce(f = union, x = (sapply(JHOS2_filtered_lfc_down, function(x) rownames(x)))) 
                                           
    PDC2_filtered_lfc_up_genes <- Reduce(f = union, x = (sapply(PDC2_filtered_lfc_up, function(x) rownames(x))))
    PDC2_filtered_lfc_down_genes <- Reduce(f = union, x = (sapply(PDC2_filtered_lfc_down, function(x) rownames(x))))
                                          
    PDC3_filtered_lfc_up_genes <- Reduce(f = union, x = (sapply(PDC3_filtered_lfc_up, function(x) rownames(x))))
    PDC3_filtered_lfc_down_genes <- Reduce(f = union, x = (sapply(PDC3_filtered_lfc_down, function(x) rownames(x))))
    
    # Merging in a unique list
    JHOS2_mech <- list(UP = JHOS2_filtered_lfc_up_genes, 
                        DOWN = JHOS2_filtered_lfc_down_genes)  
     
    PDC2_mech <- list(UP = PDC2_filtered_lfc_up_genes, 
                      DOWN = PDC2_filtered_lfc_down_genes)       
                                                                 
    PDC3_mech <- list(UP = PDC3_filtered_lfc_up_genes, 
                      DOWN = PDC3_filtered_lfc_down_genes)                 
                                                  
                                                                 
    # launching the gProfiler with many cores, and a nested mclapply 
    models_lists <- list(JHOS2_mech, PDC2_mech, PDC3_mech)
    results <- mclapply(mc.allow.recursive = T, mc.cores = 96, X = models_lists, 
                                function(x) mclapply(mc.allow.recursive = T, mc.cores = 96, X = x, 
                                function(y) gProfiler_wrapper(genes_list = y))) 
                                                     
    # In results we have a list with 3 lists, each with 2 elements in it
    JHOS2_results <- results[[1]]
    PDC2_results <- results[[2]]
    PDC3_results <- results[[3]]
                                                     
    # We are ready to merge the results by row. However, we need to change the phenotype column to DOWN for the "DOWN" genes
    JHOS2_results$DOWN$Phenotype <- "DOWN"
    PDC2_results$DOWN$Phenotype <- "DOWN"
    PDC3_results$DOWN$Phenotype <- "DOWN"    
                                                     
    # Rbinding and saving
    # One of the two lists might be equal to NULL, so we need to check for that 
    if(is.null(nrow(JHOS2_results$UP))){
        JHOS2_final <- JHOS2_results$DOWN
    }else if(is.null(nrow(JHOS2_results$DOWN))){
        JHOS2_final <- JHOS2_results$UP
    }else{
        JHOS2_final <- do.call("rbind", JHOS2_results) 
    }                                                 
    
    if(is.null(nrow(PDC2_results$UP))){
        PDC2_final <- PDC2_results$DOWN
    }else if(is.null(nrow(PDC2_results$DOWN))){
        PDC2_final <- PDC2_results$UP
    }else{
        PDC2_final <- do.call("rbind", PDC2_results) 
    }
        
    if(is.null(nrow(PDC3_results$UP))){
        PDC3_final <- PDC3_results$DOWN
    }else if(is.null(nrow(PDC3_results$DOWN))){
        PDC3_final <- PDC3_results$UP
    }else{
        PDC3_final <- do.call("rbind", PDC3_results) 
    }    
                                                     
    write.table(x = JHOS2_final, 
                file = paste0("JHOS2_", stri_replace_all_fixed(mech, 
                                                               pattern = c("/", " "), 
                                                               replacement = c("_", "_"), 
                                                               vectorize_all = FALSE), "_gProfiler_Reactome_GEM_05_subsampled_DGE.txt"), 
                sep = "\t", quote = F, row.names = F)       
                                                     
    write.table(x = PDC2_final, 
                file = paste0("PDC2_", stri_replace_all_fixed(mech, 
                                                               pattern = c("/", " "), 
                                                               replacement = c("_", "_"), 
                                                               vectorize_all = FALSE), "_gProfiler_Reactome_GEM_05_subsampled_DGE.txt"), 
                sep = "\t", quote = F, row.names = F)
    write.table(x = PDC3_final, 
                file = paste0("PDC3_", stri_replace_all_fixed(mech, 
                                                               pattern = c("/", " "), 
                                                               replacement = c("_", "_"), 
                                                               vectorize_all = FALSE), "_gProfiler_Reactome_GEM_05_subsampled_DGE.txt"), 
                sep = "\t", quote = F, row.names = F)                                                  
                                                                 
}

HDAC inhibitor

CDK inhibitor

PI3K/mTOR/AKT inhibitor

Multi-kinase inhibitor

Ras/Raf/MEK/ERK inhibitor

PLK1 inhibitor

CHK1 inhibitor

PARP inhibitor

IAPs/SMAC mimetic

“Coercing LHS to a list”
BET inhibitor

XPO1/CRM1 inhibitor

Bcl-2 inhibitor

PAK inhibitor



In [6]:
sessionInfo()

R version 4.2.2 (2022-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Rocky Linux 8.8 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /homedir01/adini22/.conda/envs/cellhashing_analyses/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
[1] stringi_1.7.12   readxl_1.4.1     ggsci_3.0.0      ggplot2_3.4.2   
[5] gprofiler2_0.2.1

loaded via a namespace (and not attached):
 [1] cellranger_1.1.0       pillar_1.9.0           compiler_4.2.2        
 [4] base64enc_0.1-3        tools_4.2.2          