# Functional enrichment analysis of the PBR’s treated vs. DMSO cells DGE

With this script, we want to perform gene enrichment analysis using gProfiler for each of the PBRs results comparing DMSO and the drug. Also, we want to group the data in a good way so as to obtain one unique network for each model. In this specific case, we use only the Reactome database.

In [1]:
suppressWarnings({suppressMessages({
    library(gprofiler2)
    library(ggplot2)
    library(ggsci)
    library(parallel)
    library(readxl)
    library(stringi)
})})

Loading the data.

In [2]:
JHOS2 <- readRDS(file = "JHOS2_PBRs_diffexpress.RDS")
PDC1 <- readRDS(file = "PDC1_PBRs_diffexpress.RDS")
PDC2 <- readRDS(file = "PDC2_PBRs_diffexpress.RDS")

We need to launch gProfiler on the union of the up- and down-regulated genes of all the drugs. Remember to check the genes in common between the two. First, we load the annotation file.

In [3]:
high_anno <- as.data.frame(read_xlsx(path = "mechanisms_of_actions.xlsx", sheet = 1))
high_anno <- high_anno[, c(1:3)]
rownames(high_anno) <- high_anno$`Preferred name`

[1m[22mNew names:
[36m•[39m `` -> `...4`
[36m•[39m `` -> `...5`


In [4]:
gProfiler_wrapper <- function(genes_list){
    gostres <- gost(query = genes_list, 
                organism = "hsapiens", 
                ordered_query = FALSE, 
                multi_query = FALSE, 
                significant = TRUE, 
                exclude_iea = FALSE, 
                measure_underrepresentation = FALSE, 
                evcodes = TRUE, 
                sources = "REAC", # Focusing only on Reactome
                user_threshold = 0.05, # More relaxed 
                correction_method = "fdr", 
                domain_scope = "annotated", custom_bg = NULL, 
                numeric_ns = "", as_short_link = FALSE)    
                                         
    # Saving as generic enrichment map and removing large terms (accounting for < 300 genes)
    gem <- gostres$result[gostres$result$term_size < 300, ]
    gem <- gem[,c("term_id", "term_name", "p_value", "intersection")]
    colnames(gem) <- c("GO.ID", "Description", "p.Val", "Genes")
    gem$FDR <- gem$p.Val
    gem$Phenotype = "UP"
    gem <- gem[, c("GO.ID", "Description", "p.Val", "FDR", "Phenotype", "Genes")]
    return(gem)
}

For each mechanism, we extract the drugs, and obtain two lists of genes gathered from the union of the rownames of the genes which satisfy the FDR and log2FC thresholds. The log2FC corresponds to Seurat’s default threshold to call markers.

In [5]:
for(mech in unique(high_anno$`Higher level classification`[-which(high_anno$`Higher level classification` == "Control")])){
    print(mech)
    # Extracting drugs
    drugs_mech <- high_anno[high_anno$`Higher level classification` == mech, "Preferred name"]
    
    # Filtering the original PBRs' objects for these drugs
    JHOS2_drugs <- JHOS2[drugs_mech]
    PDC1_drugs <- PDC1[drugs_mech]
    PDC2_drugs <- PDC2[drugs_mech]
    
    # Filtering for FDR < 0.01
    JHOS2_filtered <- lapply(JHOS2_drugs, function(x) x[x$FDR < 0.01, ])
    PDC1_filtered <- lapply(PDC1_drugs, function(x) x[x$FDR < 0.01, ])
    PDC2_filtered <- lapply(PDC2_drugs, function(x) x[x$FDR < 0.01, ])
                           
    # Filtering for log2FC > 0.25 or < -0.25
    JHOS2_filtered_lfc_up <- lapply(JHOS2_filtered, function(x) x[x$logFC > 0.25, ])
    JHOS2_filtered_lfc_down <- lapply(JHOS2_filtered, function(x) x[x$logFC < -0.25, ])

    PDC1_filtered_lfc_up <- lapply(PDC1_filtered, function(x) x[x$logFC > 0.25, ])
    PDC1_filtered_lfc_down <- lapply(PDC1_filtered, function(x) x[x$logFC < -0.25, ])

    PDC2_filtered_lfc_up <- lapply(PDC2_filtered, function(x) x[x$logFC > 0.25, ])
    PDC2_filtered_lfc_down <- lapply(PDC2_filtered, function(x) x[x$logFC < -0.25, ])
                                    
    # Getting the union of the row names for all the drugs included in such mechanism of action
    JHOS2_filtered_lfc_up_genes <- Reduce(f = union, x = (sapply(JHOS2_filtered_lfc_up, function(x) rownames(x))))
    JHOS2_filtered_lfc_down_genes <- Reduce(f = union, x = (sapply(JHOS2_filtered_lfc_down, function(x) rownames(x)))) 
                                           
    PDC1_filtered_lfc_up_genes <- Reduce(f = union, x = (sapply(PDC1_filtered_lfc_up, function(x) rownames(x))))
    PDC1_filtered_lfc_down_genes <- Reduce(f = union, x = (sapply(PDC1_filtered_lfc_down, function(x) rownames(x))))
                                          
    PDC2_filtered_lfc_up_genes <- Reduce(f = union, x = (sapply(PDC2_filtered_lfc_up, function(x) rownames(x))))
    PDC2_filtered_lfc_down_genes <- Reduce(f = union, x = (sapply(PDC2_filtered_lfc_down, function(x) rownames(x))))
    
    # Merging in a unique list
    JHOS2_mech <- list(UP = JHOS2_filtered_lfc_up_genes, 
                        DOWN = JHOS2_filtered_lfc_down_genes)  
     
    PDC1_mech <- list(UP = PDC1_filtered_lfc_up_genes, 
                      DOWN = PDC1_filtered_lfc_down_genes)       
                                                                 
    PDC2_mech <- list(UP = PDC2_filtered_lfc_up_genes, 
                      DOWN = PDC2_filtered_lfc_down_genes)                 
                                                  
                                                                 
    # launching the gProfiler with many cores, and a nested mclapply
    models_lists <- list(JHOS2_mech, PDC1_mech, PDC2_mech)
    results <- mclapply(mc.allow.recursive = T, mc.cores = 96, X = models_lists, 
                                function(x) mclapply(mc.allow.recursive = T, mc.cores = 96, X = x, 
                                function(y) gProfiler_wrapper(genes_list = y))) 
                                                     
    # In results we have a list with 3 lists, each with 2 elements in it
    JHOS2_results <- results[[1]]
    PDC1_results <- results[[2]]
    PDC2_results <- results[[3]]
                                                     
    # We are ready to merge the results by row. However, we need to change the phenotype column to DOWN for the "DOWN" genes
    JHOS2_results$DOWN$Phenotype <- "DOWN"
    PDC1_results$DOWN$Phenotype <- "DOWN"
    PDC2_results$DOWN$Phenotype <- "DOWN"    
                                                     
    # Rbinding and saving
    # One of the two lists might be equal to NULL, so we need to check for that 
    if(is.null(nrow(JHOS2_results$UP))){
        JHOS2_final <- JHOS2_results$DOWN
    }else if(is.null(nrow(JHOS2_results$DOWN))){
        JHOS2_final <- JHOS2_results$UP
    }else{
        JHOS2_final <- do.call("rbind", JHOS2_results) 
    }                                                 
    
    if(is.null(nrow(PDC1_results$UP))){
        PDC1_final <- PDC1_results$DOWN
    }else if(is.null(nrow(PDC1_results$DOWN))){
        PDC1_final <- PDC1_results$UP
    }else{
        PDC1_final <- do.call("rbind", PDC1_results) 
    }
        
    if(is.null(nrow(PDC2_results$UP))){
        PDC2_final <- PDC2_results$DOWN
    }else if(is.null(nrow(PDC2_results$DOWN))){
        PDC2_final <- PDC2_results$UP
    }else{
        PDC2_final <- do.call("rbind", PDC2_results) 
    }    
                                                     
    write.table(x = JHOS2_final, 
                file = paste0("JHOS2_", stri_replace_all_fixed(mech, 
                                                               pattern = c("/", " "), 
                                                               replacement = c("_", "_"), 
                                                               vectorize_all = FALSE), "_gProfiler_Reactome_GEM_05.txt"), 
                sep = "\t", quote = F, row.names = F)       
                                                     
    write.table(x = PDC1_final, 
                file = paste0("PDC1_", stri_replace_all_fixed(mech, 
                                                               pattern = c("/", " "), 
                                                               replacement = c("_", "_"), 
                                                               vectorize_all = FALSE), "_gProfiler_Reactome_GEM_05.txt"), 
                sep = "\t", quote = F, row.names = F)
    write.table(x = PDC2_final, 
                file = paste0("PDC2_", stri_replace_all_fixed(mech, 
                                                               pattern = c("/", " "), 
                                                               replacement = c("_", "_"), 
                                                               vectorize_all = FALSE), "_gProfiler_Reactome_GEM_05.txt"), 
                sep = "\t", quote = F, row.names = F)                                                  
                                                                 
}

[1] "HDAC inhibitor"
[1] "CDK inhibitor"
[1] "PI3K/mTOR/AKT inhibitor"
[1] "Multi-kinase inhibitor"
[1] "Ras/Raf/MEK/ERK inhibitor"
[1] "PLK1 inhibitor"
[1] "CHK1 inhibitor"
[1] "PARP inhibitor"
[1] "IAPs/SMAC mimetic"


“Coercing LHS to a list”


[1] "BET inhibitor"
[1] "XPO1/CRM1 inhibitor"
[1] "Bcl-2 inhibitor"
[1] "PAK inhibitor"
