# Bar chart of the number of differentially expressed genes for each of the 45 drugs in the three models, using pseudobulk subsamples of 10 cells

In [1]:
library(reshape2)
library(ggplot2)
library(cowplot)
library(ggsci)
library(readxl)
library(ggh4x)

In [2]:
# Load the stored differential-expression results, one object per model.
JHOS2 <- readRDS("JHOS2_PBRs_diffexpress_subsampled.RDS")
PDC2  <- readRDS("PDC2_PBRs_diffexpress_subsampled.RDS")
PDC3  <- readRDS("PDC3_PBRs_diffexpress_subsampled.RDS")

In [3]:
# Tab-separated table of protein-coding genes; its `gene_name` column is used
# below to restrict the differential-expression results.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
protein.coding.genes <- read.delim(
  file = "protein_coding_genes_GRCh38.109.txt",
  header = TRUE,
  sep = "\t"
)

In [4]:
# Keep only the rows whose row name appears in the `gene_name` column of the
# protein-coding gene table. Applied to every result table of every model.
keep_protein_coding <- function(de_table) {
  is_coding <- rownames(de_table) %in% protein.coding.genes$gene_name
  de_table[is_coding, ]
}

JHOS2_filtered <- lapply(JHOS2, keep_protein_coding)
PDC2_filtered <- lapply(PDC2, keep_protein_coding)
PDC3_filtered <- lapply(PDC3, keep_protein_coding)

In [5]:
# Keep only the genes with FDR < 0.01 in every result table.
# which() is used so that rows with a missing FDR are dropped: plain logical
# indexing turns each NA into an all-NA row, and those rows would be counted
# as DEGs further down.
keep_significant <- function(de_table) {
  de_table[which(de_table$FDR < 0.01), ]
}

JHOS2_filtered <- lapply(JHOS2_filtered, keep_significant)
PDC2_filtered <- lapply(PDC2_filtered, keep_significant)
PDC3_filtered <- lapply(PDC3_filtered, keep_significant)

In [6]:
# Split every result table into up-regulated (logFC > 0.5) and down-regulated
# (logFC < -0.5) genes.
# which() drops rows whose logFC is NA (including all-NA rows left over from
# an earlier logical subset) instead of carrying them into both lists.
lfc_cutoff <- 0.5

keep_up <- function(de_table) {
  de_table[which(de_table$logFC > lfc_cutoff), ]
}
keep_down <- function(de_table) {
  de_table[which(de_table$logFC < -lfc_cutoff), ]
}

JHOS2_filtered_lfc_up <- lapply(JHOS2_filtered, keep_up)
JHOS2_filtered_lfc_down <- lapply(JHOS2_filtered, keep_down)

PDC2_filtered_lfc_up <- lapply(PDC2_filtered, keep_up)
PDC2_filtered_lfc_down <- lapply(PDC2_filtered, keep_down)

PDC3_filtered_lfc_up <- lapply(PDC3_filtered, keep_up)
PDC3_filtered_lfc_down <- lapply(PDC3_filtered, keep_down)

In [7]:
# Count the up- and down-regulated genes per drug and model.
# The columns are combined by position, so every list is first reordered to
# the drug order of the JHOS2 results; a model with a different set of drugs
# stops the script instead of producing mislabelled counts.
drugs <- names(JHOS2_filtered_lfc_up)
stopifnot(
  !is.null(drugs),
  setequal(drugs, names(PDC2_filtered_lfc_up)),
  setequal(drugs, names(PDC3_filtered_lfc_up))
)

# vapply() guarantees one integer per drug; sapply() could silently return a
# list if an element were not a table.
count_degs <- function(de_tables) {
  vapply(de_tables[drugs], nrow, integer(1))
}

all_results_filtered <- data.frame(
  "JHOS2_up" = count_degs(JHOS2_filtered_lfc_up),
  "JHOS2_down" = count_degs(JHOS2_filtered_lfc_down),
  "PDC2_up" = count_degs(PDC2_filtered_lfc_up),
  "PDC2_down" = count_degs(PDC2_filtered_lfc_down),
  "PDC3_up" = count_degs(PDC3_filtered_lfc_up),
  "PDC3_down" = count_degs(PDC3_filtered_lfc_down)
)

# The row names are the drug names taken from the named count vectors.
all_results_filtered$Drug <- rownames(all_results_filtered)

In [8]:
# Reshape to long format: one row per drug and model/direction column.
# The id column is named explicitly rather than left for melt() to guess from
# the column types (the guess is what triggers the "Using Drug as id
# variables" message).
all_results_filtered <- melt(all_results_filtered, id.vars = "Drug")

Using Drug as id variables



In [9]:
# Give the melted columns descriptive names.
names(all_results_filtered) <- c(
  "Drug",
  "Model_upDown",
  "Count_significant_DEGs"
)

In [10]:
# Split labels such as "PDC2_up" at the underscore: the first part is the
# model, the second the direction of change. The factor levels fix the order
# in which models and directions are drawn.
label_parts <- strsplit(
  as.character(all_results_filtered$Model_upDown),
  split = "_"
)

all_results_filtered$Model <- factor(
  vapply(label_parts, function(parts) parts[1], character(1)),
  levels = c("PDC2", "PDC3", "JHOS2")
)
all_results_filtered$upDown <- factor(
  vapply(label_parts, function(parts) parts[2], character(1)),
  levels = c("up", "down")
)

In [11]:
# Total DEG count per drug, summed over all models and both directions, then
# sorted from the largest to the smallest total. This order is reused for the
# facets of the plot.
# The columns are referenced by name rather than by position, and `TRUE` is
# spelled out because `T` can be reassigned.
order_drugs <- aggregate(
  Count_significant_DEGs ~ Drug,
  data = all_results_filtered,
  FUN = sum
)
order_drugs <- order_drugs[
  order(order_drugs$Count_significant_DEGs, decreasing = TRUE),
]

In [12]:
# Read the drug annotation sheet as a plain data frame and keep its first
# three columns; rows are named after the `Preferred name` column.
high_anno <- read_xlsx(path = "mechanisms_of_actions.xlsx", sheet = 1)
high_anno <- as.data.frame(high_anno)[, 1:3]
rownames(high_anno) <- high_anno$`Preferred name`

# Fill colour for each higher-level drug class; looked up by class name when
# the facet strips are coloured.
custom_palette <- c(
  "Bcl-2 inhibitor"           = "#2f4f4f",
  "BET inhibitor"             = "#a52a2a",
  "Control"                   = "#bcbec0",
  "CDK inhibitor"             = "#228b22",
  "CHK1 inhibitor"            = "#4b0082",
  "HDAC inhibitor"            = "#ff8c00",
  "IAPs/SMAC mimetic"         = "#d2b48c",
  "Multi-kinase inhibitor"    = "#00ff00",
  "PAK inhibitor"             = "#00bfff",
  "PARP inhibitor"            = "#0000ff",
  "PI3K/mTOR/AKT inhibitor"   = "#ff1493",
  "PLK1 inhibitor"            = "#ffff54",
  "Ras/Raf/MEK/ERK inhibitor" = "#dda0dd",
  "XPO1/CRM1 inhibitor"       = "#7fffd4"
)

# One strip fill per drug: the colour of the drug's higher-level class, named
# by the drug's preferred name.
drug_fills <- setNames(
  object = custom_palette[high_anno$`Higher level classification`],
  nm = high_anno$`Preferred name`
)

# Drop the DMSO entry. A logical mask is used because `x[-which(cond)]`
# returns an EMPTY vector when nothing matches, which would wipe all colours
# if "DMSO" were ever missing from the annotation sheet.
drug_fills <- drug_fills[!(names(drug_fills) %in% "DMSO")]

# Put the fills in the facet order (drugs sorted by total DEG count).
# NOTE(review): fills are matched to facets by position, so this assumes every
# plotted drug has an annotation row - confirm against the annotation sheet.
drug_fills <- drug_fills[order(match(names(drug_fills), order_drugs$Drug))]

# Strip specification passed to facet_wrap2() when the figure is drawn.
strip_colors <- strip_themed(background_x = elem_list_rect(fill = drug_fills))

[1m[22mNew names:
[36m•[39m `` -> `...4`
[36m•[39m `` -> `...5`


In [13]:
# Stacked bar chart of DEG counts: one facet per drug (ordered by total DEG
# count), one bar per model, with up- and down-regulated genes told apart by
# opacity. Facet strips are coloured through `strip_colors`.
model_fills <- setNames(
  object = pal_npg()(3),
  nm = c("JHOS2", "PDC2", "PDC3")
)

deg_barchart <- ggplot(
  data = all_results_filtered,
  mapping = aes(
    x = Model,
    y = Count_significant_DEGs,
    fill = Model,
    colour = Model,
    alpha = upDown
  )
) +
  geom_bar(stat = "identity", width = 0.7) +
  scale_fill_manual(values = model_fills) +
  # Opaque for the first direction level, faint for the second.
  scale_alpha_manual(values = c(1, 0.2)) +
  # Black outline for every model.
  scale_color_manual(values = rep("black", 3)) +
  theme_classic() +
  ylab("No. of DEGs") +
  theme(
    strip.text = element_text(colour = "white", face = "bold"),
    axis.text.x = element_blank(),
    axis.text.y = element_text(colour = "black"),
    axis.line.x = element_line(colour = "black"),
    axis.ticks.x = element_blank()
  ) +
  facet_wrap2(
    ~factor(Drug, levels = order_drugs$Drug),
    nrow = 3,
    strip = strip_colors
  )

ggsave(
  filename = "PBRs_barchart_drugs_all_Fig3_protein_coding_genes_subsampled_pseudobulk_DGE.pdf",
  plot = deg_barchart,
  width = 16,
  height = 6
)

In [14]:
# Print the R version, platform and attached/loaded package versions used for this run.
sessionInfo()

R version 4.2.2 (2022-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Rocky Linux 8.8 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /homedir01/adini22/.conda/envs/cellhashing_analyses/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggh4x_0.2.4.9000 readxl_1.4.1     ggsci_3.0.0      cowplot_1.1.1   
[5] ggplot2_3.4.2    reshape2_1.4.4  

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.10       cellranger_1.1.0  pillar_1.9.0      compiler_4.2.2   
 [5] plyr_1.8.8        base64enc_0.1-3   to