## Set Library Path

In [1]:
.libPaths("/home/vhaghani/anaconda3/envs/jupyter_nb_R4.4.2/lib/R/library")

## Load Libraries

In [3]:
library(dplyr)
library(tidyr)
library(tidyverse)
library(enrichR)
library(openxlsx)
library(ggplot2)

## Load Data

In [4]:
# Read in modules
module_membership <- read.delim("06_WGCNA/filtered_module_membership.txt")

# View data
head(module_membership)

Unnamed: 0_level_0,Probe,treatment,Module,entrez_gene_id,external_gene_name,greenyellow,white,black,turquoise,plum1,⋯,darkmagenta,grey60,skyblue,orange,darkolivegreen,steelblue,violet,floralwhite,lightgreen,darkturquoise
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,ENSMUSG00000000001,,tan,14679,Gnai3,0.55229621,0.2263257,-0.06552225,0.2886875,0.0001263238,⋯,0.14084283,0.1023081,0.311802994,0.24719505,0.1805406,-0.3897085,-0.118955,-0.21915725,-0.43610818,-0.71604808
2,ENSMUSG00000000028,,purple,12544,Cdc45,-0.09481233,-0.1547947,-0.30072175,0.1891965,0.1369664885,⋯,-0.19264559,-0.1103468,0.104636467,0.03342896,-0.1803781,0.1165139,0.2375027,-0.46483099,-0.53169569,-0.54525555
3,ENSMUSG00000000031,,mediumpurple3,14955,H19,-0.43095943,-0.2349354,-0.32095044,-0.4484298,-0.564901596,⋯,0.39945429,0.2603373,0.395481899,0.30612813,0.5099523,0.2060982,-0.1094473,0.25812459,0.7213953,0.28664828
4,ENSMUSG00000000037,,blue,107815,Scml2,-0.25072585,-0.3682261,-0.19434719,-0.4289547,0.1234594909,⋯,0.03983615,0.3355153,-0.001035538,-0.12903201,-0.2096256,0.2233409,-0.3970837,-0.04257972,0.01592102,0.58997201
5,ENSMUSG00000000049,,blue,11818,Apoh,-0.49984548,-0.461852,-0.6524966,-0.3347883,-0.2601837701,⋯,0.38233221,0.2761443,0.391118407,-0.20078335,-0.2333499,0.5572453,-0.1564202,-0.40815405,-0.24198338,0.08961941
6,ENSMUSG00000000056,,skyblue,67608,Narf,-0.45173551,-0.702417,-0.6353318,-0.3199402,0.2636769988,⋯,0.31893909,0.6960881,0.563556385,0.09328353,-0.122126,0.5278084,-0.6232543,-0.56165465,-0.47475211,0.10572537


## Separate Genes Per Module

In [5]:
# View modules
modules <- unique(module_membership$Module)

print(modules)

 [1] "tan"             "purple"          "mediumpurple3"   "blue"           
 [5] "skyblue"         "grey60"          "turquoise"       "darkgreen"      
 [9] "yellow"          "red"             "grey"            "brown"          
[13] "pink"            "magenta"         "salmon"          "cyan"           
[17] "black"           "lightcyan"       "lightgreen"      "lightyellow"    
[21] "green"           "orangered4"      "greenyellow"     "orange"         
[25] "saddlebrown"     "midnightblue"    "darkgrey"        "white"          
[29] "paleturquoise"   "darkturquoise"   "sienna3"         "darkorange"     
[33] "darkred"         "royalblue"       "darkolivegreen"  "skyblue3"       
[37] "yellowgreen"     "violet"          "lightsteelblue1" "plum1"          
[41] "darkmagenta"     "lightcyan1"      "floralwhite"     "ivory"          
[45] "steelblue"      


In [6]:
# Iterate over each module
for (module in modules) {
  # Filter data for the current module
  module_data <- module_membership[module_membership$Module == module, ]
  
  # Extract genes from external_gene_name column
  genes <- module_data$external_gene_name
  
  # Remove NA values
  genes <- genes[!is.na(genes)]
  
  # Create a data frame with a single column containing genes
  genes_df <- data.frame(Genes = genes)
  
  # Define the filename for the CSV
  filename <- paste0("06_WGCNA/", module, "_genes.csv")

  # Rename the column
  colnames(genes_df) <- module
    
  # Write the genes to a CSV file
  write.csv(genes_df, file = filename, row.names = FALSE)
}

In [7]:
# List to store genes for each module
gene_lists <- list()

# Iterate over each module
for (module in modules) {
  # Define the filename for the CSV
  filename <- paste0("06_WGCNA/", module, "_genes.csv")
  
  # Read CSV file
  module_genes <- read.csv(filename, header = FALSE)$V1
  
  # Store genes in the list
  gene_lists[[module]] <- module_genes
}

# Find the maximum length among all gene lists
max_length <- max(sapply(gene_lists, length))

# Pad shorter gene lists with NA values to make all lists equal length
for (module in modules) {
  diff_length <- max_length - length(gene_lists[[module]])
  if (diff_length > 0) {
    gene_lists[[module]] <- c(gene_lists[[module]], rep(NA, diff_length))
  }
}

# Combine gene lists into a data frame
merged_df <- as.data.frame(gene_lists)

# Rename columns with module names
colnames(merged_df) <- modules

# Remove the first row that has module names
merged_df <- merged_df[-1, ]

# Print the first few rows of the merged data frame
head(merged_df)

Unnamed: 0_level_0,tan,purple,mediumpurple3,blue,skyblue,grey60,turquoise,darkgreen,yellow,red,⋯,skyblue3,yellowgreen,violet,lightsteelblue1,plum1,darkmagenta,lightcyan1,floralwhite,ivory,steelblue
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
2,Gnai3,Cdc45,H19,Scml2,Narf,Cav2,Klf6,Tbx2,Ngfr,Wnt3,⋯,Blvra,Pnck,Angptl4,Map2k7,Timm44,Ralb,Aldh3a2,Pam16,E2f3,Slc35c2
3,Icosl,Krit1,Lgals9,Apoh,Spa17,Cyp51,Cox5a,Ppm1j,Nalcn,Pih1d2,⋯,Fam162a,Shh,Grk5,Brd8,Lpcat3,Atp4a,Nlk,Ltc4s,Ddx42,Shroom1
4,Meox1,Dlg3,Lsr,Scmh1,Adprh,Dhrs1,Dlat,Tmem161a,Trim25,Btbd17,⋯,Apip,Trh,Pias4,Pan2,Gpr108,Tomm40l,Rev3l,Pgam2,Lmln,Pnpo
5,Ifrd1,Mcm3ap,Col18a1,Xpo6,Pex19,Bmp8b,Sdhd,Dnase1,Ckmt1,Mid2,⋯,H2bc4,P4htm,Fli1,Zbtb17,Gmnn,Nr1i3,Lratd1,Akr1c13,Hmgxb3,Abtb3
6,Aif1l,Vps50,Cavin1,Tfe3,Gcdh,Prkcsh,Gmpr,Pola1,Oprm1,Kat2b,⋯,Bzw2,Unkl,Rnf145,Nxf1,Ccl6,Kptn,Carmil1,Crhbp,Alas2,Pggt1b
7,Ugp2,Gtf2h4,Angptl2,Ccnd2,Pgf,Bicd1,Scpep1,Ezh1,Slc5a5,S100a6,⋯,Taf11,Acot8,Bltp3b,Ube2i,Reps1,Prpf31,Mrs2,Otp,Plcd4,Drd4


In [8]:
# Save all modules and genes into a dataframe
write.csv(merged_df, file = "06_WGCNA/all_genes_per_module.csv", row.names = FALSE)

## Carry Out Gene Ontology with EnrichR

In [9]:
# Iterate over each module
for (module in modules) {
  tryCatch({
    # Perform the enrichR analysis on the gene list for the current module
    enrichr_results <- enrichr(merged_df[[module]], c("GO_Biological_Process_2023",
                                                     "GO_Cellular_Component_2023",
                                                     "GO_Molecular_Function_2023",
                                                     "KEGG_2019_Mouse",
                                                     "Panther_2016",
                                                     "Reactome_2016",
                                                     "RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO"))
    
    # Check if enrichr_results is empty
    if (length(enrichr_results) == 0) {
      cat("No results for module", module, "\n")
      next
    }
    
    # Save Enrichr outputs
    wb <- createWorkbook()
    
    for (i in seq_along(enrichr_results)) {
      # Extract the data frame from the list
      df <- enrichr_results[[i]]
      
      # Check if the data frame is empty
      if (nrow(df) == 0) {
        cat("Empty data frame for", names(enrichr_results)[i], "in module", module, "\n")
        next
      }
      
      # Define the original sheet name
      original_sheet_name <- names(enrichr_results)[i]
      
      # Modify the sheet name if it's specifically "RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO"
      sheet_name <- if (original_sheet_name == "RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO") {
        "RNAseq_DiseaseGene_DrugSigs_GEO"
      } else {
        original_sheet_name
      }
      
      # Add the data frame as a new sheet in the Excel workbook
      addWorksheet(wb, sheet_name)
      writeData(wb, sheet = sheet_name, x = df)
    }
    
    # Save the Excel workbook
    saveWorkbook(wb, paste0("06_WGCNA/", module, "_enrichr_results.xlsx"), overwrite = TRUE)
    
    # Function to plot and save the results
    plot_and_save <- function(df, filename, title) {
      if (nrow(df) == 0) {
        cat("Empty data frame for", title, "in module", module, "\n")
        return()
      }
      pdf(filename, height = 7, width = 15)
      print(plotEnrich(df, showTerms = 25, numChar = 75, y = "Count", orderBy = "P.value") + ggtitle(title))
      dev.off()
    }
    
    # Plot and save Enrichr results
    plot_and_save(enrichr_results$GO_Biological_Process_2023, 
                  paste0("06_WGCNA/", module, "_GO_Biological_Process_2023.pdf"), 
                  paste("GO_Biological_Process_2023 for", module, "module"))
    
    plot_and_save(enrichr_results$GO_Cellular_Component_2023, 
                  paste0("06_WGCNA/", module, "_GO_Cellular_Component_2023.pdf"), 
                  paste("GO_Cellular_Component_2023 for", module, "module"))
    
    plot_and_save(enrichr_results$GO_Molecular_Function_2023, 
                  paste0("06_WGCNA/", module, "_GO_Molecular_Function_2023.pdf"), 
                  paste("GO_Molecular_Function_2023 for", module, "module"))
    
    plot_and_save(enrichr_results$KEGG_2019_Mouse, 
                  paste0("06_WGCNA/", module, "_KEGG_2019_Mouse.pdf"), 
                  paste("KEGG_2019_Mouse for", module, "module"))
    
    plot_and_save(enrichr_results$Panther_2016, 
                  paste0("06_WGCNA/", module, "_Panther_2016.pdf"), 
                  paste("Panther_2016 for", module, "module"))
    
    plot_and_save(enrichr_results$Reactome_2016, 
                  paste0("06_WGCNA/", module, "_Reactome_2016.pdf"), 
                  paste("Reactome_2016 for", module, "module"))
    
    plot_and_save(enrichr_results$`RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO`, 
                  paste0("06_WGCNA/", module, "_RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO.pdf"), 
                  paste("RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO for", module, "module"))
    
  }, error = function(e) {
    cat("Error occurred for module", module, ": ", conditionMessage(e), "\n")
    # Log the error to a file for further inspection
    write(paste("Error occurred for module", module, ": ", conditionMessage(e), "\n"), file = "error_log.txt", append = TRUE)
    # Continue to the next module
    next
  })
}

Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Empty data frame for Reactome_2016 in module tan 
Empty data frame for Reactome_2016 for tan module in module tan 
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Empty data frame for Reactome_2016 in module purple 
Empty data frame for Reactome_2016 for purple module in module purple 
Uploading dat

“There are duplicated trimmed names in the plot, consider increasing the 'numChar' setting.”


Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Empty data frame for Reactome_2016 in module black 
Empty data frame for Reactome_2016 for black module in module black 
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Empty data frame for Reactome_2016 in module lightcyan 
Empty data frame for Reactome_2016 for lightcyan module in module lightcyan