## Set Library Path

In [1]:
# Set the package library search path to a fixed directory.
# NOTE(review): the path is hard-coded and looks like a user-specific conda
# environment (R 4.3) — adjust it when running on another machine or account.
.libPaths("/share/korflab/home/viki/anaconda3/jupyter_nb_R4.3/lib/R/library")

## Load Libraries

In [3]:
library(dplyr)
library(tidyr)
library(tidyverse)
library(enrichR)
library(openxlsx)
library(ggplot2)

## Load Data

In [4]:
# Read the module table (tab-delimited, first line is the header).
# It has one row per probe; the columns used below are `Module` (the assigned
# module label) and `external_gene_name` (the gene symbol).
# NOTE(review): the per-colour numeric columns are presumably module
# membership values, judging by the file name — confirm against the step that
# wrote this file.
module_membership <- read.delim("06_WGCNA/module_membership.txt")

# Preview the first rows
head(module_membership)

Unnamed: 0_level_0,Probe,green,purple,darkgrey,floralwhite,darkmagenta,darkorange2,steelblue,lightgreen,lightsteelblue1,⋯,royalblue,lightyellow,red,cyan,midnightblue,grey,treatment,Module,entrez_gene_id,external_gene_name
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<chr>,<int>,<chr>
1,ENSMUSG00000000001,0.53142578,0.319054961,0.09441111,0.17026045,0.56879173,0.17024136,0.56362065,0.40996143,0.33985973,⋯,-0.43415736,-0.13178895,-0.49889088,-0.7366948,-0.2444481,0.21113112,,darkturquoise,14679,Gnai3
2,ENSMUSG00000000028,0.60286963,0.001076259,-0.20567613,-0.21333172,0.36991102,-0.25185972,-0.19855241,-0.12488154,-0.33269158,⋯,-0.53578706,0.07207819,-0.50864414,-0.6742956,-0.4225157,0.12412939,,green,12544,Cdc45
3,ENSMUSG00000000031,-0.56823447,-0.371424292,-0.15641282,-0.02060351,-0.13964062,-0.12241386,-0.17642277,-0.47723936,0.05839258,⋯,0.92420016,0.11543744,0.54617147,0.3178056,0.4676651,-0.05378321,,royalblue,14955,H19
4,ENSMUSG00000000037,-0.19368906,-0.050444428,-0.08844034,0.08340248,-0.05129436,0.4173677,0.04044405,-0.01045664,-0.03456155,⋯,0.28891655,-0.2547847,0.0434939,0.2439955,-0.1895834,-0.09170282,,blue,107815,Scml2
5,ENSMUSG00000000049,0.02317175,-0.31209692,-0.43382664,-0.13326823,0.25839775,0.01139912,-0.28451796,-0.74208287,-0.36940677,⋯,0.07591392,0.0113097,-0.06507401,-0.1504348,-0.4146887,0.26865218,,blue,11818,Apoh
6,ENSMUSG00000000056,-0.08285896,-0.522418637,-0.67198767,-0.5776342,0.38722027,0.28926086,0.12705343,-0.19322065,0.01120284,⋯,-0.13355367,0.19881728,-0.0243418,-0.2208846,-0.6033503,-0.03792721,,greenyellow,67608,Narf


## Separate Genes Per Module

In [5]:
# Collect the distinct module labels from the Module column
# (unique() keeps them in order of first appearance)
modules <- unique(module_membership$Module)

# Show the module labels
print(modules)

 [1] "darkturquoise"   "green"           "royalblue"       "blue"           
 [5] "greenyellow"     "turquoise"       "cyan"            "brown"          
 [9] "black"           "red"             "violet"          "magenta"        
[13] "yellow"          "pink"            "tan"             "lightyellow"    
[17] "paleturquoise"   "plum1"           "midnightblue"    "lightgreen"     
[21] "salmon"          "purple"          "grey"            "ivory"          
[25] "lightcyan"       "white"           "skyblue3"        "lightcyan1"     
[29] "skyblue"         "grey60"          "steelblue"       "darkred"        
[33] "darkgrey"        "saddlebrown"     "lightsteelblue1" "orange"         
[37] "darkgreen"       "orangered4"      "darkorange"      "floralwhite"    
[41] "darkolivegreen"  "sienna3"         "darkmagenta"     "yellowgreen"    
[45] "darkorange2"     "mediumpurple3"  


In [6]:
# Export the gene symbols of every module to its own CSV file
for (module in modules) {
  # Output path: 06_WGCNA/<module>_genes.csv
  filename <- paste0("06_WGCNA/", module, "_genes.csv")

  # Rows of the membership table assigned to this module
  in_module <- module_membership$Module == module
  module_data <- module_membership[in_module, ]

  # Gene symbols of the module, skipping entries without a symbol (NA)
  genes <- module_data[["external_gene_name"]]
  genes <- genes[!is.na(genes)]

  # Single-column data frame whose header is the module name
  genes_df <- setNames(data.frame(genes), module)

  # One gene per line, module name as the header line, no row names
  write.csv(genes_df, file = filename, row.names = FALSE)
}

In [7]:
# Build one table with a column of gene symbols per module.
#
# Each per-module CSV has the module name as its header line followed by one
# gene symbol per line. The header is consumed as a header here (instead of
# being read as data and dropped by row index afterwards), so every element of
# gene_lists holds gene symbols only and merged_df rows are numbered from 1.
# colClasses = "character" stops read.csv from guessing another type for a
# column of symbols (e.g. a module whose only symbols look like numbers or
# logicals).
gene_lists <- lapply(modules, function(module) {
  filename <- paste0("06_WGCNA/", module, "_genes.csv")
  read.csv(filename, header = TRUE, colClasses = "character")[[1]]
})
names(gene_lists) <- modules

# Number of genes in the largest module (0 if there are no modules at all)
max_length <- max(0L, lengths(gene_lists))

# Pad shorter gene lists with NA so that all columns have the same length;
# assigning a longer length to a vector fills the new slots with NA
gene_lists <- lapply(gene_lists, function(genes) {
  length(genes) <- max_length
  genes
})

# Combine gene lists into a data frame
merged_df <- as.data.frame(gene_lists, stringsAsFactors = FALSE)

# Restore the exact module names as column names
colnames(merged_df) <- modules

# Print the first few rows of the merged data frame
head(merged_df)

Unnamed: 0_level_0,darkturquoise,green,royalblue,blue,greenyellow,turquoise,cyan,brown,black,red,⋯,darkgreen,orangered4,darkorange,floralwhite,darkolivegreen,sienna3,darkmagenta,yellowgreen,darkorange2,mediumpurple3
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
2,Gnai3,Cdc45,H19,Scml2,Narf,Klf6,Scmh1,Tbx2,Ngfr,Wnt3,⋯,Rab34,Golga2,Fkbp7,Hdgfl2,Dll3,Atp4a,Saal1,Slc35c2,Slc25a35,Eef1akmt1
3,Cav2,Brat1,Pdgfb,Apoh,Ckmt1,Cox5a,Th,Xpo6,Nalcn,Btbd17,⋯,Ddb2,Grk5,Syce2,Napa,Klc4,Rpl10,Ddah2,Abtb3,Nkd2,B3gnt5
4,Vps50,Lhx2,Acvrl1,Fer,Comt,Pih1d2,Nkx2-1,Gna12,Egfl6,Tpd52l1,⋯,Prodh,Vax1,Sgta,Gosr1,Nr2c1,Baiap2l2,Zmat5,Lrrc9,Kctd6,Tdrd1
5,Ift46,Bcl6b,Serpinf1,Tfe3,Gstt1,Sdhd,Brinp2,Dgke,Slc5a5,Lck,⋯,Rnf215,Dedd,Wdr83,Ager,Map3k4,Pfn1,Tmem115,Cbln2,D16Ertd472e,Farsb
6,Vrk3,Fap,Lgals9,Drp2,Clpb,Gmpr,Wars2,Mnt,Usp32,Dynlt1c,⋯,Lpcat3,Sertad4,Shpk,Cyp46a1,Elmo3,Ctdnep1,Pls3,Cyp2u1,Csrnp1,Dcaf6
7,Ercc1,Cttnbp2,Lsr,Tom1l2,Clgn,Hddc2,Mettl25b,Tamalin,Pcbp3,Mmp11,⋯,Gpr108,Adarb1,Crip1,Dyrk1a,Notch4,Tlcd1,Psmd11,Nfkb1,Manf,Larp7


In [8]:
# Write the merged table (one column per module, gene symbols down the rows)
# to a single CSV file, without row names
write.csv(merged_df, file = "06_WGCNA/all_genes_per_module.csv", row.names = FALSE)

## Carry Out Gene Ontology with EnrichR

In [9]:
# Enrichr libraries queried for every module
enrichr_databases <- c(
  "GO_Biological_Process_2023",
  "GO_Cellular_Component_2023",
  "GO_Molecular_Function_2023",
  "KEGG_2019_Mouse",
  "Panther_2016",
  "Reactome_2016",
  "RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO"
)

# Worksheet names used in place of library names that are too long to be an
# Excel sheet name (the limit is 31 characters)
sheet_name_overrides <- c(
  "RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO" =
    "RNAseq_DiseaseGene_DrugSigs_GEO"
)

# Plot the top terms of one Enrichr result table and save the plot as a PDF.
#
#   df       - result table for a single library (one element of the list
#              returned by enrichr()); NULL or zero rows means nothing to plot
#   filename - path of the PDF to write
#   title    - plot title, also used in the "empty" message
#
# Reads the current loop variable `module` for the "empty" message only.
# Called for its side effect; returns NULL invisibly.
plot_and_save <- function(df, filename, title) {
  if (is.null(df) || nrow(df) == 0) {
    cat("Empty data frame for", title, "in module", module, "\n")
    return(invisible(NULL))
  }
  pdf(filename, height = 7, width = 15)
  # Close the device even if plotting fails, so a failed module does not leave
  # an open PDF device behind
  on.exit(dev.off(), add = TRUE)
  print(plotEnrich(df, showTerms = 25, numChar = 75, y = "Count", orderBy = "P.value") + ggtitle(title))
  invisible(NULL)
}

# Run Enrichr for each module; save the tables to one workbook per module and
# one bar plot PDF per library
for (module in modules) {
  # Columns of merged_df are padded with NA up to the longest module; drop the
  # padding (and empty symbols) so they are not submitted as gene names
  genes <- merged_df[[module]]
  genes <- genes[!is.na(genes) & nzchar(genes)]

  if (length(genes) == 0) {
    cat("No genes for module", module, "\n")
    next
  }

  tryCatch({
    # Perform the enrichR analysis on the gene list for the current module
    enrichr_results <- enrichr(genes, enrichr_databases)

    if (length(enrichr_results) == 0) {
      cat("No results for module", module, "\n")
    } else {
      # Save Enrichr outputs: one worksheet per library with results
      wb <- createWorkbook()

      for (i in seq_along(enrichr_results)) {
        original_sheet_name <- names(enrichr_results)[i]
        df <- enrichr_results[[i]]

        # Skip libraries that returned nothing
        if (is.null(df) || nrow(df) == 0) {
          cat("Empty data frame for", original_sheet_name, "in module", module, "\n")
          next
        }

        # Use the shortened name where the library name is not a valid sheet name
        sheet_name <- if (original_sheet_name %in% names(sheet_name_overrides)) {
          sheet_name_overrides[[original_sheet_name]]
        } else {
          original_sheet_name
        }

        addWorksheet(wb, sheet_name)
        writeData(wb, sheet = sheet_name, x = df)
      }

      # Save the Excel workbook
      saveWorkbook(wb, paste0("06_WGCNA/", module, "_enrichr_results.xlsx"), overwrite = TRUE)

      # Plot and save Enrichr results, one PDF per library
      for (db_name in enrichr_databases) {
        plot_and_save(enrichr_results[[db_name]],
                      paste0("06_WGCNA/", module, "_", db_name, ".pdf"),
                      paste(db_name, "for", module, "module"))
      }
    }
  }, error = function(e) {
    cat("Error occurred for module", module, ": ", conditionMessage(e), "\n")
    # Log the error to a file for further inspection
    write(paste("Error occurred for module", module, ": ", conditionMessage(e), "\n"), file = "error_log.txt", append = TRUE)
    # No `next` here: the handler runs as a function outside the loop's frame,
    # where `next` itself raises an error and stops the whole run. tryCatch()
    # is the last statement of the iteration, so returning from the handler
    # already moves on to the next module.
  })
}

Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Query

“There are duplicated trimmed names in the plot, consider increasing the 'numChar' setting.”


Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Query

“There are duplicated trimmed names in the plot, consider increasing the 'numChar' setting.”


Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Query

“There are duplicated trimmed names in the plot, consider increasing the 'numChar' setting.”


Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2023... Done.
  Querying GO_Cellular_Component_2023... Done.
  Querying GO_Molecular_Function_2023... Done.
  Querying KEGG_2019_Mouse... Done.
  Querying Panther_2016... Done.
  Querying Reactome_2016... Done.
  Querying RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO... Done.
Parsing results... Done.
