## Set Library Path

In [20]:
# Point R's library search path at this environment's package library so the
# library() calls below resolve from it.
# NOTE(review): absolute, machine-specific path — adjust before running on
# another host.
.libPaths("/share/korflab/home/viki/anaconda3/jupyter_nb_R4.3/lib/R/library")

## Load Libraries

In [3]:
library(VennDiagram)
library(UpSetR)
library(openxlsx)
library(readxl)
library(dplyr)
library(glue)
library(ggplot2)
library(viridis)
library(ggrepel)

## Visualize DEG Analysis

### Read in Data

In [24]:
# List all DEG result tables in 05_DEGs (files ending in "_genes.csv").
# The dot is escaped so that only a literal ".csv" suffix matches; in the
# previous pattern ("\\_genes.csv$") the bare "." matched any character.
files <- list.files(path = "05_DEGs", pattern = "_genes\\.csv$", full.names = TRUE)

# View
print(files)

[1] "05_DEGs/control_male_vs_control_female_genes.csv"   
[2] "05_DEGs/faexcess_female_vs_control_female_genes.csv"
[3] "05_DEGs/faexcess_female_vs_control_male_genes.csv"  
[4] "05_DEGs/faexcess_male_vs_control_female_genes.csv"  
[5] "05_DEGs/faexcess_male_vs_control_male_genes.csv"    
[6] "05_DEGs/faexcess_male_vs_faexcess_female_genes.csv" 
[7] "05_DEGs/faexcess_vs_control_genes.csv"              


In [25]:
# Filter to comparisons that contain at least one significant DEG

# Count the significant DEGs (adj.P.Val < 0.05) in every file.
# The count uses sum(..., na.rm = TRUE) rather than subsetting the data frame:
# `data[data$adj.P.Val < 0.05, ]` turns every NA adjusted p-value into an
# all-NA row, which inflates nrow() and can make a file without any real
# significant DEG look like it has some.
n_significant <- vapply(files, function(deg_file) {
  # Read the CSV file
  deg_table <- read.csv(deg_file)

  n <- sum(deg_table$adj.P.Val < 0.05, na.rm = TRUE)

  # Print the file name and number of significant DEGs
  cat("File:", basename(deg_file), "- Number of significant DEGs:", n, "\n")

  n
}, integer(1), USE.NAMES = FALSE)

# Files with at least one significant DEG
significant_DEGs_files <- files[n_significant > 0]

# Overwrite to reflect only significant DEGs
files <- significant_DEGs_files

# View files with significant DEGs
cat("\nFiles with significant DEGs:\n")
print(files)

File: control_male_vs_control_female_genes.csv - Number of significant DEGs: 0 
File: faexcess_female_vs_control_female_genes.csv - Number of significant DEGs: 161 
File: faexcess_female_vs_control_male_genes.csv - Number of significant DEGs: 0 
File: faexcess_male_vs_control_female_genes.csv - Number of significant DEGs: 0 
File: faexcess_male_vs_control_male_genes.csv - Number of significant DEGs: 0 
File: faexcess_male_vs_faexcess_female_genes.csv - Number of significant DEGs: 0 
File: faexcess_vs_control_genes.csv - Number of significant DEGs: 646 

Files with significant DEGs:
[1] "05_DEGs/faexcess_female_vs_control_female_genes.csv"
[2] "05_DEGs/faexcess_vs_control_genes.csv"              


### Make Volcano Plot

In [26]:
# Draw a volcano plot for one DEG table and save it as a PDF.
#
# Reads `file`, classifies every gene, labels the 10 genes with the largest and
# the 10 genes with the smallest logFC, prints summary ranges for those labelled
# genes, and writes the plot to
# "05_DEGs/volcano_plot_<file name without .csv>.pdf".
#
# Arguments:
#   file        Path to a DEG CSV containing the columns logFC, adj.P.Val and
#               external_gene_name.
#   p_cutoff    Adjusted p-value below which a gene counts as significant.
#   lfc_cutoff  Absolute log2 fold change a significant gene must exceed.
#   x_limits    Length-2 numeric vector of x-axis limits.
#   y_limits    Length-2 numeric vector of y-axis limits.
#               Points outside the limits are not drawn.
#
# Returns the value of dev.off(), as before.
#
# Changes compared with the previous version:
#   * A gene is coloured "Upregulated"/"Downregulated" only if it passes BOTH
#     the adjusted p-value and the fold-change cutoff. Previously the colour was
#     based on logFC alone (the `Significant` column was computed but never
#     used), so non-significant genes were drawn red/blue although the legend
#     calls the grey class "Not Significant".
#   * The title lookup tested "faexcess_genes_vs_control", which never matches
#     "faexcess_vs_control_genes"; that plot was titled with the raw file name.
#   * The PDF device is closed even if drawing fails.
create_volcano_plot <- function(file,
                                p_cutoff = 0.05,
                                lfc_cutoff = 1,
                                x_limits = c(-4.1, 4.1),
                                y_limits = c(0, 2.1)) {
  # Read the CSV file
  data <- read.csv(file)

  # TRUE only for genes with non-missing values that pass both cutoffs
  passes_cutoffs <- !is.na(data$adj.P.Val) & !is.na(data$logFC) &
    data$adj.P.Val < p_cutoff & abs(data$logFC) > lfc_cutoff

  # Colour class of every gene; fixed level order keeps the legend stable
  direction <- rep("Not Significant", nrow(data))
  direction[passes_cutoffs & data$logFC > 0] <- "Upregulated"
  direction[passes_cutoffs & data$logFC < 0] <- "Downregulated"
  data$Color <- factor(direction,
                       levels = c("Downregulated", "Not Significant", "Upregulated"))

  # Genes to label: the 10 largest and the 10 smallest logFC values
  # (chosen by fold change only, regardless of significance)
  top_upregulated <- data %>%
    arrange(desc(logFC)) %>%
    head(10)

  top_downregulated <- data %>%
    arrange(logFC) %>%
    head(10)

  # Print the number of labelled genes in each direction
  print(paste("Number of upregulated genes:", nrow(top_upregulated)))
  print(paste("Number of downregulated genes:", nrow(top_downregulated)))

  # Print the -log10(adj.P.Val) and logFC ranges of one labelled gene set
  report_ranges <- function(label, genes) {
    neg_log_p <- -log10(genes$adj.P.Val)
    print(paste(label, "genes - Min -log10(adj.P.Val):", min(neg_log_p, na.rm = TRUE)))
    print(paste(label, "genes - Max -log10(adj.P.Val):", max(neg_log_p, na.rm = TRUE)))
    print(paste(label, "genes - Min logFC:", min(genes$logFC, na.rm = TRUE)))
    print(paste(label, "genes - Max logFC:", max(genes$logFC, na.rm = TRUE)))
  }
  report_ranges("Upregulated", top_upregulated)
  report_ranges("Downregulated", top_downregulated)

  # Combine the top genes for labeling
  top_genes <- rbind(top_upregulated, top_downregulated)

  # File name without directory and ".csv"; used for the title and the PDF name
  filename <- gsub("\\.csv$", "", basename(file))

  # Turn a known comparison file name into a readable title; unknown names are
  # returned unchanged
  format_title <- function(filename) {
    if (grepl("faexcess_female_vs_control_female_genes", filename, fixed = TRUE)) {
      "Control Female vs. FAE Female"
    } else if (grepl("faexcess_vs_control_genes", filename, fixed = TRUE)) {
      "Control vs. FAE"
    } else {
      filename
    }
  }

  # Generate the volcano plot
  p <- ggplot(data, aes(x = logFC, y = -log10(adj.P.Val), color = Color)) +
    geom_point() +
    scale_color_manual(
      values = c("Downregulated" = "blue", "Not Significant" = "grey", "Upregulated" = "red"),
      # No explicit `labels`: they were identical to the level names, and a
      # fixed-length label vector no longer fits once a class is absent from
      # the data. drop = FALSE keeps all three classes in the scale.
      drop = FALSE,
      guide = guide_legend(override.aes = list(size = 4))
    ) +
    labs(title = paste("Volcano Plot for", format_title(filename)),
         x = "Log2 Fold Change",
         y = "-Log10 Adjusted P-Value") +
    scale_x_continuous(limits = x_limits, expand = expansion(mult = c(0.05, 0.05))) +
    scale_y_continuous(limits = y_limits, expand = expansion(mult = c(0.05, 0.05))) +
    theme_minimal() +
    theme(legend.position = "top",
          plot.title = element_text(hjust = 0.5)) +
    geom_label_repel(data = top_genes, aes(label = external_gene_name),
                     size = 3,
                     box.padding = 0.5,
                     point.padding = 0.5,
                     segment.color = 'grey50',
                     show.legend = FALSE)

  # Open the PDF only after the plot object exists, and make sure the device is
  # closed again if printing the plot fails
  pdf(file = paste0("05_DEGs/volcano_plot_", filename, ".pdf"), width = 8, height = 6)
  device_closed <- FALSE
  on.exit(if (!device_closed) dev.off(), add = TRUE)

  # Print the plot
  print(p)

  # Close the PDF device
  device_closed <- TRUE
  dev.off()
}

In [27]:
# Draw and save one volcano plot for every file kept in `files`
invisible(lapply(files, create_volcano_plot))

[1] "Number of upregulated genes: 10"
[1] "Number of downregulated genes: 10"
[1] "Upregulated genes - Min -log10(adj.P.Val): 0.212024619420637"
[1] "Upregulated genes - Max -log10(adj.P.Val): 1.36245130940759"
[1] "Upregulated genes - Min logFC: 2.62372110891518"
[1] "Upregulated genes - Max logFC: 3.89415298079131"
[1] "Downregulated genes - Min -log10(adj.P.Val): 0.374533189227352"
[1] "Downregulated genes - Max -log10(adj.P.Val): 1.40939172112212"
[1] "Downregulated genes - Min logFC: -3.03571890391901"
[1] "Downregulated genes - Max logFC: -1.93073337714869"
[1] "Number of upregulated genes: 10"
[1] "Number of downregulated genes: 10"
[1] "Upregulated genes - Min -log10(adj.P.Val): 0.284839616640349"
[1] "Upregulated genes - Max -log10(adj.P.Val): 1.80175834797695"
[1] "Upregulated genes - Min logFC: 1.55765267943444"
[1] "Upregulated genes - Max logFC: 3.150621640697"
[1] "Downregulated genes - Min -log10(adj.P.Val): 0.575468855580139"
[1] "Downregulated genes - Max -log10(adj.P.

### Separate Upregulated and Downregulated Genes

In [28]:
# Save upregulated and downregulated gene lists
for (file in files) {
  # Comparison name = file name without extension and without the trailing
  # "_genes", lower-cased, e.g.
  #   "faexcess_vs_control_genes.csv" -> "faexcess_vs_control".
  # The previous version split the name on "_vs_" and re-joined the two halves;
  # for a name that did not split into one or two parts it silently reused the
  # groups of the previous file, and for a name without "_vs_" it invented a
  # second group called "faexcess".
  comparison <- tolower(sub("_genes$", "", tools::file_path_sans_ext(basename(file))))

  # Read the current file
  df <- read.csv(file)

  gene_name <- as.character(df$external_gene_name)

  # Keep significant DEGs that have a fold change and a gene name.
  # Missing values are excluded explicitly: comparing NA with `<` yields NA, and
  # indexing a data frame with NA produces all-NA rows that would end up as
  # "NA" lines in the gene lists.
  is_usable <- !is.na(df$adj.P.Val) & df$adj.P.Val < 0.05 &
    !is.na(df$logFC) & !is.na(gene_name) & nzchar(gene_name)

  # Filter for only upregulated genes
  upregulated_genes <- gene_name[is_usable & df$logFC > 0]

  # Filter for only downregulated genes
  downregulated_genes <- gene_name[is_usable & df$logFC < 0]

  # Write the upregulated genes to a file in the output directory, one per line
  upregulated_file <- file.path("05_DEGs", paste0(comparison, "_upregulated.txt"))
  writeLines(upregulated_genes, upregulated_file)

  # Write the downregulated genes to a file in the output directory, one per line
  downregulated_file <- file.path("05_DEGs", paste0(comparison, "_downregulated.txt"))
  writeLines(downregulated_genes, downregulated_file)
}

In [29]:
# Collect the "*_upregulated.txt" / "*_downregulated.txt" gene lists in 05_DEGs
files <- list.files(
  "05_DEGs",
  "_(upregulated|downregulated)\\.txt$",
  full.names = TRUE
)

# Show which gene list files were found
print(files)

[1] "05_DEGs/faexcess_female_vs_control_female_downregulated.txt"
[2] "05_DEGs/faexcess_female_vs_control_female_upregulated.txt"  
[3] "05_DEGs/faexcess_vs_control_downregulated.txt"              
[4] "05_DEGs/faexcess_vs_control_upregulated.txt"                


### Make Venn Diagram

In [30]:
# Gene sets to compare, keyed by file name (without path and extension)
gene_list <- list()

# Create a named vector for custom labels
label_mapping <- c(
  "faexcess_vs_control_downregulated" = "Control vs. FAE Downregulated",
  "faexcess_vs_control_upregulated" = "Control vs. FAE Upregulated",
  "faexcess_female_vs_control_female_downregulated" = "Control Female vs. FAE Female Downregulated",
  "faexcess_female_vs_control_female_upregulated" = "Control Female vs. FAE Female Upregulated"
)

# Loop through each file and read the gene names
for (file in files) {
  # One gene name per line. readLines() is used instead of read.table() so that
  # an empty gene list file is read as an empty set; read.table() stops with
  # "no lines available in input" on such a file.
  gene_names <- trimws(readLines(file, warn = FALSE))

  # Drop blank lines and repeated symbols so every gene is counted once per set
  gene_names <- unique(gene_names[nzchar(gene_names)])

  # Use the filename (without path and extension) as the list name
  list_name <- tools::file_path_sans_ext(basename(file))

  # Store the gene names in the list
  gene_list[[list_name]] <- gene_names
}

n_sets <- length(gene_list)

# Display label per set; sets without an entry in label_mapping keep their
# file-based name instead of becoming NA
category_labels <- unname(label_mapping[names(gene_list)])
unlabelled <- is.na(category_labels)
category_labels[unlabelled] <- names(gene_list)[unlabelled]

# Create the Venn diagram (returned as grid objects because filename = NULL).
# It is built before the PDF is opened so that a failure here does not leave a
# graphics device open.
venn.plot <- venn.diagram(
  x = gene_list,
  category.names = category_labels,  # Use the custom labels for category names
  filename = NULL,  # Set to NULL to get the grid objects instead of a file
  output = TRUE,
  fill = rainbow(n_sets),  # One colour per set
  alpha = 0.5,
  cat.cex = 1,  # Adjust the size of the category labels
  cex = 1.5,
  cat.pos = rep(0, n_sets),  # One position per set (was hard-coded for 4 sets)
  cat.dist = rep(0.1, n_sets),  # One distance per set (was hard-coded for 4 sets)
  main = "Significant Differentially Expressed Genes",
  main.cex = 1.5
)

pdf("05_DEGs/venn_diagram_of_DEGs.pdf", width = 12, height = 12)

# Plot the Venn diagram
grid.draw(venn.plot)

# Close the PDF
dev.off()

### Make Upset Plot

In [31]:
# Gene sets to compare, keyed by file name (without path and extension)
gene_list <- list()

# Create a named vector for custom labels
label_mapping <- c(
  "faexcess_vs_control_downregulated" = "Control vs. FAE Downregulated",
  "faexcess_vs_control_upregulated" = "Control vs. FAE Upregulated",
  "faexcess_female_vs_control_female_downregulated" = "Control Female vs. FAE Female Downregulated",
  "faexcess_female_vs_control_female_upregulated" = "Control Female vs. FAE Female Upregulated"
)

# Loop through each file and read the gene names
for (file in files) {
  # One gene name per line. readLines() is used instead of read.table() so that
  # an empty gene list file is read as an empty set; read.table() stops with
  # "no lines available in input" on such a file.
  gene_names <- trimws(readLines(file, warn = FALSE))

  # Drop blank lines and repeated symbols. A symbol listed twice in one file
  # would otherwise give a count of 2 in the table below, which is not a valid
  # entry of the 0/1 membership matrix built for upset().
  gene_names <- unique(gene_names[nzchar(gene_names)])

  # Use the filename (without path and extension) as the list name
  list_name <- tools::file_path_sans_ext(basename(file))

  # Store the gene names in the list
  gene_list[[list_name]] <- gene_names
}

# Long format: one row per (gene, set) membership
gene_df <- data.frame(
  Gene = unlist(gene_list, use.names = FALSE),
  Group = rep(names(gene_list), times = lengths(gene_list))
)

if (nrow(gene_df) == 0) {
  stop("No genes found in any gene list file.")
}

# Create a binary presence-absence matrix for UpSetR (genes x sets)
gene_matrix <- as.data.frame.matrix(table(gene_df))

# Rename the columns using the label mapping; sets without an entry keep their
# file-based name instead of becoming NA
set_labels <- unname(label_mapping[names(gene_matrix)])
unlabelled <- is.na(set_labels)
set_labels[unlabelled] <- names(gene_matrix)[unlabelled]
colnames(gene_matrix) <- set_labels

# Create the UpSet plot (built before the PDF is opened so that a failure here
# does not leave a graphics device open)
p <- upset(gene_matrix, sets = colnames(gene_matrix), keep.order = TRUE,
      main.bar.color = "steelblue",
      sets.bar.color = "darkgreen",
      order.by = "freq",
      text.scale = 1.5)

# onefile = FALSE: printing an UpSetR plot into a multi-page PDF is reported to
# emit an empty page before the plot; with one page per file only the plot
# itself remains in the output.
pdf("05_DEGs/upset_plot_of_DEGs.pdf", width = 11, height = 6, onefile = FALSE)

print(p)

dev.off()

### Store Gene Lists

In [12]:
# Initialize a list to store gene sets
gene_lists <- list()

# Display labels for the gene sets. Defined here (identical to the Venn/UpSet
# cells) so this cell does not depend on which other cell last assigned
# `label_mapping`; the GO section assigns a mapping with different keys.
label_mapping <- c(
  "faexcess_vs_control_downregulated" = "Control vs. FAE Downregulated",
  "faexcess_vs_control_upregulated" = "Control vs. FAE Upregulated",
  "faexcess_female_vs_control_female_downregulated" = "Control Female vs. FAE Female Downregulated",
  "faexcess_female_vs_control_female_upregulated" = "Control Female vs. FAE Female Upregulated"
)

# Loop through each file and read the gene names
for (file in files) {
  # One gene name per line. readLines() is used instead of read.table() so that
  # an empty gene list file is read as an empty set; read.table() stops with
  # "no lines available in input" on such a file.
  gene_names <- trimws(readLines(file, warn = FALSE))
  gene_names <- gene_names[nzchar(gene_names)]

  # Use the filename (without path and extension) as the list name
  list_name <- tools::file_path_sans_ext(basename(file))

  # Store the gene names in the list
  gene_lists[[list_name]] <- gene_names
}

if (length(gene_lists) == 0) {
  stop("No gene list files to store.")
}

# Display label per set; sets without an entry in label_mapping keep their
# file-based name instead of becoming NA
set_labels <- unname(label_mapping[names(gene_lists)])
unlabelled <- is.na(set_labels)
set_labels[unlabelled] <- names(gene_lists)[unlabelled]

# Pairwise intersections, in the order (1,2), (1,3), ..., (2,3), ...
# combn() is only called for two or more sets; the previous 1:(n - 1) loops ran
# with invalid indices when fewer than two sets were present.
intersection_list <- list()
if (length(gene_lists) >= 2) {
  set_pairs <- combn(seq_along(gene_lists), 2)
  for (k in seq_len(ncol(set_pairs))) {
    i <- set_pairs[1, k]
    j <- set_pairs[2, k]

    # Name the intersection after the two set labels, joined by "and"
    intersection_name <- paste(set_labels[i], "and", set_labels[j], sep = " ")

    # Store the intersection (may be empty)
    intersection_list[[intersection_name]] <- intersect(gene_lists[[i]], gene_lists[[j]])
  }
}

# All output columns: the gene sets first, then their pairwise intersections
all_columns <- c(setNames(gene_lists, set_labels), intersection_list)

# Pad every column with NA to the length of the longest gene list so they fit
# into one data frame. Padding also handles empty sets, for which the previous
# `1:length(x)` row index was invalid.
max_length <- max(lengths(gene_lists))
padded_columns <- lapply(all_columns, function(genes) {
  c(genes, rep(NA_character_, max_length - length(genes)))
})

# check.names = FALSE keeps the labels (which contain spaces) as column names
combined_df <- data.frame(padded_columns, check.names = FALSE, stringsAsFactors = FALSE)

# Save the combined data frame as an Excel file using openxlsx
write.xlsx(combined_df, file = "05_DEGs/upset_plot_DEGS.xlsx", rowNames = FALSE)

## Visualize DEG GO Analysis

### Read in Data

In [13]:
# Names of the enrichment databases: the three GO ontologies plus the pathway
# and disease/drug signature collections
databases <- c(
  "GO_Biological_Process_2023",
  "GO_Cellular_Component_2023",
  "GO_Molecular_Function_2023",
  "KEGG_2019_Mouse",
  "Panther_2016",
  "Reactome_2016",
  "RNAseq_DiseaseGene_DrugSigs_GEO"
)

In [14]:
# Store GO data into a data frame

# Create a named vector for custom labels
label_mapping <- c(
  "faexcess_vs_control" = "Control vs. FAE",
  "faexcess_female_vs_control_female" = "Control Female vs. FAE Female"
)

# Specify the directory containing the files
directory <- "05_DEGs"

# List all files in the directory that match the pattern
file_list <- list.files(path = directory, pattern = "_enrichr_results\\.xlsx$", full.names = TRUE)

# One table per non-empty sheet; combined once after the loop instead of
# growing a data frame with bind_rows() on every iteration
sheet_tables <- list()

# Iterate over each file and read in the corresponding Excel sheets
for (file_path in file_list) {
  # Extract the comparison name from the file name
  comparison <- tools::file_path_sans_ext(basename(file_path))

  # Every sheet of the workbook is treated as one database
  for (database in excel_sheets(file_path)) {
    # A sheet that cannot be read or lacks the expected columns is skipped with
    # a warning naming the sheet. Previously try(..., silent = TRUE) discarded
    # such errors, so missing data went unnoticed.
    sheet_table <- tryCatch({
      raw_sheet <- read_excel(file_path, sheet = database)

      if (nrow(raw_sheet) == 0) {
        NULL  # Nothing to add for an empty sheet
      } else {
        # Select the required columns and add module (comparison) and database
        raw_sheet %>%
          select(Term, Adjusted.P.value, Odds.Ratio) %>%
          mutate(Module = comparison, Database = database)
      }
    }, error = function(e) {
      warning("Skipping sheet '", database, "' of ", basename(file_path), ": ",
              conditionMessage(e), call. = FALSE)
      NULL
    })

    if (!is.null(sheet_table)) {
      sheet_tables[[length(sheet_tables) + 1L]] <- sheet_table
    }
  }
}

# Combine all sheets; kept as a plain data frame as before
all_data <- as.data.frame(bind_rows(sheet_tables))

# Check if any data was read
if (nrow(all_data) == 0) {
  stop("No data read from any of the Excel files.")
}

# Replace the Module names with corresponding labels from label_mapping;
# modules without a label keep their name
all_data$Module <- gsub("_genes_enrichr_results$", "", all_data$Module)
has_label <- all_data$Module %in% names(label_mapping)
all_data$Module[has_label] <- unname(label_mapping[all_data$Module[has_label]])

# Display the combined dataframe
head(all_data)

Unnamed: 0_level_0,Term,Adjusted.P.value,Odds.Ratio,Module,Database
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<chr>,<chr>
1,Cytoplasmic Translation (GO:0002181),7.026020999999999e-19,35.738009,Control Female vs. FAE Female,GO_Biological_Process_2023
2,Translation (GO:0006412),6.226612e-15,14.65298,Control Female vs. FAE Female,GO_Biological_Process_2023
3,Macromolecule Biosynthetic Process (GO:0009059),6.226612e-15,17.122221,Control Female vs. FAE Female,GO_Biological_Process_2023
4,Peptide Biosynthetic Process (GO:0043043),6.226612e-15,18.963421,Control Female vs. FAE Female,GO_Biological_Process_2023
5,Gene Expression (GO:0010467),5.033018e-11,10.053962,Control Female vs. FAE Female,GO_Biological_Process_2023
6,rRNA Processing (GO:0006364),0.002180118,9.547872,Control Female vs. FAE Female,GO_Biological_Process_2023


In [15]:
# Keep only the terms whose adjusted p-value is at most 0.05
all_data <- filter(all_data, Adjusted.P.value <= 0.05)

# Preview the remaining rows
head(all_data)

Unnamed: 0_level_0,Term,Adjusted.P.value,Odds.Ratio,Module,Database
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<chr>,<chr>
1,Cytoplasmic Translation (GO:0002181),7.026020999999999e-19,35.738009,Control Female vs. FAE Female,GO_Biological_Process_2023
2,Translation (GO:0006412),6.226612e-15,14.65298,Control Female vs. FAE Female,GO_Biological_Process_2023
3,Macromolecule Biosynthetic Process (GO:0009059),6.226612e-15,17.122221,Control Female vs. FAE Female,GO_Biological_Process_2023
4,Peptide Biosynthetic Process (GO:0043043),6.226612e-15,18.963421,Control Female vs. FAE Female,GO_Biological_Process_2023
5,Gene Expression (GO:0010467),5.033018e-11,10.053962,Control Female vs. FAE Female,GO_Biological_Process_2023
6,rRNA Processing (GO:0006364),0.002180118,9.547872,Control Female vs. FAE Female,GO_Biological_Process_2023


In [16]:
# Per database: number of rows (term/module pairs) and number of distinct terms
terms_by_database <- group_by(all_data, Database)
database_term_counts <- summarise(
  terms_by_database,
  Total_Terms = n(),
  Unique_Terms = n_distinct(Term)
)

# Show the summary table
print(database_term_counts)

[90m# A tibble: 6 × 3[39m
  Database                        Total_Terms Unique_Terms
  <chr>                                 <int>        <int>
1 GO_Biological_Process_2023               49           38
2 GO_Cellular_Component_2023               52           37
3 GO_Molecular_Function_2023               14           11
4 KEGG_2019_Mouse                          10            9
5 RNAseq_DiseaseGene_DrugSigs_GEO        1208          690
6 Reactome_2016                           105           65


### Plot Top 25 Dot Plots

In [17]:
# For every term of every database: the number of modules it appears in and its
# best (smallest) adjusted p-value across those modules
term_module_counts <- all_data %>%
  group_by(Database, Term) %>%
  summarise(
    ModuleCount = n_distinct(Module),
    BestAdjustedP = min(Adjusted.P.value),
    .groups = 'drop'
  )

# Rank the terms within each database by the number of modules they appear in.
# Ties are broken by the best adjusted p-value. Previously tied terms kept the
# alphabetical order produced by summarise(), so which terms made the top 25
# depended on their spelling rather than on their enrichment.
ranked_terms <- term_module_counts %>%
  arrange(Database, desc(ModuleCount), BestAdjustedP) %>%
  group_by(Database) %>%
  slice_head(n = 25) %>%
  ungroup()

# Keep the rows of the original data that belong to the top 25 terms per database
filtered_data_top_25 <- all_data %>%
  semi_join(ranked_terms, by = c("Database", "Term"))

# Print the filtered data
head(filtered_data_top_25)

Unnamed: 0_level_0,Term,Adjusted.P.value,Odds.Ratio,Module,Database
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<chr>,<chr>
1,Cytoplasmic Translation (GO:0002181),7.026020999999999e-19,35.738009,Control Female vs. FAE Female,GO_Biological_Process_2023
2,Translation (GO:0006412),6.226612e-15,14.65298,Control Female vs. FAE Female,GO_Biological_Process_2023
3,Macromolecule Biosynthetic Process (GO:0009059),6.226612e-15,17.122221,Control Female vs. FAE Female,GO_Biological_Process_2023
4,Peptide Biosynthetic Process (GO:0043043),6.226612e-15,18.963421,Control Female vs. FAE Female,GO_Biological_Process_2023
5,Gene Expression (GO:0010467),5.033018e-11,10.053962,Control Female vs. FAE Female,GO_Biological_Process_2023
6,rRNA Processing (GO:0006364),0.002180118,9.547872,Control Female vs. FAE Female,GO_Biological_Process_2023


In [18]:
# Per database, after the top-25 selection: number of rows (term/module pairs)
# and number of distinct terms
top_terms_by_database <- group_by(filtered_data_top_25, Database)
filtered_database_term_counts <- summarise(
  top_terms_by_database,
  Total_Terms = n(),
  Unique_Terms = n_distinct(Term)
)

# Show the summary table
print(filtered_database_term_counts)

[90m# A tibble: 6 × 3[39m
  Database                        Total_Terms Unique_Terms
  <chr>                                 <int>        <int>
1 GO_Biological_Process_2023               36           25
2 GO_Cellular_Component_2023               40           25
3 GO_Molecular_Function_2023               14           11
4 KEGG_2019_Mouse                          10            9
5 RNAseq_DiseaseGene_DrugSigs_GEO          50           25
6 Reactome_2016                            50           25


In [19]:
# Theme shared by every dot plot: white background, no grid or border, black
# axis lines, x-axis labels rotated by 90 degrees
dot_plot_theme <- theme(
  panel.background = element_rect(fill = "white", color = NA),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_blank(),
  axis.line = element_line(color = "black"),
  plot.background = element_rect(fill = "white", color = NA),
  axis.text.x = element_text(angle = 90, hjust = 1)
)

# One dot plot per database: modules on the x-axis, terms on the y-axis, point
# size = odds ratio, fill = adjusted p-value
for (database in unique(filtered_data_top_25$Database)) {
  terms_for_database <- filter(filtered_data_top_25, Database == database)

  # Nothing to draw for this database
  if (nrow(terms_for_database) == 0) {
    cat(glue("No data available for {database}. Skipping...\n"))
    next
  }

  point_mapping <- aes(x = Module, y = Term, size = Odds.Ratio, fill = Adjusted.P.value)
  plot_labels <- labs(
    title = 'Top GO Terms Associated with Significant DEGs',
    subtitle = glue('{database}')
  )

  database_dot_plot <- ggplot(terms_for_database, point_mapping) +
    geom_point(shape = 21) +
    scale_fill_viridis() +
    xlab('') + ylab('') +
    plot_labels +
    dot_plot_theme

  # Write the plot to a PDF named after the database
  output_pdf <- glue("05_DEGs/top_25_dot_plot_{database}.pdf")
  ggsave(filename = output_pdf, plot = database_dot_plot, height = 10, width = 15)
}