## Update Bioconductor

In [None]:
# Make sure the BiocManager helper is available, then install/update to
# Bioconductor release 3.17.
# requireNamespace() only checks that the package can be loaded; unlike
# require() it does not attach it to the search path. Attaching is unnecessary
# here because the call below is namespace-qualified.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(version = "3.17")

## Install Packages

In [None]:
# ComplexHeatmap is installed through BiocManager, which is bootstrapped
# first if it is not available yet. requireNamespace() checks availability
# without attaching the package.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("ComplexHeatmap")

# dplyr
install.packages("dplyr")

# tidyr
install.packages("tidyr")

# reshape2
install.packages("reshape2")

# stringr (attached in "Load Packages"; provides str_extract() used when
# preparing the heatmap metadata)
install.packages("stringr")

## Load Packages

In [3]:
library(ComplexHeatmap)
library(dplyr)
library(tidyr)
library(reshape2)
library(stringr)

## Load Dataframe

In [44]:
# Read the master table of expected-vs-observed peak results; the first line
# of the CSV supplies the column names.
df <- read.csv(
  file = "tables_and_figures/expected_vs_observed_peaks_master.csv",
  header = TRUE
)

In [45]:
# Format data

# Build the plotting table in one pipeline: keep the columns of interest,
# derive the combined axis labels, then zero-fill missing metric values.
heatmap_data <- df %>%
  select(
    Endedness, Peak_Type, Aligner, Peak_Caller, Deduplicator,
    Test_Dataset, Control, Sensitivity, Precision, F1_Score
  ) %>%
  mutate(
    # Peak caller, deduplicator and test dataset joined with "_"
    x_axis_labels = paste(Peak_Caller, Deduplicator, Test_Dataset, sep = "_"),
    # Endedness, peak type and aligner joined with "_"
    y_axis_labels = paste(Endedness, Peak_Type, Aligner, sep = "_")
  ) %>%
  # Replace NA values with 0
  mutate(
    Sensitivity = replace_na(Sensitivity, 0),
    Precision = replace_na(Precision, 0),
    F1_Score = replace_na(F1_Score, 0)
  )

## Data with Control

### F1 Scores

#### Create matrix

In [46]:
# Keep only the rows whose Control column equals "with_control"
control_df <- heatmap_data[heatmap_data[["Control"]] == "with_control", ]

# Long format: one row per (x label, y label) pair carrying the F1 score
melted_control_df <- control_df %>%
  melt(id.vars = c("x_axis_labels", "y_axis_labels"), measure.vars = "F1_Score")

# Wide format: one row per y label and one column per x label
cast_control_df <- melted_control_df %>%
  dcast(y_axis_labels ~ x_axis_labels, value.var = "value")

# Move the y labels out of the table and into the row names, so the data
# frame holds only the cast values
row.names(cast_control_df) <- cast_control_df[["y_axis_labels"]]
cast_control_df[["y_axis_labels"]] <- NULL

# Convert to a matrix and transpose it: x labels become the rows and
# y labels become the columns
control_matrix <- t(as.matrix(cast_control_df))

#### Prepare metadata 

In [51]:
# Build the per-row annotation metadata for the with-control heatmap. The
# three metadata columns are parsed out of the row names of control_matrix
# (strings of the form <peak caller>_<deduplicator>..._<dataset number>).

# Save matrix as data frame for manipulation
control_metadata <- as.data.frame(control_matrix)

# Peak Caller: text before the first underscore, mapped to its display name.
# The named vector is indexed directly instead of calling sapply() per
# element: the result is always a plain character vector, and unname() keeps
# the lookup keys from leaking into the column as element names. coalesce()
# falls back to the raw token where no display name is defined.
new_row_names <- sub("^(.*?)_.*$", "\\1", rownames(control_matrix))
replacement_rules <- c("cisgenome" = "Cisgenome", "macs3" = "MACS3", "pepr" = "PePr", "genrich" = "Genrich")
new_row_names <- dplyr::coalesce(unname(replacement_rules[new_row_names]), new_row_names)
control_metadata$Peak_Caller <- new_row_names

# Deduplicator: first run of non-underscore characters that follows an
# underscore (NA when a row name contains no underscore), mapped the same way.
new_row_names <- str_extract(rownames(control_matrix), "(?<=_)[^_]+")
replacement_rules <- c("no" = "None", "sambamba" = "Sambamba", "picard" = "Picard", "samtools" = "Samtools")
new_row_names <- dplyr::coalesce(unname(replacement_rules[new_row_names]), new_row_names)
control_metadata$Deduplicator <- new_row_names

# Test_Dataset: trailing digits of the row name, used as a 1-based index into
# value_map to obtain the coverage label.
# NOTE(review): assumes dataset numbers run from 1 to 6 in increasing
# coverage order; anything else yields NA here. Confirm against the CSV.
control_metadata$Test_Dataset <- as.integer(gsub(".*_(\\d+)$", "\\1", rownames(control_metadata)))
value_map <- c("4X", "8X", "16X", "32X", "64X", "128X")
control_metadata$Test_Dataset <- value_map[control_metadata$Test_Dataset]

#### Create heatmap

In [55]:
# Annotation objects

# Define the desired order for Peak Coverage levels
peak_coverage_order <- c("4X", "8X", "16X", "32X", "64X", "128X")

# Row annotations: one strip per metadata column of control_metadata, each
# with a fixed colour per category
row_ha <- rowAnnotation(
  `Peak Caller` = control_metadata$Peak_Caller,
  `Deduplicator` = control_metadata$Deduplicator,
  `Peak Coverage` = factor(control_metadata$Test_Dataset, levels = peak_coverage_order),
  col = list(
    `Peak Caller` = c("Cisgenome" = "#c2ffc3", "Genrich" = "#90ec7c", "MACS3" = "#00b22a", "PePr" = "#006b05"),
    `Deduplicator` = c("Samtools" = "#620093", "None" = "#e6e6fa", "Picard" = "#aea3e0", "Sambamba" = "#835fbd"),
    `Peak Coverage` = c("4X" = "#00CCFF", "8X" = "#00A3FF", "16X" = "#007AFF", "32X" = "#0052FF", "64X" = "#0000FF", "128X" = "#0700C4")
  )
)

# Column annotations: hard-coded for 12 columns.
# NOTE(review): assumes the matrix columns are ordered Paired before Single,
# Broad before Narrow within each, and Bowtie2 / BWA-MEM / STAR within those.
# Confirm against colnames(control_matrix).
col_ha <- HeatmapAnnotation(
  `Endedness` = rep(c("Paired", "Single"), each = 6),
  `Peak_Type` = rep(rep(c("Broad", "Narrow"), each = 3), times = 2),
  `Aligner` = rep(c("Bowtie2", "BWA-MEM", "STAR"), times = 4),
  col = list(
    `Endedness` = c("Paired" = "#f7d3db", "Single" = "#eb647e"),
    `Peak_Type` = c("Narrow" = "#ff4d00", "Broad" = "#ff9a00"),
    `Aligner` = c("BWA-MEM" = "#8c0000", "Bowtie2" = "#ff0000", "STAR" = "#400000")
  )
)

In [56]:
# Create heatmap

# Everything drawn between pdf() and dev.off() is written to this file
pdf("tables_and_figures/f1_score_control_heatmap.pdf", width = 6, height = 10)

# Heatmap for control
# NOTE(review): column_km requests a k-means split of the columns; if the
# split is applied with cluster_columns = FALSE, consider set.seed() so the
# figure is reproducible. Confirm against the ComplexHeatmap documentation.
control_heatmap_object <- Heatmap(
  # Data
  control_matrix,

  # Legend customizations
  col = colorRampPalette(c("#B2182B", "#EEEEEE", "#2166AC"))(100),
  heatmap_legend_param = list(title = "F1 Scores"),

  # Row customizations
  left_annotation = row_ha,
  cluster_rows = FALSE,
  show_row_names = FALSE,

  # Column customizations
  top_annotation = col_ha,
  cluster_columns = FALSE,
  column_km = 4,
  column_title = NULL,
  show_column_names = FALSE,

  # Other customizations: the body is sized at 5 mm per column and 2 mm per row
  border = TRUE,
  width = unit(5, "mm") * ncol(control_matrix),
  height = unit(2, "mm") * nrow(control_matrix)
)

ht <- draw(control_heatmap_object)

# Print width and height (in inches) to determine appropriate export size
w <- convertX(ComplexHeatmap:::width(ht), "inch", valueOnly = TRUE)
h <- convertY(ComplexHeatmap:::height(ht), "inch", valueOnly = TRUE)
c(w, h)

dev.off()

### Precision

#### Create matrix

In [None]:
# NOTE(review): this cell reuses the variable names of the F1 section
# (control_df, melted_control_df, cast_control_df, control_matrix), so running
# it replaces the F1 objects with their Precision counterparts.

# Keep only the rows whose Control column equals "with_control"
control_df <- heatmap_data[heatmap_data[["Control"]] == "with_control", ]

# Long format: one row per (x label, y label) pair carrying the Precision value
melted_control_df <- control_df %>%
  melt(id.vars = c("x_axis_labels", "y_axis_labels"), measure.vars = "Precision")

# Wide format: one row per y label and one column per x label
cast_control_df <- melted_control_df %>%
  dcast(y_axis_labels ~ x_axis_labels, value.var = "value")

# Use the y labels as row names and drop them from the table itself
row.names(cast_control_df) <- cast_control_df[["y_axis_labels"]]
cast_control_df[["y_axis_labels"]] <- NULL

# Convert to a matrix and transpose it: x labels become the rows and
# y labels become the columns
control_matrix <- t(as.matrix(cast_control_df))

#### Prepare metadata 

### Sensitivity

## Data with no Control

### F1 Scores

#### Create matrix

In [57]:
# Keep only the rows whose Control column equals "no_control"
no_control_df <- heatmap_data[heatmap_data[["Control"]] == "no_control", ]

# Long format: one row per (x label, y label) pair carrying the F1 score
melted_no_control_df <- no_control_df %>%
  melt(id.vars = c("x_axis_labels", "y_axis_labels"), measure.vars = "F1_Score")

# Wide format: one row per y label and one column per x label
cast_no_control_df <- melted_no_control_df %>%
  dcast(y_axis_labels ~ x_axis_labels, value.var = "value")

# Use the y labels as row names and drop them from the table itself
row.names(cast_no_control_df) <- cast_no_control_df[["y_axis_labels"]]
cast_no_control_df[["y_axis_labels"]] <- NULL

# Convert to a matrix and transpose it: x labels become the rows and
# y labels become the columns
no_control_matrix <- t(as.matrix(cast_no_control_df))

#### Prepare metadata 

In [58]:
# Build the per-row annotation metadata for the no-control heatmap. The three
# metadata columns are parsed out of the row names of no_control_matrix
# (strings of the form <peak caller>_<deduplicator>..._<dataset number>).

# Save matrix as data frame for manipulation
no_control_metadata <- as.data.frame(no_control_matrix)

# Peak Caller: text before the first underscore, mapped to its display name.
# The named vector is indexed directly instead of calling sapply() per
# element: the result is always a plain character vector, and unname() keeps
# the lookup keys from leaking into the column as element names. coalesce()
# falls back to the raw token where no display name is defined.
new_row_names <- sub("^(.*?)_.*$", "\\1", rownames(no_control_matrix))
replacement_rules <- c("cisgenome" = "Cisgenome", "macs3" = "MACS3", "pepr" = "PePr", "genrich" = "Genrich")
new_row_names <- dplyr::coalesce(unname(replacement_rules[new_row_names]), new_row_names)
no_control_metadata$Peak_Caller <- new_row_names

# Deduplicator: first run of non-underscore characters that follows an
# underscore (NA when a row name contains no underscore), mapped the same way.
new_row_names <- str_extract(rownames(no_control_matrix), "(?<=_)[^_]+")
replacement_rules <- c("no" = "None", "sambamba" = "Sambamba", "picard" = "Picard", "samtools" = "Samtools")
new_row_names <- dplyr::coalesce(unname(replacement_rules[new_row_names]), new_row_names)
no_control_metadata$Deduplicator <- new_row_names

# Test_Dataset: trailing digits of the row name, used as a 1-based index into
# value_map to obtain the coverage label.
# NOTE(review): assumes dataset numbers run from 1 to 6 in increasing
# coverage order; anything else yields NA here. Confirm against the CSV.
no_control_metadata$Test_Dataset <- as.integer(gsub(".*_(\\d+)$", "\\1", rownames(no_control_metadata)))
value_map <- c("4X", "8X", "16X", "32X", "64X", "128X")
no_control_metadata$Test_Dataset <- value_map[no_control_metadata$Test_Dataset]

#### Create heatmap

In [59]:
# Annotation objects

# Define the desired order for Peak Coverage levels
peak_coverage_order <- c("4X", "8X", "16X", "32X", "64X", "128X")

# Row annotations: one strip per metadata column of no_control_metadata, each
# with a fixed colour per category
row_ha <- rowAnnotation(
  `Peak Caller` = no_control_metadata$Peak_Caller,
  `Deduplicator` = no_control_metadata$Deduplicator,
  `Peak Coverage` = factor(no_control_metadata$Test_Dataset, levels = peak_coverage_order),
  col = list(
    `Peak Caller` = c("Cisgenome" = "#c2ffc3", "Genrich" = "#90ec7c", "MACS3" = "#00b22a", "PePr" = "#006b05"),
    `Deduplicator` = c("Samtools" = "#620093", "None" = "#e6e6fa", "Picard" = "#aea3e0", "Sambamba" = "#835fbd"),
    `Peak Coverage` = c("4X" = "#00CCFF", "8X" = "#00A3FF", "16X" = "#007AFF", "32X" = "#0052FF", "64X" = "#0000FF", "128X" = "#0700C4")
  )
)

# Column annotations: hard-coded for 12 columns.
# NOTE(review): assumes the matrix columns are ordered Paired before Single,
# Broad before Narrow within each, and Bowtie2 / BWA-MEM / STAR within those.
# Confirm against colnames(no_control_matrix).
col_ha <- HeatmapAnnotation(
  `Endedness` = rep(c("Paired", "Single"), each = 6),
  `Peak_Type` = rep(rep(c("Broad", "Narrow"), each = 3), times = 2),
  `Aligner` = rep(c("Bowtie2", "BWA-MEM", "STAR"), times = 4),
  col = list(
    `Endedness` = c("Paired" = "#f7d3db", "Single" = "#eb647e"),
    `Peak_Type` = c("Narrow" = "#ff4d00", "Broad" = "#ff9a00"),
    `Aligner` = c("BWA-MEM" = "#8c0000", "Bowtie2" = "#ff0000", "STAR" = "#400000")
  )
)

In [60]:
# Create heatmap

# Everything drawn between pdf() and dev.off() is written to this file
pdf("tables_and_figures/f1_score_no_control_heatmap.pdf", width = 6, height = 10)

# Heatmap for the data without control.
# NOTE(review): the object keeps the name control_heatmap_object so existing
# references still work, but it overwrites the object built for the
# with-control figure; consider renaming it.
control_heatmap_object <- Heatmap(
  # Data
  no_control_matrix,

  # Column customizations
  cluster_columns = FALSE,
  column_km = 4,
  show_column_names = FALSE,
  column_title = NULL,
  top_annotation = col_ha,

  # Row customizations
  cluster_rows = FALSE,
  show_row_names = FALSE,
  left_annotation = row_ha,

  # Legend customizations
  heatmap_legend_param = list(title = "F1 Scores"),
  col = colorRampPalette(c("#B2182B", "#EEEEEE", "#2166AC"))(100),

  # Other customizations
  border = TRUE,
  # Size the body from the matrix that is actually plotted. The previous code
  # used control_matrix, which belongs to the with-control figure and is
  # overwritten by the Precision section.
  width = ncol(no_control_matrix) * unit(5, "mm"),
  height = nrow(no_control_matrix) * unit(2, "mm")
)

ht <- draw(control_heatmap_object)

# Print width and height (in inches) to determine appropriate export size
w <- ComplexHeatmap:::width(ht)
w <- convertX(w, "inch", valueOnly = TRUE)
h <- ComplexHeatmap:::height(ht)
h <- convertY(h, "inch", valueOnly = TRUE)
c(w, h)

dev.off()

### Precision

### Sensitivity