## Update Bioconductor

In [None]:
# Make sure BiocManager is installed, then move to the pinned Bioconductor release.
# requireNamespace() only checks that the package is available; it does not
# attach it, which is all that is needed because the call below is
# namespace-qualified.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(version = "3.17")

## Install Packages

In [None]:
# ComplexHeatmap (installed through BiocManager)
# requireNamespace() checks availability without attaching BiocManager.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("ComplexHeatmap")

# dplyr, tidyr and reshape2 (installed with install.packages)
install.packages(c("dplyr", "tidyr", "reshape2"))

## Load Packages

In [None]:
library(ComplexHeatmap)
library(dplyr)
library(tidyr)
library(reshape2)

## Load Dataframe

In [84]:
# Read the master expected-vs-observed peaks table; the first row holds the
# column names.
master_table_path <- "tables_and_figures/expected_vs_observed_peaks_master.csv"
df <- read.csv(master_table_path, header = TRUE)

## Create Heatmap of F1 Scores

In [109]:
# Create matrices of F1 scores, one per value of the Control column

# Keep the columns of interest, derive the two axis labels and replace NA
# metric values with 0, all in one pipeline
heatmap_data <- df %>%
  select(
    Endedness, Peak_Type, Aligner, Peak_Caller, Deduplicator, Test_Dataset,
    Control, Sensitivity, Precision, F1_Score
  ) %>%
  mutate(
    # Column label: peak caller, deduplicator and test dataset
    x_axis_labels = paste(Peak_Caller, Deduplicator, Test_Dataset, sep = "_"),
    # Row label: endedness, peak type and aligner
    y_axis_labels = paste(Endedness, Peak_Type, Aligner, sep = "_"),
    Sensitivity = replace_na(Sensitivity, 0),
    Precision = replace_na(Precision, 0),
    F1_Score = replace_na(F1_Score, 0)
  )

# Identifier columns shared by both reshaping passes
label_columns <- c("x_axis_labels", "y_axis_labels")

# --- Rows whose Control column is "with_control" ---
control_df <- heatmap_data[heatmap_data$Control == "with_control", ]
# Long format: one row per (x label, y label) with the F1 score as `value`
melted_control_df <- melt(control_df, id.vars = label_columns, measure.vars = "F1_Score")
# Wide format: y labels down the rows, x labels across the columns
cast_control_df <- dcast(melted_control_df, y_axis_labels ~ x_axis_labels, value.var = "value")
# Move the y labels into the row names so only score columns remain
rownames(cast_control_df) <- cast_control_df[["y_axis_labels"]]
cast_control_df[["y_axis_labels"]] <- NULL
control_matrix <- as.matrix(cast_control_df)

# --- Rows whose Control column is "no_control" ---
no_control_df <- heatmap_data[heatmap_data$Control == "no_control", ]
melted_no_control_df <- melt(no_control_df, id.vars = label_columns, measure.vars = "F1_Score")
cast_no_control_df <- dcast(melted_no_control_df, y_axis_labels ~ x_axis_labels, value.var = "value")
rownames(cast_no_control_df) <- cast_no_control_df[["y_axis_labels"]]
cast_no_control_df[["y_axis_labels"]] <- NULL
no_control_matrix <- as.matrix(cast_no_control_df)

In [None]:
# Create Heatmap

#pdf("F1_score_heatmap.pdf", height = 20, width = 10)

# Matrix to plot. control_matrix holds the F1 scores of the "with_control"
# rows; assign no_control_matrix instead to plot the "no_control" rows.
# NOTE(review): the original cell never defined heatmap_matrix -- confirm
# which of the two matrices is intended here.
heatmap_matrix <- control_matrix

# Per-column metadata for the top annotation. The matrix column names are the
# x_axis_labels built from Peak_Caller, Deduplicator and Test_Dataset, so the
# first heatmap_data row carrying each label supplies the three values.
# (Splitting the label on "_" would be ambiguous: "no_deduplication" itself
# contains an underscore.)
column_rows <- match(colnames(heatmap_matrix), heatmap_data$x_axis_labels)
stopifnot(!anyNA(column_rows))
column_info <- heatmap_data[
  column_rows,
  c("Peak_Caller", "Deduplicator", "Test_Dataset")
]

# Colour mapping for the heatmap body, pinned to the 0-1 range so that
# heatmaps of different matrices share one scale. circlize is installed as a
# dependency of ComplexHeatmap, hence the namespace-qualified call.
# NOTE(review): the original cell used col_fun without defining it; this
# palette is a placeholder choice -- adjust as needed.
col_fun <- circlize::colorRamp2(
  c(0, 0.5, 1),
  c("#FFFFFF", "#FC8D59", "#7F0000")
)

# Annotation values are converted to character so they match the names of the
# colour vectors below (the Test Dataset colours are keyed "1" to "6").
column_ha <- HeatmapAnnotation(
  `Peak Caller` = as.character(column_info$Peak_Caller),
  `Deduplicator` = as.character(column_info$Deduplicator),
  `Test Dataset` = as.character(column_info$Test_Dataset),
  col = list(
    `Peak Caller` = c(
      "cisgenome" = "#c2ffc3", "genrich" = "#90ec7c",
      "macs3" = "#00b22a", "pepr" = "#006b05"
    ),
    `Deduplicator` = c(
      "samtools" = "#e6e6fa", "no_deduplication" = "#aea3e0",
      "picard" = "#835fbd", "sambamba" = "#620093"
    ),
    `Test Dataset` = c(
      "1" = "#00CCFF", "2" = "#00A3FF", "3" = "#007AFF",
      "4" = "#0052FF", "5" = "#0000FF", "6" = "#0700C4"
    )
  ),
  annotation_name_gp = gpar(fontsize = 16)
)

f1_heatmap <- Heatmap(
  heatmap_matrix,
  name = "F1 Scores",
  top_annotation = column_ha,
  col = col_fun,
  row_names_gp = gpar(fontsize = 16),
  # Font size 0 hides the column names; the top annotation describes the
  # columns instead
  column_names_gp = gpar(fontsize = 0),
  cluster_columns = FALSE,
  heatmap_legend_param = list(
    title = "F1 Scores",
    title_gp = gpar(fontsize = 16),
    labels_gp = gpar(fontsize = 12)
  )
)

# Capture the drawn heatmap as a grid object, then render it
map <- grid.grabExpr(draw(f1_heatmap))

#grid.newpage()
grid.draw(map)

#dev.off()

## Create Heatmap of Precision

## Create Heatmap of Sensitivity