## Set Library Path

In [1]:
# Search this conda/Jupyter R library first when loading packages.
.libPaths(new = "/share/korflab/home/viki/anaconda3/jupyter_nb/lib/R/library")

## Load Packages

In [4]:
library(ggplot2)
library(tidyr)
library(dplyr)

## Load Dataframe

In [39]:
# Read the alignment table; the first line of the CSV holds the column names.
df <- read.csv(file = "cut_and_tag_run.csv", header = TRUE)

## Prepare Data

In [15]:
# Subset CUT&RUN data
# `%in%` treats a missing Origin as "no match"; `==` would return NA there and
# insert an all-NA row into the subset.
cut_and_run <- df[df$Origin %in% "CUT&RUN", , drop = FALSE]

# Remove the literal "%" sign so both alignment columns parse as numbers.
cut_and_run$Original_Percent_Aligned <- as.numeric(
  sub("%", "", cut_and_run$Original_Percent_Aligned, fixed = TRUE)
)
cut_and_run$Rocketchip_Percent_Aligned <- as.numeric(
  sub("%", "", cut_and_run$Rocketchip_Percent_Aligned, fixed = TRUE)
)

# Long format: one row per sample and method. The source column name goes to
# `Method` and the percentage to `value`; `value` is named explicitly because
# the t-test and plotting cells refer to it.
cut_and_run_long <- tidyr::pivot_longer(
  cut_and_run,
  cols = c("Original_Percent_Aligned", "Rocketchip_Percent_Aligned"),
  names_to = "Method",
  values_to = "value"
)

In [16]:
# Carry out t-test

# Reorganize data
# Raw_Reads contains thousands separators (e.g. "1,234,567"); strip them so the
# column is numeric.
cut_and_run_long$Raw_Reads <- as.numeric(
  gsub(",", "", cut_and_run_long$Raw_Reads, fixed = TRUE)
)

# Back to one row per sample with one column per method, so the two
# measurements of each sample sit side by side for the paired test.
# pivot_wider() replaces the superseded spread().
cut_and_run_wide <- cut_and_run_long %>%
  pivot_wider(names_from = Method, values_from = value)

# Calculate the standard error of the mean for each group
# The denominator counts only non-missing values so it matches the values that
# sd(..., na.rm = TRUE) actually used.
cut_and_run_sem_values <- cut_and_run_wide %>%
  summarise(
    Original_Percent_Aligned =
      sd(Original_Percent_Aligned, na.rm = TRUE) /
        sqrt(sum(!is.na(Original_Percent_Aligned))),
    Rocketchip_Percent_Aligned =
      sd(Rocketchip_Percent_Aligned, na.rm = TRUE) /
        sqrt(sum(!is.na(Rocketchip_Percent_Aligned)))
  )

# Perform paired t-test (mean difference is Rocketchip minus Original)
cut_and_run_t_test_result <- t.test(
  cut_and_run_wide$Rocketchip_Percent_Aligned,
  cut_and_run_wide$Original_Percent_Aligned,
  paired = TRUE
)

# Display the t-test results
print(cut_and_run_t_test_result)

# Keep the p-value for the plot title
cut_and_run_p_value <- cut_and_run_t_test_result$p.value


	Paired t-test

data:  cut_and_run_wide$Rocketchip_Percent_Aligned and cut_and_run_wide$Original_Percent_Aligned
t = 4.1932, df = 8, p-value = 0.003025
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 14.69945 50.62277
sample estimates:
mean difference 
       32.66111 



In [40]:
# Subset CUT&Tag data
# `%in%` treats a missing Origin as "no match"; `==` would return NA there and
# insert an all-NA row into the subset.
cut_and_tag <- df[df$Origin %in% "CUT&Tag", , drop = FALSE]

# Remove the literal "%" sign so both alignment columns parse as numbers.
cut_and_tag$Original_Percent_Aligned <- as.numeric(
  sub("%", "", cut_and_tag$Original_Percent_Aligned, fixed = TRUE)
)
cut_and_tag$Rocketchip_Percent_Aligned <- as.numeric(
  sub("%", "", cut_and_tag$Rocketchip_Percent_Aligned, fixed = TRUE)
)

# Long format: one row per sample and method. The source column name goes to
# `Method` and the percentage to `value`; `value` is named explicitly because
# the t-test and plotting cells refer to it.
cut_and_tag_long <- tidyr::pivot_longer(
  cut_and_tag,
  cols = c("Original_Percent_Aligned", "Rocketchip_Percent_Aligned"),
  names_to = "Method",
  values_to = "value"
)

In [41]:
# Carry out t-test

# Reorganize data
# Raw_Reads contains thousands separators (e.g. "1,234,567"); strip them so the
# column is numeric.
cut_and_tag_long$Raw_Reads <- as.numeric(
  gsub(",", "", cut_and_tag_long$Raw_Reads, fixed = TRUE)
)

# Back to one row per sample with one column per method, so the two
# measurements of each sample sit side by side for the paired test.
# pivot_wider() replaces the superseded spread().
cut_and_tag_wide <- cut_and_tag_long %>%
  pivot_wider(names_from = Method, values_from = value)

# Calculate the standard error of the mean for each group
# The denominator counts only non-missing values so it matches the values that
# sd(..., na.rm = TRUE) actually used.
cut_and_tag_sem_values <- cut_and_tag_wide %>%
  summarise(
    Original_Percent_Aligned =
      sd(Original_Percent_Aligned, na.rm = TRUE) /
        sqrt(sum(!is.na(Original_Percent_Aligned))),
    Rocketchip_Percent_Aligned =
      sd(Rocketchip_Percent_Aligned, na.rm = TRUE) /
        sqrt(sum(!is.na(Rocketchip_Percent_Aligned)))
  )

# Perform paired t-test (mean difference is Rocketchip minus Original)
cut_and_tag_t_test_result <- t.test(
  cut_and_tag_wide$Rocketchip_Percent_Aligned,
  cut_and_tag_wide$Original_Percent_Aligned,
  paired = TRUE
)

# Display the t-test results
print(cut_and_tag_t_test_result)

# Keep the p-value for the plot title
cut_and_tag_p_value <- cut_and_tag_t_test_result$p.value


	Paired t-test

data:  cut_and_tag_wide$Rocketchip_Percent_Aligned and cut_and_tag_wide$Original_Percent_Aligned
t = -10.291, df = 5, p-value = 0.000149
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -10.508741  -6.307926
sample estimates:
mean difference 
      -8.408333 



## Create Plot

In [34]:
# CUT&RUN

# Give every row the SEM of its own method. Drawing one error-bar layer per
# method over the whole data set would put both methods' SEMs on every bar.
cut_and_run_plot_data <- cut_and_run_long
cut_and_run_plot_data$sem <- unname(
  unlist(cut_and_run_sem_values)[as.character(cut_and_run_plot_data$Method)]
)

cut_and_run_dodge <- position_dodge(width = 0.8)

cut_and_run_plot <- ggplot(
  cut_and_run_plot_data,
  aes(x = SRA_ID, y = value, fill = Method)
) +
  geom_col(position = cut_and_run_dodge, width = 0.6) +
  # Single error-bar layer: bar height +/- the SEM of that bar's method.
  geom_errorbar(
    aes(ymin = value - sem, ymax = value + sem),
    position = cut_and_run_dodge,
    width = 0.25
  ) +
  labs(
    title = paste(
      "Alignment Percentages for Akdogan-Ozdilek et al. (Original) vs. Rocketchip for CUT&RUN Data",
      "\nPaired t-test p-value:", format.pval(cut_and_run_p_value, digits = 3)
    ),
    x = "SRA ID",
    y = "Percent Alignment"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5),
    panel.grid = element_blank(),
    axis.line.x = element_line(color = "black"),
    axis.line.y = element_line(color = "black")
  ) +
  scale_fill_manual(
    values = c(
      "Original_Percent_Aligned" = "#DF536B",
      "Rocketchip_Percent_Aligned" = "#2297E6"
    ),
    name = "Method",
    labels = c(
      "Original_Percent_Aligned" = "Original",
      "Rocketchip_Percent_Aligned" = "Rocketchip"
    )
  )

# Explicit print() so the figure reaches the PDF device even when this code is
# run via source() or inside a function, where ggplot objects do not auto-print.
pdf("cut_and_run_barplot.pdf", width = 9, height = 5)
print(cut_and_run_plot)
dev.off()

In [42]:
# CUT&Tag

# Give every row the SEM of its own method. Drawing one error-bar layer per
# method over the whole data set would put both methods' SEMs on every bar.
cut_and_tag_plot_data <- cut_and_tag_long
cut_and_tag_plot_data$sem <- unname(
  unlist(cut_and_tag_sem_values)[as.character(cut_and_tag_plot_data$Method)]
)

cut_and_tag_dodge <- position_dodge(width = 0.8)

cut_and_tag_plot <- ggplot(
  cut_and_tag_plot_data,
  aes(x = SRA_ID, y = value, fill = Method)
) +
  geom_col(position = cut_and_tag_dodge, width = 0.6) +
  # Single error-bar layer: bar height +/- the SEM of that bar's method.
  geom_errorbar(
    aes(ymin = value - sem, ymax = value + sem),
    position = cut_and_tag_dodge,
    width = 0.25
  ) +
  labs(
    title = paste(
      "Alignment Percentages for Akdogan-Ozdilek et al. (Original) vs. Rocketchip for CUT&Tag Data",
      "\nPaired t-test p-value:", format.pval(cut_and_tag_p_value, digits = 3)
    ),
    x = "SRA ID",
    y = "Percent Alignment"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5),
    panel.grid = element_blank(),
    axis.line.x = element_line(color = "black"),
    axis.line.y = element_line(color = "black")
  ) +
  scale_fill_manual(
    values = c(
      "Original_Percent_Aligned" = "#DF536B",
      "Rocketchip_Percent_Aligned" = "#2297E6"
    ),
    name = "Method",
    labels = c(
      "Original_Percent_Aligned" = "Original",
      "Rocketchip_Percent_Aligned" = "Rocketchip"
    )
  )

# Explicit print() so the figure reaches the PDF device even when this code is
# run via source() or inside a function, where ggplot objects do not auto-print.
pdf("cut_and_tag_barplot.pdf", width = 9, height = 5)
print(cut_and_tag_plot)
dev.off()