#### Extract the number of differentially accessible regions (DARs) per contrast and per cell type, and produce plots displaying the number of DARs

In [1]:
library(tidyverse)

── [1mAttaching core tidyverse packages[22m ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


In [2]:
# Short labels of the contrasts handled in this notebook.
contrasts <- c(
    "age",
    "fetal",
    "sex",
    "disease"
)
num_contrasts <- length(contrasts)

# File-name stem of each contrast, listed in the same order as `contrasts`;
# the stem is the middle part of "<cell_type>_<stem>_results.csv".
corresponding_contrast_file_names <- c(
    "age-group_old_vs_young",
    "age-group_fetal_vs_young",
    "sex_male_vs_female",
    "disease-binary_Y_vs_N"
)

# Cell types for which per-contrast result files are looked up.
cell_types <- c(
    "Cardiomyocyte",
    "Endothelial",
    "Fibroblast",
    "Lymphoid",
    "Myeloid",
    "Pericyte"
)

# Cut-offs; presumably applied to adjusted p-values and |log2 fold change|
# further down the notebook -- TODO confirm, they are not used in the cells
# directly below.
q_val_threshold <- 0.05
log2_FC_threshold <- 0.5

In [3]:
# Directory that receives the CSVs of significant peaks. The trailing slash is
# kept on purpose: output paths are built by pasting a file name directly
# onto this string.
significant_peaks_dir <- "significant_peak_csvs/"

# Create the directory only when it is missing, so that re-running this cell
# does not emit an "already exists" warning, while genuine creation failures
# (e.g. permissions) are still reported by dir.create().
if (!dir.exists(significant_peaks_dir)) {
    dir.create(significant_peaks_dir)
}

In [18]:
# For every cell type x contrast pair: read the matching results CSV, keep the
# rows flagged in its `significant` column, and write them to
# `significant_peaks_dir`. Pairs without a results file are reported and skipped.
for (cell_type in cell_types) {

    # Iterate over the file-name stems themselves rather than over 1:n, which
    # would yield c(1, 0) for an empty vector and index out of range.
    for (contrast_file_name in corresponding_contrast_file_names) {
        contrast_file_path <- paste0("pydeseq2_results/", cell_type, "_", contrast_file_name, "_results.csv")

        if (!file.exists(contrast_file_path)) {
            print(paste0("File does not exist:", contrast_file_path))
            next
        }

        # The first CSV column is used as row names.
        df <- read.csv(contrast_file_path, row.names = 1)
        # `X.log10.padj.` is the name read.csv() gives the "-log10(padj)" header.
        df <- df %>% rename(neg_log10_p = `X.log10.padj.`)

        # filter to the significant
        # read.csv() turns a column that holds only True/False into a logical
        # vector, in which case comparing against the string "True" matches
        # nothing. as.logical() accepts both that logical form and the raw
        # "True"/"False" text. NA flags are treated as not significant instead
        # of producing all-NA rows.
        is_significant <- as.logical(df$significant)
        significant_df <- df[!is.na(is_significant) & is_significant, ]

        write.csv(significant_df, paste0(significant_peaks_dir, cell_type, "_", contrast_file_name, "_significant_results.csv"))
    }
}