# Set-up

In [None]:
# Root directory of the input data. The CTD search directory is derived from
# it with file.path(base.data.dir, "by_Subregion").
base.data.dir <- "/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data"


In [2]:
library(dplyr)
library(glue)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [3]:
# Expose plyr's rbind.fill under a bare name without attaching plyr. The
# aggregation across annotation levels looks it up by name through
# do.call(what='rbind.fill').
rbind.fill <- plyr::rbind.fill

In [None]:
# Emit a timestamped progress line on the message stream.
#
# Args:
#   t0: optional start time, as returned by Sys.time(). When supplied, the
#       time elapsed since t0 is appended to the timestamp, in seconds.
# Returns:
#   NULL, invisibly; the function is called for its side effect only.
my_time <- function(t0 = NULL) {
    now <- Sys.time()
    stamp <- paste0("[", as.character(now), "]")
    if (!is.null(t0)) {
        # `now - t0` chooses its unit automatically (secs, mins, hours, ...),
        # so a 2-minute gap would be printed as "+2s". Pin the unit to seconds
        # so that the "s" suffix is always correct.
        elapsed <- as.numeric(difftime(now, t0, units = "secs"))
        stamp <- paste0(stamp, " (+", as.character(elapsed), "s)")
    }
    message(stamp)
    invisible(NULL)
}

# **Part 1: List the CTDs to analyse**

In [None]:
# Directory that is searched recursively for CTD files.
in.dir <- file.path(base.data.dir, "by_Subregion")

# Keep every path that contains the literal substring "ctd_", sorted.
# TRUE is spelled out because the shorthand T is an ordinary variable that can
# be reassigned, whereas TRUE is a reserved word.
ctd.files <- sort(grep(
    "ctd_",
    list.files(in.dir, full.names = TRUE, recursive = TRUE),
    value = TRUE,
    fixed = TRUE
))
ctd.files
length(ctd.files)

# **Part 2: List the gene lists to analyse**

In [10]:
# Label embedded in the names of the result files. It is pinned to a fixed
# date; the date-stamped alternative is kept for reference:
# results_name <- paste0("Diseases_", Sys.Date())
results_name <- "Diseases_2024-11-28"
results_name

In [None]:
# CSV of gene/list memberships: the first column (read in as V1) holds the
# gene identifiers and every other column is coerced to a logical membership
# flag for one gene list.
gene.list.file <- '/users/genomics/xoel/codebases/gene_disease_asociation/parsed_lists_with_nicola.union.csv'

disease.associations <- data.table::fread(
    file = gene.list.file,
    data.table = FALSE) %>% rename(Gene = V1)

# Build one character vector of genes per list column, named after the column.
# which() drops rows whose flag is NA; indexing with the raw logical vector
# would turn every NA flag into an NA entry in the gene list.
disease.associations.lists <- lapply(
    colnames(disease.associations)[-1],
    function(x) {
        is.member <- as.logical(disease.associations[, x])
        disease.associations$Gene[which(is.member)]
    }) %>%
    setNames(nm = colnames(disease.associations)[-1])
diseases <- names(disease.associations.lists)

# **Part 3: Run EWCE**

In [None]:
# Number of bootstrap replicates handed to EWCE; the value is also embedded in
# the names of the output files.
reps <- 10000
# When FALSE, a CTD whose results file already exists is skipped. Spelled out
# as FALSE because the shorthand F is an ordinary, reassignable variable.
overwrite <- FALSE

# Gene lists to test, and their names.
lists <- disease.associations.lists
list.names <- names(lists)

In [None]:
# Run the EWCE bootstrap enrichment test for every CTD file, every annotation
# level of that CTD and every gene list, then write one CSV per output table
# next to the CTD file. A CTD is skipped when its results CSV already exists,
# unless `overwrite` is TRUE.

# detectCores() may return NA when the number of cores cannot be determined;
# fall back to one core in that case. Computed once because it does not change
# between iterations.
n.cores <- parallel::detectCores()
if (is.na(n.cores)) {
    n.cores <- 1L
}

for (ctd.file in ctd.files){

    path <- dirname(ctd.file)
    t0 <- Sys.time()

    my_time(t0)

    res.file <- glue('{path}/EWCE.{results_name}.{reps}_reps.results.csv')
    print(res.file)

    # `||` is the scalar, short-circuiting operator intended for if() conditions.
    if (overwrite || !file.exists(res.file)){
        message('Running because results were not found or overwrite is set to T')

        # load() returns the names of the objects it restored. Stop when the
        # file does not provide `ctd`, rather than continuing with whatever
        # value `ctd` currently holds.
        loaded <- load(ctd.file, verbose=TRUE)
        if (!'ctd' %in% loaded){
            stop(glue('No object named "ctd" was loaded from {ctd.file}'), call. = FALSE)
        }

        n.levels <- length(ctd)
        print(glue('There are {n.levels} annotation levels in this CTD'))

        # The aggregation below indexes the first level and the first list, so
        # both must exist.
        if (n.levels == 0 || length(list.names) == 0){
            stop(glue('Nothing to run for {ctd.file}: {n.levels} annotation levels, {length(list.names)} gene lists'),
                 call. = FALSE)
        }

        #########################################################
        # FOR N LEVELS
        # Bootstrap significance test, no control for transcript length and GC content
        # seq_len()/seq_along() are used instead of 1:n, which counts down to 0
        # when n is 0.
        full_results <- lapply(seq_len(n.levels), function(annotLevel){

            #########################################################
            # FOR EACH LIST
            level.results <- lapply(seq_along(list.names), function(list.i){
                list.name <- list.names[list.i]
                print(glue('{list.i+(annotLevel-1)*length(list.names)}/{length(list.names)*n.levels}:\t{list.name}'))
                my_time(t0)

                full_results <- EWCE::bootstrap_enrichment_test(sct_data = ctd,
                                                                geneSizeControl = FALSE,
                                                                no_cores = n.cores,
                                                                sctSpecies = "human",
                                                                genelistSpecies = "human",
                                                                hits = lists[[list.name]],
                                                                reps = reps,
                                                                annotLevel = annotLevel)
                # Turn every element of the EWCE output into a data frame and
                # tag it with the annotation level and the gene list name so
                # the tables can be stacked later.
                for (a in names(full_results)){
                    if (!is.data.frame(full_results[[a]])){
                        full_results[[a]] <- data.frame(hit.cells = full_results[[a]]) %>% t() %>% as.data.frame()
                    }
                    full_results[[a]] <- full_results[[a]] %>% mutate(
                    annotLevel = annotLevel, list = list.name)
                }

                return(full_results)

            }) %>% setNames(nm = list.names)
            #########################################################
            gc()
            # Aggregate results across lists for the current annotation level:
            # one stacked table per EWCE output element.
            table.names <- names(level.results[[1]])
            level.results <- lapply(table.names, function(x){
                lapply(level.results, function(y){y[[x]]}) %>% do.call(what='rbind')
            }) %>% setNames(table.names)

            return(level.results)

        })
        #########################################################
        my_time(t0)

        # Aggregate results across all annotation levels. plyr::rbind.fill is
        # passed as a function so this step does not depend on a global alias
        # being defined.
        table.names <- names(full_results[[1]])
        full_results <- lapply(table.names, function(x){
            lapply(full_results, function(y){y[[x]]}) %>% do.call(what = plyr::rbind.fill)
        }) %>% setNames(table.names)

        #########################################################
        # Save
        # The 'results' table is written to res.file, whose existence makes
        # later runs skip this CTD. Write it last so that an interrupted save
        # does not look like a completed run. order() on the logical keeps the
        # other tables in their original order.
        out.names <- names(full_results)
        out.names <- out.names[order(out.names == 'results')]
        for (iname in out.names){
            data.table::fwrite(full_results[[iname]], glue('{path}/EWCE.{results_name}.{reps}_reps.{iname}.csv'))
        }

        # Release the large objects before the next CTD is loaded.
        ctd <- NULL
        full_results <- NULL

    } else  {
        message('Not re-running because results were found or overwrite is set to F')
    }
}

[2024-11-29 10:42:58.983694] (+1.00135803222656e-05s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Brain/by_Age/6.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.002458] (+0.00571775436401367s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Cortex/by_Age/10.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.011504] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Cortex/by_Age/11.5/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.020414] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Cortex/by_Age/12.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.029267] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Cortex/by_Age/13.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.038127] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Cortex/by_Age/14.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.046938] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Cortex/by_Age/6.9/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.055791] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Cortex/by_Age/8.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.064655] (+1.19209289550781e-05s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Cortex/by_Age/9.2/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.073533] (+6.67572021484375e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Cortex/by_Age/9.5/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.082395] (+7.15255737304688e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Forebrain/by_Age/10.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.091514] (+7.15255737304688e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Forebrain/by_Age/5.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.100729] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Forebrain/by_Age/5.5/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.109588] (+7.15255737304688e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Forebrain/by_Age/6.6/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.118475] (+7.39097595214844e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Forebrain/by_Age/7.5/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.127376] (+6.67572021484375e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Forebrain/by_Age/8.5/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.136209] (+1.26361846923828e-05s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Forebrain/by_Age/9.2/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.145062] (+7.15255737304688e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Head/by_Age/5.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.15392] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Hippocampus/by_Age/12.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.162729] (+6.67572021484375e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Hippocampus/by_Age/14.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.171566] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Striatum/by_Age/14.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.180604] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Striatum/by_Age/6.9/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.189452] (+6.43730163574219e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Subcortex/by_Age/11.5/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.198282] (+7.15255737304688e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Subcortex/by_Age/12.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.207122] (+1.02519989013672e-05s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Subcortex/by_Age/13.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.216007] (+6.67572021484375e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Subcortex/by_Age/8.0/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.224856] (+6.43730163574219e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Subcortex/by_Age/8.5/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.233774] (+6.67572021484375e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Subcortex/by_Age/9.5/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

[2024-11-29 10:42:59.242697] (+6.91413879394531e-06s)



/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/by_Subregion//Telencephalon/by_Age/6.7/EWCE.Diseases_2024-11-28.10000_reps.results.csv


Not re-running because results were found or overwrite is set to F

