In [1]:
library(Seurat)
library(dplyr)

The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
which was just loaded, will retire in October 2023.
Please refer to R-spatial evolution reports for details, especially
https://r-spatial.org/r/2023/05/15/evolution4.html.
It may be desirable to make the sf package available;
package maintainers should consider adding sf to Suggests:.
The sp package is now running under evolution status 2
     (status 2 uses the sf package in place of rgdal)

Attaching SeuratObject

Seurat v4 was just loaded with SeuratObject v5; disabling v5 assays and
validation routines, and ensuring assays work in strict v3/v4
compatibility mode


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




# Data load

In [2]:
# Bring the shared project objects stored in commons.rda into the session,
# listing each object name as it is restored.
load(file = '../data/commons.rda', verbose = TRUE)

Loading objects:
  cell.type.colors
  cell.types
  cell.types.nohighmito
  chr_df
  donor_sex
  fig
  flora_paper_list
  gene_type.df
  imprinted.df
  imprinted.genes
  monkey_paper_list
  monkey.genes
  monkey.markers
  nescreg.genes
  nescreg.genes.no_trg
  nescreg.genes.no_trg.early
  nescreg.list
  nescreg.list.no_trg
  nescreg.list.no_trg.early
  nescreg.markers
  nescreg.markers.no_trg
  nescreg.markers.no_trg.early
  nicola_marker_list
  origin_sex.colors
  pat.de
  pat.de.top20
  pat.list
  pat.neu.de
  pat.neu.de.top20
  pat.neu.list
  pat.neu.top20
  pat.top20
  phases
  phases.colors
  protein_coding.genes
  region.genes
  region.list
  region.markers
  samples
  samples.colors
  sex_chr.genes
  sex_chr.genes.x
  sex_chr.genes.y
  shown_2b_list
  shown_4d_list
  tf.genes
  tfs.df
  top100.sub.pat
  top100.subtype
  valid_chr


In [3]:
# Make sure the output folder exists. `recursive = TRUE` also creates missing
# parent folders, and `showWarnings = FALSE` keeps notebook re-runs quiet when
# the folder is already there.
dir.create('../results/selected_markers/', showWarnings = FALSE, recursive = TRUE)

“'../results/selected_markers' already exists”


## Expression data

In [39]:
## Seurat objects
# no high-mito
seu <- readRDS(file = '../data/DE_DS.NoHighMito.pp.scaled.RDS')
# RG early
seu.rge <- readRDS(file = '../data/DE_DS.RGearly.pp.scaled.RDS')


# Donor -> condition lookup: names are the `cell_origin` values, elements the
# matching `condition`, taken from the distinct pairs in the RG early metadata
id_conditions <- local({
    donor.pairs <- unique(seu.rge@meta.data[, c('cell_origin', 'condition')])
    setNames(donor.pairs$condition, donor.pairs$cell_origin)
})

id_conditions


## Seurat markers

In [42]:
# no high-mito
# V1 is dropped (presumably the unnamed row-name column of the CSV) and the
# cluster column becomes a factor ordered as in `cell.types.nohighmito`.
res <- data.table::fread('../results/markers/markers.nohighmito.csv', data.table = FALSE)
res <- mutate(res, V1 = NULL, anno_cluster_fct = factor(anno_cluster_fct, cell.types.nohighmito))
res <- subset(res, anno_cluster_fct != 'High-mito')

# RG early
res.rge <- mutate(
    data.table::fread('../results/markers/markers.rge_phases.csv', data.table = FALSE),
    V1 = NULL)

## Seurat markers per donor

In [43]:
# per donor no high-mito
res.donors <- data.table::fread('../results/markers/markers_per_donor.nohighmito.csv', data.table = FALSE)
res.donors <- mutate(
    res.donors,
    V1 = NULL,
    anno_cluster_fct = factor(anno_cluster_fct, cell.types.nohighmito),
    # donor levels follow the order of `samples`, limited to donors in the table
    donor = factor(donor, samples[samples %in% unique(donor)]))
res.donors <- subset(res.donors, anno_cluster_fct != 'High-mito')


## Pseudobulk markers

In [44]:
# rge
pb.res <- mutate(
    data.table::fread('../results/markers_pseudobulk/neighbors_3_314/ASD_vs_Ctrl.csv', data.table = FALSE),
    V1 = NULL)
# phases
pb.phase.res <- mutate(
    data.table::fread('../results/markers_pseudobulk/neighbors_3_314/ASD_vs_Ctrl_perPhase.csv', data.table = FALSE),
    V1 = NULL)

# Functions

### Annotate

In [161]:
# Annotate a marker table with gene-level metadata.
#
# Arguments
#   df             marker table. Must contain the column named by `gene_var`
#                  and, when `direction` or `ranking` is set, `logfc_var`.
#   gene_var       name of the column holding the gene symbols; it is also the
#                  key used for every merge below.
#   logfc_var      name of the column holding the log fold change.
#   tfs            add `gene.is.tf`: gene is in the global `tf.genes`.
#   imprinting     left-join the global `imprinted.df` (keyed by its `gene`
#                  column); missing values become 'Not in DB'.
#   chromosome     left-join the global `chr_df` (keyed by `hgnc_symbol`),
#                  rename its `chr` column to `chromosome`, and add
#                  `in.sex_chr` (gene is in the global `sex_chr.genes`).
#   biotype        left-join the global `gene_type.df` (keyed by
#                  `hgnc_symbol`); missing `protein_coding` becomes FALSE and
#                  other missing values 'Not in DB'.
#   direction      add `overexpressed.in`: `pos`, `neg` or 'None' according to
#                  the sign of the log fold change.
#   ranking        add `rank.<pos>` / `rank.<neg>`: rank over the whole table
#                  by decreasing / increasing log fold change, Inf for genes
#                  whose fold change is not on that side.
#   pos, neg       labels of the positive / negative fold-change direction.
#   extra_genesets optional named list of gene vectors; each entry adds one
#                  logical membership column named after the entry.
#   drop_existing  remove annotation columns left over from a previous run
#                  before annotating again.
#
# Returns the annotated table. merge() sorts on the key by default, so rows
# come back ordered by gene whenever one of the joins is enabled.
annotate_markers <- function(df, gene_var='gene', logfc_var='avg_log2FC',
                             tfs = TRUE, imprinting = TRUE, chromosome=TRUE, biotype=TRUE,
                             direction=TRUE, ranking=TRUE, pos='ASD', neg='Ctrl',
                             extra_genesets = NULL, drop_existing=TRUE){

    # Working copy of the gene column; removed again before returning
    df$gene_var_temp <- df[, gene_var, drop=TRUE]

    if (drop_existing){
        # The rank columns are named after `pos`/`neg`, so stale ones are
        # dropped for whatever labels are in use, not only ASD/Ctrl
        drop.cols <- unique(c(
            'gene.is.tf', 'Imprinted.Status', 'Expressed.Allele', 'rank.Ctrl',
            'rank.ASD', 'overexpressed.in', 'overexpressed.in.', 'av.in.monkey',
            'chromosome', 'in.sex_chr', 'gene_biotype', 'protein_coding',
            paste0('rank.', c(pos, neg))))
        to.drop <- colnames(df) %in% drop.cols
        if (any(to.drop)){
            message('Dropping [', paste0(colnames(df)[to.drop], collapse=', '), ']')
            # drop=FALSE: stay a data frame even if a single column is left
            df <- df[, !to.drop, drop=FALSE]
        }
    }

    if (direction || ranking){
        df$logfc_var_temp <- df[, logfc_var, drop=TRUE]
    }

    if (tfs){
        # TF genes
        df$gene.is.tf <- df$gene_var_temp %in% tf.genes
    }

    if (imprinting){
        # Imprinting data, joined on the caller's gene column
        df <- merge(df, imprinted.df, by.x=gene_var, by.y='gene', all.x=TRUE, all.y=FALSE)
        # Set NAs
        imp.cols <- colnames(df) %in% colnames(imprinted.df)
        df[, imp.cols][is.na(df[, imp.cols])] <- 'Not in DB'
    }

    if (biotype){
        # biotype/protein_coding data
        df <- merge(df, gene_type.df, by.x=gene_var, by.y='hgnc_symbol', all.x=TRUE, all.y=FALSE)
        # Set NAs (protein_coding first, so that it stays logical)
        df[is.na(df$protein_coding), 'protein_coding'] <- FALSE
        type.cols <- colnames(df) %in% colnames(gene_type.df)
        df[, type.cols][is.na(df[, type.cols])] <- 'Not in DB'
    }

    # Direction and rank
    if (direction){
        df$overexpressed.in <- plyr::mapvalues(x = sign(df$logfc_var_temp), from=c(1,-1,0), to=c(pos, neg, 'None'))
    }
    if (ranking){
        # Ranks are global, so any grouping is removed first. The columns are
        # written under their final names directly instead of being renamed
        # afterwards with a regular expression.
        df <- dplyr::ungroup(df)
        logfc <- df$logfc_var_temp
        df[[paste0('rank.', pos)]] <- ifelse(logfc > 0, rank(-logfc), Inf)
        df[[paste0('rank.', neg)]] <- ifelse(logfc < 0, rank(logfc), Inf)
    }


    if (!is.null(extra_genesets)){
        # Extra boolean columns
        for (col in names(extra_genesets)){
            df[, col] <- df$gene_var_temp %in% extra_genesets[[col]]
        }
    }

    if (chromosome){
        # Chromosome
        df <- merge(df, chr_df, by.x=gene_var, by.y='hgnc_symbol', all.x=TRUE, all.y=FALSE)
        df <- dplyr::rename(df, chromosome=chr)

        # Sex chromosomes' genes
        df$in.sex_chr <- df$gene_var_temp %in% sex_chr.genes
    }

    # Remove the working columns
    return(df[, !grepl('_var_temp', colnames(df), fixed=TRUE), drop=FALSE])
}

### Report

In [162]:
# Emit, as messages, how many distinct values the `gene` column of `df` holds:
# one overall count, or one count per value of the `by.group` column when
# `by.group` is given. Always returns NULL.
report.markers <- function(df, by.group=NULL){
    count.genes <- function(tbl){
        length(unique(tbl$gene))
    }

    if (!is.null(by.group)){
        pieces <- split(df, as.character(df[, by.group, drop=T]))
        for (piece in pieces){
            # label of the group, read from its first row
            label <- piece[1, by.group]
            message(paste0('Number of genes in ', label, ': ', count.genes(piece)))
        }
    } else {
        message(paste('Number of genes:', count.genes(df)))
    }
    return(NULL)
}

### Filter top

In [163]:
# Keep the `topn` best rows of a marker table, optionally within each group.
#
# Arguments
#   df        marker table (data frame).
#   topby     'pval'  -> smallest p-values first; read from column `p_val`,
#                        or from `pval` when `p_val` is absent;
#             'logfc' -> largest absolute value of column `logfc` first.
#   topn      number of rows to keep (per group when `by.group` is given).
#   by.group  NULL, or the name(s) of the column(s) defining the groups.
#
# Ties are not expanded: exactly the first `topn` rows of the ordering are
# kept, and missing values sort last. With `by.group` the groups are stacked
# in the sort order of their labels and row names are reset.
#
# Only base R is used and the per-group work is done by an inner helper, so
# the copy of this function stored with saveRDS() needs neither dplyr nor the
# name `subset.top` at the place where it is loaded.
subset.top <- function(df, topby='pval', topn=30, by.group=NULL){

    if (!topby %in% c('pval', 'logfc')){
        stop('Valid topby: pval, logfc')
    }

    # Top rows of one ungrouped table
    take.top <- function(tbl){
        if (topby == 'pval'){
            p.col <- intersect(c('p_val', 'pval'), colnames(tbl))
            if (length(p.col) == 0){
                stop("topby = 'pval' needs a 'p_val' (or 'pval') column in df")
            }
            sort.key <- tbl[[p.col[1]]]
        } else {
            if (!'logfc' %in% colnames(tbl)){
                stop("topby = 'logfc' needs a 'logfc' column in df")
            }
            # negated so that an ascending, stable order() puts the largest
            # absolute fold change first
            sort.key <- -abs(tbl[['logfc']])
        }
        ranked <- order(sort.key)
        kept <- tbl[ranked[seq_len(min(topn, length(ranked)))], , drop=FALSE]
        # Renumber the rows unless the table carries real (character) row names
        if (!is.character(attr(tbl, 'row.names'))){
            rownames(kept) <- NULL
        }
        kept
    }

    if (is.null(by.group)){
        return(take.top(df))
    }

    # Nothing to split: hand the empty table back with its columns intact
    if (nrow(df) == 0){
        return(df)
    }

    # One label per row, built from the grouping column(s)
    group.label <- do.call(paste, c(unname(as.list(df[, by.group, drop=FALSE])), sep=' '))
    stacked <- as.data.frame(do.call('rbind', lapply(split(df, group.label), take.top)))
    rownames(stacked) <- NULL
    return(stacked)
}

# Store the function object itself on disk (presumably so that other
# notebooks can readRDS() it instead of redefining it -- confirm with users)
saveRDS(subset.top, '../data/subset.top.fun.rds')

### Filter pct of condition

In [164]:
# Thin wrapper around Seurat::DotPlot: build the dot plot for the unique
# `features` of `seu` and return only its `$data` table. Extra arguments in
# `...` are forwarded to DotPlot.
get.exp.data <- function(seu, features, ...){
    genes <- unique(features)
    dot.plot <- Seurat::DotPlot(object = seu, features = genes, ...)
    dot.plot$data
}

In [165]:
# Select the genes for which enough non-control samples exceed the controls.
#
# Arguments
#   exp.data              table with columns `id`, `features.plot`, `avg.exp`,
#                         `pct.exp` and `avg.exp.scaled` (the columns read
#                         below); in this file it comes from get.exp.data().
#   id_conditions         named vector: names are sample ids, values the
#                         condition of each sample.
#   control.cond          condition whose samples form the reference.
#   summary.function      collapses the reference samples to one value per
#                         gene and metric.
#   fr.cond.over.control  minimum fraction of the remaining samples that must
#                         lie above the reference value.
#
# Returns the genes (as character) that reach the fraction in all three
# metrics at once.
get.genes.fraction.cond.samples.over.controls <- function(
    exp.data, 
    id_conditions,
    control.cond = 'Ctrl',
    summary.function = median,
    fr.cond.over.control = 1
    
){

    # Label every sample with its condition
    exp.data <- mutate(
        exp.data,
        condition = plyr::mapvalues(id, from=names(id_conditions), to=id_conditions))

    reference.rows <- subset(exp.data, condition == control.cond)
    other.rows <- subset(exp.data, condition != control.cond)

    # One reference value per gene and metric
    control.stats <- summarise_at(
        group_by(reference.rows, features.plot, condition),
        .vars = vars(avg.exp, pct.exp, avg.exp.scaled),
        .funs = summary.function)

    # Put each remaining sample next to the reference of the same gene
    paired <- merge(
        other.rows,
        control.stats, by='features.plot', 
        suffixes=c('','.ctrl'))

    # Per gene: fraction of samples above the reference, metric by metric
    over.ctrl <- summarise(
        group_by(paired, features.plot),
        avg.exp.cond.over.control = mean(avg.exp - avg.exp.ctrl > 0),
        pct.exp.cond.over.control = mean(pct.exp - pct.exp.ctrl > 0),
        avg.exp.scaled.cond.over.control = mean(avg.exp.scaled - avg.exp.scaled.ctrl > 0))

    # A gene passes only when every metric reaches the requested fraction
    passes <- apply(over.ctrl[, -1] >= fr.cond.over.control, MARGIN = 1, FUN = all)
    genes.over <- as.character(over.ctrl[[1]][passes])

    return(genes.over)
}

In [166]:
# Keep the markers whose per-sample expression separates the two conditions.
#
# For every group of `res`, per-sample expression of the group's genes is
# obtained with get.exp.data() and two gene sets are derived with
# get.genes.fraction.cond.samples.over.controls():
#   * test markers:    non-`control.cond` samples above the `control.cond` summary
#   * control markers: non-`test.cond` samples above the `test.cond` summary
#
# Arguments
#   res                   marker table with a `gene` column; a `logfc` column
#                         is required when `consistent.logfc` is TRUE or when
#                         ranking by 'logfc'.
#   seu                   Seurat object holding the expression data.
#   id_conditions         named vector sample id -> condition.
#   by.group              column of `res` defining the groups, or NULL to
#                         treat the table as a single group.
#   id.sample             metadata variable identifying the samples (passed
#                         to DotPlot as `group.by`).
#   group.var             variable fetched from `seu` to select the cells of
#                         each group, or NULL to use all cells.
#                         NOTE(review): with by.group = NULL the group label
#                         is 'temp', so group.var should be NULL as well --
#                         confirm intended usage.
#   control.cond, test.cond  condition labels.
#   summary.function, fr.cond.over.control
#                         forwarded to the per-gene sample comparison.
#   topn, topby           optional top-n selection per group (subset.top),
#                         applied when `topn` is finite.
#   consistent.logfc      keep test markers only with logfc > 0 and control
#                         markers only with logfc < 0.
#
# Returns the rows of `res` that passed, test markers first. The helper column
# `temp_id` is stripped from the result (it used to leak into the output)
# unless `res` already had a column of that name.
filter.condition.comparison <- function(res, seu,
                                        id_conditions,
                                        by.group = 'anno_cluster',
                                        id.sample = 'cell_origin',
                                        group.var = 'anno_cluster',
                                        
                                        control.cond = 'Ctrl',
                                        test.cond = 'ASD',
                                        summary.function = median,
                                        fr.cond.over.control = 1, 
                                        topn=20, topby='logfc',
                                        consistent.logfc = T){
    had.temp.id <- 'temp_id' %in% colnames(res)

    if (is.null(by.group)){
        res$temp_id <- 'temp'
        by.group <- 'temp_id'
    } else {
        res$temp_id <- res[, by.group, drop=TRUE]
    }

    # Genes of each group; groups without genes (e.g. unused factor levels)
    # are skipped
    list.markers <- split(res$gene, res$temp_id)
    list.markers <- list.markers[lengths(list.markers) != 0]

    # Per-sample expression of each group's genes within that group's cells
    exp.data.list <- lapply(
        names(list.markers),
        function(cl){
            message(cl)
            subseu <- if(is.null(group.var)){seu}else{seu[, FetchData(seu, group.var) == cl]}
            get.exp.data(
                subseu,
                list.markers[[cl]],
                group.by=id.sample) %>% mutate(cluster=cl)
            })

    # Rows of `res` whose gene, within its group, has the required fraction of
    # non-reference samples above the reference condition
    markers.over <- function(reference.cond){
        as.data.frame(do.call('rbind', lapply(
            exp.data.list,
            function(exp.data){
                valid.genes <- get.genes.fraction.cond.samples.over.controls(
                    exp.data, 
                    control.cond = reference.cond,
                    fr.cond.over.control = fr.cond.over.control,
                    summary.function = summary.function,
                    id_conditions = id_conditions)
                cl <- unique(exp.data[, 'cluster', drop=TRUE])[1]
                res[(res[, by.group, drop=TRUE] == cl) & (res$gene %in% valid.genes), ]
            })))
    }

    res.test <- markers.over(control.cond)
    res.control <- markers.over(test.cond)

    print(paste(test.cond,'markers'))
    report.markers(res.test, by.group=by.group)
    print(paste(control.cond,'markers'))
    report.markers(res.control, by.group=by.group)

    if (consistent.logfc){
        print('Filtering genes with consistent test/control logFC')
        res.test <- subset(res.test, logfc > 0)
        res.control <- subset(res.control, logfc < 0)
        print('Test condition')
        report.markers(res.test, by.group=by.group)
        print('Control condition')
        report.markers(res.control, by.group=by.group)
    }

    if (is.finite(topn)){
        print(paste('Filtering top-n:', topn, ', by: ', topby))
        res.test <- subset.top(res.test, topby=topby, topn=topn, by.group=by.group)
        res.control <- subset.top(res.control, topby=topby, topn=topn, by.group=by.group)
        print('Test condition')
        report.markers(res.test, by.group=by.group)
        print('Control condition')
        report.markers(res.control, by.group=by.group)
    }

    filt.res <- rbind(res.test, res.control)
    rownames(filt.res) <- NULL

    # The helper column lives in the rows that are returned, so it has to be
    # removed from the result, not from the local copy of `res`
    if (!had.temp.id){
        filt.res$temp_id <- NULL
    }

    return(filt.res)

}

### Filter general function

In [167]:
# Apply the marker filters in sequence, reporting gene counts after each step.
#
# Steps (each one is skipped when its threshold is NULL / not finite / FALSE):
#   1. p-value:          `p_val_var` <= `p_val_thres` (rows with NA removed)
#   2. fold change:      |logfc| >= `logfc_thres`     (rows with NA removed)
#   3. expressing cells: `pct.1` (logfc > 0) or `pct.2` (otherwise) >= `pct.exp.thres`
#   4. pct difference:   pct of the over-expressing side minus the other
#                        >= `pct.exp.diff.thres`, or the other side is 0
#   5. top-n per group:  subset.top(topby, topn)
#   6. condition check:  filter.condition.comparison(), which receives
#                        `seu`, `id_conditions`, `group.var`, `id.sample`,
#                        `control.cond`, `test.cond`, `summary.function`,
#                        `fr.cond.over.control`, `comp.topn`, `comp.topby`
#                        and `consistent.logfc`
#
# Arguments
#   res        marker table with columns `gene`, `pct.1`, `pct.2`, the
#              p-value column `p_val_var` and the fold-change column
#              `logfc_var`.
#   seu, id_conditions
#              required when `compare.conditions` is TRUE; this is checked up
#              front so the failure does not surface only after all other
#              filters have run.
#   by.group   column of `res` used for the per-group reports and selections.
#   return_full
#              FALSE: return only the rows that passed every filter;
#              TRUE:  return all rows of `res` plus a logical `filter.ok`.
#
# In both modes the result carries an added `logfc` column (copy of
# `logfc_var`). Rows whose pct columns or fold change are NA are dropped by
# steps 3 and 4 instead of turning into all-NA rows.
filter.marker.genes <- function(res, seu = NULL,
                                by.group = 'anno_cluster',
                                group.var = 'anno_cluster',
                                
                                p_val_thres = 0.05,
                                p_val_var = 'p_val',
                                logfc_var = 'avg_log2FC',
                                logfc_thres = NULL,
                                pct.exp.thres = 0.05,
                                pct.exp.diff.thres = 0.05,
                                
                                topn=Inf,
                                topby='logfc', 

                                compare.conditions = T,
                                
                                id_conditions=NULL,
                                id.sample = 'cell_origin',
                                control.cond = 'Ctrl',
                                test.cond = 'ASD',
                                summary.function = mean,
                                fr.cond.over.control = 1, 
                                comp.topn=Inf, 
                                comp.topby='logfc',
                                consistent.logfc = T,                             
                                
                                return_full = F){

    if (compare.conditions && (is.null(seu) || is.null(id_conditions))){
        stop('compare.conditions = TRUE needs both `seu` and `id_conditions`; ',
             'pass them or set compare.conditions = FALSE', call. = FALSE)
    }

    res$logfc <- res[, logfc_var, drop=TRUE]

    if (return_full){
        # Row tag used at the end to flag the surviving rows;
        # seq_len() also copes with an empty table
        res$filt.index <- seq_len(nrow(res))
    }

    filt.res <- res %>% arrange(desc(logfc))

    report.markers(filt.res, by.group=by.group)

    if (!is.null(p_val_thres)){
        print(paste('Filtering by p_val:', p_val_thres))
        filt.res <- filt.res[!is.na(filt.res[ ,p_val_var]), ]
        filt.res <- filt.res[filt.res[ ,p_val_var] <= p_val_thres, ]
        report.markers(filt.res, by.group=by.group)
    }

    if (!is.null(logfc_thres)){
        print(paste('Filtering by logFC:', as.character(logfc_thres)))
        filt.res <- filt.res[!is.na(filt.res[ ,'logfc']), ]
        filt.res <- filt.res[abs(filt.res[ ,'logfc']) >= logfc_thres, ]
        report.markers(filt.res, by.group=by.group)
    }

    if (!is.null(pct.exp.thres)){
        print(paste('Filtering by pct of expressing cells:', pct.exp.thres))

        enough.cells <- ifelse(
            sign(filt.res$logfc) == 1,
            filt.res$pct.1 >= pct.exp.thres,
            filt.res$pct.2 >= pct.exp.thres)
        # which() discards NA, so undecidable rows are removed
        filt.res <- filt.res[which(enough.cells), ]
        report.markers(filt.res, by.group=by.group)
    }

    if (!is.null(pct.exp.diff.thres)){
        print(paste('Filtering by difference of pct of expressing cells:', pct.exp.diff.thres))

        enough.diff <- ifelse(
            sign(filt.res$logfc) == 1,
            (filt.res$pct.1 - filt.res$pct.2 >= pct.exp.diff.thres)| (filt.res$pct.2==0),
            (filt.res$pct.2 - filt.res$pct.1 >= pct.exp.diff.thres)| (filt.res$pct.1==0))
        filt.res <- filt.res[which(enough.diff), ]
        report.markers(filt.res, by.group=by.group)
    }

    if (is.finite(topn)){
        print(paste('Filtering top-n:', topn, ', by: ', topby))
        filt.res <- subset.top(filt.res, topby=topby, topn=topn, by.group = by.group)
        report.markers(filt.res, by.group=by.group)
    }
    if (compare.conditions){
        print(paste('Comparison of conditions:', test.cond, control.cond))
        filt.res <- filter.condition.comparison(
            res=filt.res, seu=seu,
            id_conditions=id_conditions,
            by.group = by.group, 
            group.var = group.var,
            id.sample = id.sample,
            control.cond = control.cond,
            test.cond = test.cond,
            summary.function = summary.function,
            fr.cond.over.control = fr.cond.over.control, 
            topn=comp.topn, topby=comp.topby,
            consistent.logfc = consistent.logfc)

        print('After condition comparison')
        report.markers(filt.res, by.group=by.group)
    }


    if (!return_full){
        return(filt.res)
    }

    # Full table: flag the rows that survived, then drop the row tag
    res$filter.ok <- res$filt.index %in% filt.res$filt.index
    res$filt.index <- NULL
    return(res)
}

In [168]:
# Value of `x` at position `how.many.lower` of its ascending order (the second
# smallest by default); NA when `x` has fewer elements than that.
second.lowest <- function(x, how.many.lower=2){
    ascending.idx <- order(x)
    x[ascending.idx[how.many.lower]]
}
second.lowest(c(1,2,3,4,10,3,4,5))

In [169]:
## Tests that we want to run

# pvalue
# always 0.05 and adjusted

# top filtering: None
# sorting: log2fc

# minimum pct in the overexpressing group: 5%

# condition comparisons
# 2/3 > 2/3 # at least two of three have bigger expression than 2 of 3 from the other group
# 3/3 > 3/3 # all of them have bigger expression than all the other group
# 2/3 > 3/3 # at least two have bigger expression than all the other group


# Run filter.marker.genes() once per condition-comparison setting and collect
# the outcome of each run as one logical column of the marker table.
#
# Arguments
#   res        marker table; it is passed through every run, so each run sees
#              the columns added by the previous ones.
#   p_val_var  p-value column used for the significance filter.
#   ...        further arguments for filter.marker.genes(); a value given
#              here replaces the setting of the same name defined below.
#
# The shared settings take `id_conditions` from the calling environment unless
# it is supplied through `...`. Returns `res` with the columns
# filter.2of3.over.2of3, filter.2of3.over.max, filter.all.over.2of3 and
# filter.all.over.max.
all_tests <- function(res, p_val_var = 'p_val_adj', ...){

    # Settings common to every run
    shared.settings <- list(
        p_val_var = p_val_var,
        p_val_thres = 0.05,

        topn=Inf,
        topby='logfc', 
        pct.exp.diff.thres = 0.1,
        pct.exp.thres = .1,
        logfc_thres = .5,

        compare.conditions = T,
        id.sample = 'cell_origin',
        control.cond = 'Ctrl',
        id_conditions=id_conditions,
        test.cond = 'ASD',
        comp.topn=Inf, 
        comp.topby='logfc',
        consistent.logfc = T,
        return_full = T
    )

    # What changes between runs: the summary of the reference samples and the
    # fraction of the other samples that has to exceed it
    comparison.settings <- list(
        filter.2of3.over.2of3 = list(summary.function = second.lowest, fr.cond.over.control = 2/3),
        filter.2of3.over.max = list(summary.function = max, fr.cond.over.control = 2/3),
        filter.all.over.2of3 = list(summary.function = second.lowest, fr.cond.over.control = 3/3),
        filter.all.over.max = list(summary.function = max, fr.cond.over.control = 3/3)
    )

    overrides <- list(...)

    banner <- paste(rep('#', 60), collapse='')
    print.banner <- function(){
        for (k in 1:3){
            print(banner)
        }
    }

    for (filt in names(comparison.settings)){
        print.banner()
        message(filt)
        print.banner()

        call.args <- c(comparison.settings[[filt]], shared.settings)
        # single-bracket assignment keeps an override whose value is NULL
        for (arg.name in names(overrides)){
            call.args[arg.name] <- overrides[arg.name]
        }

        res <- rlang::exec(.fn = filter.marker.genes, res=res, !!!call.args)
        # store this run's flag under the name of the setting
        colnames(res)[colnames(res) == 'filter.ok'] <- filt
    }

    return(res)
}


# Whole data

In [170]:
# Annotate the no-high-mito markers using the defaults of annotate_markers()
filt.res <- annotate_markers(res)

Dropping [gene.is.tf, Imprinted.Status, Expressed.Allele, rank.Ctrl, rank.ASD, overexpressed.in., av.in.monkey, chromosome, in.sex_chr]



In [171]:
# Run the four condition-comparison filters on the annotated markers.
# On failure the error is printed and then re-raised, so `filt.res` keeps its
# previous value instead of being replaced by the condition object (which the
# export cell would then try to write). The former `cond=` handler named a
# condition class that is never signalled and has been removed.
filt.res <- tryCatch(
    all_tests(filt.res, seu=seu,
              by.group = 'anno_cluster_fct',
              group.var = 'anno_cluster'),
    error = function(cond){
        print(as.character(cond))
        stop(cond)
    })

[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.2of3.over.2of3



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in FGF17-like: 37733

Number of genes in Mes prog: 37733

Number of genes in Neral crest/Mes: 37733

Number of genes in Neuron: 37733

Number of genes in RG early: 37733

Number of genes in RG late: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in Mes prog: 1423

Number of genes in Neral crest/Mes: 51

Number of genes in Neuron: 79

Number of genes in RG early: 3821

Number of genes in RG late: 242



[1] "Filtering by logFC: 0.5"


Number of genes in Mes prog: 13

Number of genes in Neral crest/Mes: 31

Number of genes in Neuron: 30

Number of genes in RG early: 11

Number of genes in RG late: 17



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in Mes prog: 13

Number of genes in Neral crest/Mes: 31

Number of genes in Neuron: 30

Number of genes in RG early: 11

Number of genes in RG late: 17



[1] "Filtering by difference of pct of expressing cells: 0.1"


Number of genes in Mes prog: 11

Number of genes in Neral crest/Mes: 13

Number of genes in Neuron: 23

Number of genes in RG early: 9

Number of genes in RG late: 14



[1] "Comparison of conditions: ASD Ctrl"


Mes prog

RG early

RG late

Neral crest/Mes

Neuron

The following `from` values were not present in `x`: D8_Ctrl_317

The following `from` values were not present in `x`: D8_Ctrl_317



[1] "ASD markers"


Number of genes in Mes prog: 7

Number of genes in Neral crest/Mes: 3

Number of genes in Neuron: 10

Number of genes in RG early: 7

Number of genes in RG late: 11



[1] "Ctrl markers"


Number of genes in Mes prog: 4

Number of genes in Neral crest/Mes: 6

Number of genes in Neuron: 11

Number of genes in RG early: 2

Number of genes in RG late: 3



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in Mes prog: 6

Number of genes in Neral crest/Mes: 3

Number of genes in Neuron: 10

Number of genes in RG early: 7

Number of genes in RG late: 11



[1] "Control condition"


Number of genes in Mes prog: 3

Number of genes in Neral crest/Mes: 6

Number of genes in Neuron: 10

Number of genes in RG early: 2

Number of genes in RG late: 3



[1] "After condition comparison"


Number of genes in Mes prog: 9

Number of genes in Neral crest/Mes: 9

Number of genes in Neuron: 20

Number of genes in RG early: 9

Number of genes in RG late: 14



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.2of3.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in FGF17-like: 37733

Number of genes in Mes prog: 37733

Number of genes in Neral crest/Mes: 37733

Number of genes in Neuron: 37733

Number of genes in RG early: 37733

Number of genes in RG late: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in Mes prog: 1423

Number of genes in Neral crest/Mes: 51

Number of genes in Neuron: 79

Number of genes in RG early: 3821

Number of genes in RG late: 242



[1] "Filtering by logFC: 0.5"


Number of genes in Mes prog: 13

Number of genes in Neral crest/Mes: 31

Number of genes in Neuron: 30

Number of genes in RG early: 11

Number of genes in RG late: 17



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in Mes prog: 13

Number of genes in Neral crest/Mes: 31

Number of genes in Neuron: 30

Number of genes in RG early: 11

Number of genes in RG late: 17



[1] "Filtering by difference of pct of expressing cells: 0.1"


Number of genes in Mes prog: 11

Number of genes in Neral crest/Mes: 13

Number of genes in Neuron: 23

Number of genes in RG early: 9

Number of genes in RG late: 14



[1] "Comparison of conditions: ASD Ctrl"


Mes prog

RG early

RG late

Neral crest/Mes

Neuron

The following `from` values were not present in `x`: D8_Ctrl_317

The following `from` values were not present in `x`: D8_Ctrl_317



[1] "ASD markers"


Number of genes in Mes prog: 4

Number of genes in Neral crest/Mes: 3

Number of genes in Neuron: 3

Number of genes in RG early: 3

Number of genes in RG late: 7



[1] "Ctrl markers"


Number of genes in Mes prog: 3

Number of genes in Neral crest/Mes: 3

Number of genes in Neuron: 6

Number of genes in RG early: 2

Number of genes in RG late: 1



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in Mes prog: 4

Number of genes in Neral crest/Mes: 3

Number of genes in Neuron: 3

Number of genes in RG early: 3

Number of genes in RG late: 7



[1] "Control condition"


Number of genes in Mes prog: 3

Number of genes in Neral crest/Mes: 3

Number of genes in Neuron: 6

Number of genes in RG early: 2

Number of genes in RG late: 1



[1] "After condition comparison"


Number of genes in Mes prog: 7

Number of genes in Neral crest/Mes: 6

Number of genes in Neuron: 9

Number of genes in RG early: 5

Number of genes in RG late: 8



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.all.over.2of3



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in FGF17-like: 37733

Number of genes in Mes prog: 37733

Number of genes in Neral crest/Mes: 37733

Number of genes in Neuron: 37733

Number of genes in RG early: 37733

Number of genes in RG late: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in Mes prog: 1423

Number of genes in Neral crest/Mes: 51

Number of genes in Neuron: 79

Number of genes in RG early: 3821

Number of genes in RG late: 242



[1] "Filtering by logFC: 0.5"


Number of genes in Mes prog: 13

Number of genes in Neral crest/Mes: 31

Number of genes in Neuron: 30

Number of genes in RG early: 11

Number of genes in RG late: 17



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in Mes prog: 13

Number of genes in Neral crest/Mes: 31

Number of genes in Neuron: 30

Number of genes in RG early: 11

Number of genes in RG late: 17



[1] "Filtering by difference of pct of expressing cells: 0.1"


Number of genes in Mes prog: 11

Number of genes in Neral crest/Mes: 13

Number of genes in Neuron: 23

Number of genes in RG early: 9

Number of genes in RG late: 14



[1] "Comparison of conditions: ASD Ctrl"


Mes prog

RG early

RG late

Neral crest/Mes

Neuron

The following `from` values were not present in `x`: D8_Ctrl_317

The following `from` values were not present in `x`: D8_Ctrl_317



[1] "ASD markers"


Number of genes in Mes prog: 6

Number of genes in Neral crest/Mes: 1

Number of genes in Neuron: 8

Number of genes in RG early: 6

Number of genes in RG late: 8



[1] "Ctrl markers"


Number of genes in Mes prog: 1

Number of genes in Neral crest/Mes: 6

Number of genes in Neuron: 6

Number of genes in RG early: 1



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in Mes prog: 6

Number of genes in Neral crest/Mes: 1

Number of genes in Neuron: 8

Number of genes in RG early: 6

Number of genes in RG late: 8



[1] "Control condition"


Number of genes in Mes prog: 1

Number of genes in Neral crest/Mes: 6

Number of genes in Neuron: 6

Number of genes in RG early: 1



[1] "After condition comparison"


Number of genes in Mes prog: 7

Number of genes in Neral crest/Mes: 7

Number of genes in Neuron: 14

Number of genes in RG early: 7

Number of genes in RG late: 8



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.all.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in FGF17-like: 37733

Number of genes in Mes prog: 37733

Number of genes in Neral crest/Mes: 37733

Number of genes in Neuron: 37733

Number of genes in RG early: 37733

Number of genes in RG late: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in Mes prog: 1423

Number of genes in Neral crest/Mes: 51

Number of genes in Neuron: 79

Number of genes in RG early: 3821

Number of genes in RG late: 242



[1] "Filtering by logFC: 0.5"


Number of genes in Mes prog: 13

Number of genes in Neral crest/Mes: 31

Number of genes in Neuron: 30

Number of genes in RG early: 11

Number of genes in RG late: 17



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in Mes prog: 13

Number of genes in Neral crest/Mes: 31

Number of genes in Neuron: 30

Number of genes in RG early: 11

Number of genes in RG late: 17



[1] "Filtering by difference of pct of expressing cells: 0.1"


Number of genes in Mes prog: 11

Number of genes in Neral crest/Mes: 13

Number of genes in Neuron: 23

Number of genes in RG early: 9

Number of genes in RG late: 14



[1] "Comparison of conditions: ASD Ctrl"


Mes prog

RG early

RG late

Neral crest/Mes

Neuron

The following `from` values were not present in `x`: D8_Ctrl_317

The following `from` values were not present in `x`: D8_Ctrl_317



[1] "ASD markers"


Number of genes in Mes prog: 3

Number of genes in Neral crest/Mes: 1

Number of genes in Neuron: 1

Number of genes in RG early: 2

Number of genes in RG late: 2



[1] "Ctrl markers"


Number of genes in Mes prog: 1

Number of genes in Neral crest/Mes: 3

Number of genes in Neuron: 2

Number of genes in RG early: 1



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in Mes prog: 3

Number of genes in Neral crest/Mes: 1

Number of genes in Neuron: 1

Number of genes in RG early: 2

Number of genes in RG late: 2



[1] "Control condition"


Number of genes in Mes prog: 1

Number of genes in Neral crest/Mes: 3

Number of genes in Neuron: 2

Number of genes in RG early: 1



[1] "After condition comparison"


Number of genes in Mes prog: 4

Number of genes in Neral crest/Mes: 4

Number of genes in Neuron: 3

Number of genes in RG early: 3

Number of genes in RG late: 2



In [172]:
## Export
# write.csv() keeps its default row.names = TRUE here, so the file starts with
# an unnamed row-name column.
write.csv(filt.res, '../results/selected_markers/markers_with_filters.nohighmito.csv')

# RG early per Phase

In [173]:
# Annotate the RG early per-phase markers
filt.res.rge <- annotate_markers(res.rge)

# Run the four condition-comparison filters per cell-cycle phase.
# On failure the error is printed and then re-raised: previously it was
# swallowed without any output and the condition object was assigned to
# `filt.res.rge`, which the export below would then try to write.
filt.res.rge <- tryCatch(
    all_tests(filt.res.rge, seu=seu.rge,
              by.group = 'Phase',
              group.var = 'Phase'),
    error = function(cond){
        print(as.character(cond))
        stop(cond)
    })
filt.res.rge
## Export
write.csv(filt.res.rge, '../results/selected_markers/markers_with_filters.rge_phases.csv')

Dropping [gene.is.tf, Imprinted.Status, Expressed.Allele, rank.Ctrl, rank.ASD, overexpressed.in., av.in.monkey, chromosome, in.sex_chr]



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.2of3.over.2of3



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in G1: 37733

Number of genes in G2M: 37733

Number of genes in S: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in G1: 1684

Number of genes in G2M: 2048

Number of genes in S: 1251



[1] "Filtering by logFC: 0.5"


Number of genes in G1: 17

Number of genes in G2M: 12

Number of genes in S: 10



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in G1: 17

Number of genes in G2M: 12

Number of genes in S: 10



[1] "Filtering by difference of pct of expressing cells: 0.1"


Number of genes in G1: 15

Number of genes in G2M: 9

Number of genes in S: 8



[1] "Comparison of conditions: ASD Ctrl"


G1

G2M

S



[1] "ASD markers"


Number of genes in G1: 10

Number of genes in G2M: 7

Number of genes in S: 5



[1] "Ctrl markers"


Number of genes in G1: 5

Number of genes in G2M: 2

Number of genes in S: 3



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in G1: 10

Number of genes in G2M: 7

Number of genes in S: 5



[1] "Control condition"


Number of genes in G1: 5

Number of genes in G2M: 2

Number of genes in S: 3



[1] "After condition comparison"


Number of genes in G1: 15

Number of genes in G2M: 9

Number of genes in S: 8



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.2of3.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in G1: 37733

Number of genes in G2M: 37733

Number of genes in S: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in G1: 1684

Number of genes in G2M: 2048

Number of genes in S: 1251



[1] "Filtering by logFC: 0.5"


Number of genes in G1: 17

Number of genes in G2M: 12

Number of genes in S: 10



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in G1: 17

Number of genes in G2M: 12

Number of genes in S: 10



[1] "Filtering by difference of pct of expressing cells: 0.1"


Number of genes in G1: 15

Number of genes in G2M: 9

Number of genes in S: 8



[1] "Comparison of conditions: ASD Ctrl"


G1

G2M

S



[1] "ASD markers"


Number of genes in G1: 6

Number of genes in G2M: 2

Number of genes in S: 2



[1] "Ctrl markers"


Number of genes in G1: 5

Number of genes in G2M: 2

Number of genes in S: 2



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in G1: 6

Number of genes in G2M: 2

Number of genes in S: 2



[1] "Control condition"


Number of genes in G1: 5

Number of genes in G2M: 2

Number of genes in S: 2



[1] "After condition comparison"


Number of genes in G1: 11

Number of genes in G2M: 4

Number of genes in S: 4



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.all.over.2of3



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in G1: 37733

Number of genes in G2M: 37733

Number of genes in S: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in G1: 1684

Number of genes in G2M: 2048

Number of genes in S: 1251



[1] "Filtering by logFC: 0.5"


Number of genes in G1: 17

Number of genes in G2M: 12

Number of genes in S: 10



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in G1: 17

Number of genes in G2M: 12

Number of genes in S: 10



[1] "Filtering by difference of pct of expressing cells: 0.1"


Number of genes in G1: 15

Number of genes in G2M: 9

Number of genes in S: 8



[1] "Comparison of conditions: ASD Ctrl"


G1

G2M

S



[1] "ASD markers"


Number of genes in G1: 9

Number of genes in G2M: 5

Number of genes in S: 4



[1] "Ctrl markers"


Number of genes in G1: 3

Number of genes in S: 2



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in G1: 9

Number of genes in G2M: 5

Number of genes in S: 4



[1] "Control condition"


Number of genes in G1: 3

Number of genes in S: 2



[1] "After condition comparison"


Number of genes in G1: 12

Number of genes in G2M: 5

Number of genes in S: 6



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.all.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in G1: 37733

Number of genes in G2M: 37733

Number of genes in S: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in G1: 1684

Number of genes in G2M: 2048

Number of genes in S: 1251



[1] "Filtering by logFC: 0.5"


Number of genes in G1: 17

Number of genes in G2M: 12

Number of genes in S: 10



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in G1: 17

Number of genes in G2M: 12

Number of genes in S: 10



[1] "Filtering by difference of pct of expressing cells: 0.1"


Number of genes in G1: 15

Number of genes in G2M: 9

Number of genes in S: 8



[1] "Comparison of conditions: ASD Ctrl"


G1

G2M

S



[1] "ASD markers"


Number of genes in G1: 4

Number of genes in G2M: 1

Number of genes in S: 2



[1] "Ctrl markers"


Number of genes in G1: 1

Number of genes in S: 1



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in G1: 4

Number of genes in G2M: 1

Number of genes in S: 2



[1] "Control condition"


Number of genes in G1: 1

Number of genes in S: 1



[1] "After condition comparison"


Number of genes in G1: 5

Number of genes in G2M: 1

Number of genes in S: 3



gene,p_val,avg_log2FC,pct.1,pct.2,p_val_adj,ident.1,ident.2,ncells.1,ncells.2,⋯,overexpressed.in,rank.ASD,rank.Ctrl,chromosome,in.sex_chr,logfc,filter.2of3.over.2of3,filter.2of3.over.max,filter.all.over.2of3,filter.all.over.max
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<int>,<int>,⋯,<chr>,<dbl>,<dbl>,<chr>,<lgl>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>
A1BG,0.6037602603,-6.837629e-05,0.088,0.098,1,ASD,Ctrl,3971,4634,⋯,Ctrl,Inf,37276,19,FALSE,-6.837629e-05,FALSE,FALSE,FALSE,FALSE
A1BG,0.6947624072,1.044563e-02,0.121,0.119,1,ASD,Ctrl,6033,5116,⋯,ASD,12158,Inf,19,FALSE,1.044563e-02,FALSE,FALSE,FALSE,FALSE
A1BG,0.1560858882,-1.932446e-02,0.084,0.088,1,ASD,Ctrl,7530,5289,⋯,Ctrl,Inf,10873,19,FALSE,-1.932446e-02,FALSE,FALSE,FALSE,FALSE
A1BG-AS1,0.2663185805,-6.692442e-03,0.024,0.029,1,ASD,Ctrl,6033,5116,⋯,Ctrl,Inf,17005,19,FALSE,-6.692442e-03,FALSE,FALSE,FALSE,FALSE
A1BG-AS1,0.5659545658,-3.957209e-03,0.023,0.028,1,ASD,Ctrl,3971,4634,⋯,Ctrl,Inf,19702,19,FALSE,-3.957209e-03,FALSE,FALSE,FALSE,FALSE
A1BG-AS1,0.1763785442,-5.599441e-03,0.018,0.023,1,ASD,Ctrl,7530,5289,⋯,Ctrl,Inf,17943,19,FALSE,-5.599441e-03,FALSE,FALSE,FALSE,FALSE
A1CF,0.0185157613,-1.032206e-03,0.000,0.001,1,ASD,Ctrl,6033,5116,⋯,Ctrl,Inf,27041,10,FALSE,-1.032206e-03,FALSE,FALSE,FALSE,FALSE
A1CF,0.0879682269,-1.528657e-03,0.000,0.001,1,ASD,Ctrl,3971,4634,⋯,Ctrl,Inf,24827,10,FALSE,-1.528657e-03,FALSE,FALSE,FALSE,FALSE
A1CF,0.1158903721,2.162832e-03,0.001,0.000,1,ASD,Ctrl,7530,5289,⋯,ASD,22181,Inf,10,FALSE,2.162832e-03,FALSE,FALSE,FALSE,FALSE
A2M,0.9071098234,-2.790128e-05,0.001,0.001,1,ASD,Ctrl,3971,4634,⋯,Ctrl,Inf,37700,12,FALSE,-2.790128e-05,FALSE,FALSE,FALSE,FALSE


# RG early Pseudobulk

In [174]:
# RG early pseudobulk results: annotate them, then run the sample-consistency
# filters on the whole table (no grouping variable).
# The fold-change column is 'log2FoldChange' and the p-value column is 'padj';
# the two pct-of-expressing-cells thresholds are passed as NULL.
# annotate_markers() and all_tests() are defined earlier in the notebook.
filt.pb.rge <- annotate_markers(pb.res, logfc_var = 'log2FoldChange')

# If all_tests() fails, stop here with the real error message instead of
# storing the condition object in filt.pb.rge and handing it to write.csv().
filt.pb.rge <- tryCatch(
    all_tests(res = filt.pb.rge,
              seu = seu.rge,
              logfc_var = 'log2FoldChange',
              p_val_var = "padj",
              pct.exp.diff.thres = NULL, pct.exp.thres = NULL,
              by.group = NULL,
              group.var = NULL),
    error = function(cond) {
        stop('all_tests() failed for the RG early pseudobulk markers: ',
             conditionMessage(cond), call. = FALSE)
    })

write.csv(filt.pb.rge, '../results/selected_markers/markers_with_filters.rge_pseudobulk.csv')

[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.2of3.over.2of3



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes: 37733



[1] "Filtering by p_val: 0.05"


Number of genes: 4834



[1] "Filtering by logFC: 0.5"


Number of genes: 1326



[1] "Comparison of conditions: ASD Ctrl"


temp



[1] "ASD markers"


Number of genes in temp: 919



[1] "Ctrl markers"


Number of genes in temp: 368



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in temp: 908



[1] "Control condition"


Number of genes in temp: 355



[1] "After condition comparison"


Number of genes: 1263



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.2of3.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes: 37733



[1] "Filtering by p_val: 0.05"


Number of genes: 4834



[1] "Filtering by logFC: 0.5"


Number of genes: 1326



[1] "Comparison of conditions: ASD Ctrl"


temp



[1] "ASD markers"


Number of genes in temp: 489



[1] "Ctrl markers"


Number of genes in temp: 270



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in temp: 487



[1] "Control condition"


Number of genes in temp: 270



[1] "After condition comparison"


Number of genes: 757



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.all.over.2of3



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes: 37733



[1] "Filtering by p_val: 0.05"


Number of genes: 4834



[1] "Filtering by logFC: 0.5"


Number of genes: 1326



[1] "Comparison of conditions: ASD Ctrl"


temp



[1] "ASD markers"


Number of genes in temp: 667



[1] "Ctrl markers"


Number of genes in temp: 280



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in temp: 667



[1] "Control condition"


Number of genes in temp: 280



[1] "After condition comparison"


Number of genes: 947



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.all.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes: 37733



[1] "Filtering by p_val: 0.05"


Number of genes: 4834



[1] "Filtering by logFC: 0.5"


Number of genes: 1326



[1] "Comparison of conditions: ASD Ctrl"


temp



[1] "ASD markers"


Number of genes in temp: 172



[1] "Ctrl markers"


Number of genes in temp: 143



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in temp: 172



[1] "Control condition"


Number of genes in temp: 143



[1] "After condition comparison"


Number of genes: 315



# RG early per Phase Pseudobulk

In [175]:
# RG early pseudobulk results per cell-cycle Phase: annotate them, then run the
# sample-consistency filters within each Phase.
# The fold-change column is 'log2FoldChange' and the p-value column is 'padj';
# the two pct-of-expressing-cells thresholds are passed as NULL.
# annotate_markers() and all_tests() are defined earlier in the notebook.
filt.pb.phase <- annotate_markers(pb.phase.res, logfc_var = 'log2FoldChange')


# If all_tests() fails, stop here with the real error message instead of
# storing the condition object in filt.pb.phase and handing it to write.csv().
filt.pb.phase <- tryCatch(
    all_tests(filt.pb.phase, seu = seu.rge,
              p_val_var = 'padj',
              logfc_var = 'log2FoldChange',
              pct.exp.diff.thres = NULL, pct.exp.thres = NULL,
              by.group = 'Phase',
              group.var = 'Phase'),
    error = function(cond) {
        stop('all_tests() failed for the RG early per-Phase pseudobulk markers: ',
             conditionMessage(cond), call. = FALSE)
    })


## Export

write.csv(filt.pb.phase, '../results/selected_markers/markers_with_filters.rge_perPhase_pseudobulk.csv')

[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.2of3.over.2of3



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in G1: 37733

Number of genes in G2M: 37733

Number of genes in S: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in G1: 2410

Number of genes in G2M: 1800

Number of genes in S: 1486



[1] "Filtering by logFC: 0.5"


Number of genes in G1: 1171

Number of genes in G2M: 751

Number of genes in S: 670



[1] "Comparison of conditions: ASD Ctrl"


G1

G2M

S



[1] "ASD markers"


Number of genes in G1: 803

Number of genes in G2M: 515

Number of genes in S: 443



[1] "Ctrl markers"


Number of genes in G1: 353

Number of genes in G2M: 217

Number of genes in S: 207



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in G1: 794

Number of genes in G2M: 509

Number of genes in S: 438



[1] "Control condition"


Number of genes in G1: 346

Number of genes in G2M: 212

Number of genes in S: 205



[1] "After condition comparison"


Number of genes in G1: 1140

Number of genes in G2M: 721

Number of genes in S: 643



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.2of3.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in G1: 37733

Number of genes in G2M: 37733

Number of genes in S: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in G1: 2410

Number of genes in G2M: 1800

Number of genes in S: 1486



[1] "Filtering by logFC: 0.5"


Number of genes in G1: 1171

Number of genes in G2M: 751

Number of genes in S: 670



[1] "Comparison of conditions: ASD Ctrl"


G1

G2M

S



[1] "ASD markers"


Number of genes in G1: 589

Number of genes in G2M: 269

Number of genes in S: 227



[1] "Ctrl markers"


Number of genes in G1: 261

Number of genes in G2M: 155

Number of genes in S: 151



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in G1: 589

Number of genes in G2M: 269

Number of genes in S: 225



[1] "Control condition"


Number of genes in G1: 261

Number of genes in G2M: 155

Number of genes in S: 151



[1] "After condition comparison"


Number of genes in G1: 850

Number of genes in G2M: 424

Number of genes in S: 376



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.all.over.2of3



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in G1: 37733

Number of genes in G2M: 37733

Number of genes in S: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in G1: 2410

Number of genes in G2M: 1800

Number of genes in S: 1486



[1] "Filtering by logFC: 0.5"


Number of genes in G1: 1171

Number of genes in G2M: 751

Number of genes in S: 670



[1] "Comparison of conditions: ASD Ctrl"


G1

G2M

S



[1] "ASD markers"


Number of genes in G1: 639

Number of genes in G2M: 387

Number of genes in S: 305



[1] "Ctrl markers"


Number of genes in G1: 278

Number of genes in G2M: 172

Number of genes in S: 173



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in G1: 639

Number of genes in G2M: 387

Number of genes in S: 305



[1] "Control condition"


Number of genes in G1: 278

Number of genes in G2M: 172

Number of genes in S: 173



[1] "After condition comparison"


Number of genes in G1: 917

Number of genes in G2M: 559

Number of genes in S: 478



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.all.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in G1: 37733

Number of genes in G2M: 37733

Number of genes in S: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in G1: 2410

Number of genes in G2M: 1800

Number of genes in S: 1486



[1] "Filtering by logFC: 0.5"


Number of genes in G1: 1171

Number of genes in G2M: 751

Number of genes in S: 670



[1] "Comparison of conditions: ASD Ctrl"


G1

G2M

S



[1] "ASD markers"


Number of genes in G1: 287

Number of genes in G2M: 101

Number of genes in S: 80



[1] "Ctrl markers"


Number of genes in G1: 147

Number of genes in G2M: 83

Number of genes in S: 81



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in G1: 287

Number of genes in G2M: 101

Number of genes in S: 80



[1] "Control condition"


Number of genes in G1: 147

Number of genes in G2M: 83

Number of genes in S: 81



[1] "After condition comparison"


Number of genes in G1: 434

Number of genes in G2M: 184

Number of genes in S: 161



# Add RG early phase results

## Phases from pseudobulk

In [176]:
# Label each gene/Phase row of the per-Phase pseudobulk table with the
# direction of its logfc: 'UP', 'DOWN', or 'None' for zero or missing values.
phase.gene.bias <- mutate(
    filt.pb.phase,
    bias.up.down = plyr::mapvalues(sign(logfc),
                                   from = c(1, -1, 0, NA),
                                   to = c('UP', 'DOWN', 'None', 'None')))

# One row per gene, one 'bias.<Phase>' column per Phase
phase.gene.bias <- reshape2::dcast(phase.gene.bias, gene ~ Phase, value.var = 'bias.up.down')
phase.cols <- colnames(phase.gene.bias)[-1]
colnames(phase.gene.bias)[-1] <- paste0('bias.', phase.cols)

# Summarise one gene row. The first element is the gene, the rest are the
# per-Phase labels. apply() hands each row over as a character vector, so the
# logical summaries end up stored as the strings "TRUE"/"FALSE".
describe.phase.bias <- function(gene.row){
    per.phase <- gene.row[-1]
    labelled <- paste(names(per.phase), per.phase, sep = ':')
    directions <- unique(per.phase)
    gene.row['bias.per.phase'] <- paste(labelled, collapse = '|')
    gene.row['is.biased.in.any.phase'] <- any(per.phase != 'None')
    gene.row['same.bias.in.all.phases'] <- (length(directions) == 1) & (!('None' %in% per.phase))
    gene.row['phase.bias'] <- paste(sort(directions), collapse = ',')
    gene.row
}

# apply() returns one column per gene; transpose back to one row per gene
phase.gene.bias <- t(apply(phase.gene.bias, 1, describe.phase.bias))

head(phase.gene.bias)

gene,bias.G1,bias.G2M,bias.S,bias.per.phase,is.biased.in.any.phase,same.bias.in.all.phases,phase.bias
A1BG,DOWN,UP,UP,bias.G1:DOWN|bias.G2M:UP|bias.S:UP,True,False,"DOWN,UP"
A1BG-AS1,DOWN,DOWN,DOWN,bias.G1:DOWN|bias.G2M:DOWN|bias.S:DOWN,True,True,DOWN
A1CF,UP,DOWN,DOWN,bias.G1:UP|bias.G2M:DOWN|bias.S:DOWN,True,False,"DOWN,UP"
A2M,DOWN,UP,UP,bias.G1:DOWN|bias.G2M:UP|bias.S:UP,True,False,"DOWN,UP"
A2M-AS1,UP,UP,UP,bias.G1:UP|bias.G2M:UP|bias.S:UP,True,True,UP
A2ML1,DOWN,DOWN,UP,bias.G1:DOWN|bias.G2M:DOWN|bias.S:UP,True,False,"DOWN,UP"


#### Results of PB RGE

In [177]:
# Annotate phase bias in result of RGE
pb_rge.with_pb_phase <- merge(
    filt.pb.rge%>% mutate(bias.up.down=plyr::mapvalues(sign(log2FoldChange), from=c(1,-1,0, NA), to=c('UP', 'DOWN', 'None', 'None'))), 
    phase.gene.bias, 
    by='gene', all=T,
)


# Annotate coincidence of bias with previous results
pb_rge.with_pb_phase$same.bias.with.all.phases <- with(pb_rge.with_pb_phase, {phase.bias == bias.up.down})
pb_rge.with_pb_phase$same.bias.with.any.phase <- apply(pb_rge.with_pb_phase, MARGIN=1, 
                                                        function(x){grepl(pattern=x['bias.up.down'], x=x['phase.bias'], fixed=T)})

table(pb_rge.with_pb_phase$same.bias.with.all.phases)
table(pb_rge.with_pb_phase$same.bias.with.any.phase)

head(pb_rge.with_pb_phase)

write.csv(pb_rge.with_pb_phase, '../results/selected_markers/markers_with_filters_and_pb_phases.pb_rge.csv')


FALSE  TRUE 
14899 22834 


 TRUE 
37733 

Unnamed: 0_level_0,gene,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene.is.tf,Imprinted.Status,Expressed.Allele,⋯,bias.up.down,bias.G1,bias.G2M,bias.S,bias.per.phase,is.biased.in.any.phase,same.bias.in.all.phases,phase.bias,same.bias.with.all.phases,same.bias.with.any.phase
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>
1,A1BG,32.65552211,0.1308107,0.08679529,1.50711746,0.131780553,0.280381286,False,Not in DB,Not in DB,⋯,UP,DOWN,UP,UP,bias.G1:DOWN|bias.G2M:UP|bias.S:UP,True,False,"DOWN,UP",False,True
2,A1BG-AS1,7.35297715,-0.24742138,0.16505485,-1.49902518,0.1338671,0.283618115,False,Not in DB,Not in DB,⋯,DOWN,DOWN,DOWN,DOWN,bias.G1:DOWN|bias.G2M:DOWN|bias.S:DOWN,True,True,DOWN,True,True
3,A1CF,0.20725657,0.09920926,1.22785182,0.08079905,0.935601763,,False,Not in DB,Not in DB,⋯,UP,UP,DOWN,DOWN,bias.G1:UP|bias.G2M:DOWN|bias.S:DOWN,True,False,"DOWN,UP",False,True
4,A2M,0.52103211,0.27161128,0.57447193,0.47280165,0.636354696,0.78381753,False,Not in DB,Not in DB,⋯,UP,DOWN,UP,UP,bias.G1:DOWN|bias.G2M:UP|bias.S:UP,True,False,"DOWN,UP",False,True
5,A2M-AS1,7.38696178,0.53908219,0.16937609,3.18275256,0.001458822,0.007910834,False,Not in DB,Not in DB,⋯,UP,UP,UP,UP,bias.G1:UP|bias.G2M:UP|bias.S:UP,True,True,UP,True,True
6,A2ML1,0.07158771,-0.44759408,2.93759833,-0.15236735,0.878897206,,False,Not in DB,Not in DB,⋯,DOWN,DOWN,DOWN,UP,bias.G1:DOWN|bias.G2M:DOWN|bias.S:UP,True,False,"DOWN,UP",False,True


#### Results of SC all

In [178]:
# Annotate phase bias in result of all markers
res.with_pb_phase <- merge(
    filt.res%>% mutate(bias.up.down=plyr::mapvalues(sign(avg_log2FC), from=c(1,-1,0, NA), to=c('UP', 'DOWN', 'None', 'None'))), 
    phase.gene.bias, 
    by='gene', all.x=T, all.y=F
)

# Annotate coincidence of bias with previous results
res.with_pb_phase$same.bias.with.all.phases <- with(res.with_pb_phase, {phase.bias == bias.up.down})
res.with_pb_phase$same.bias.with.any.phase <- apply(res.with_pb_phase, MARGIN=1, 
                                                     function(x){grepl(pattern=x['bias.up.down'], x=x['phase.bias'], fixed=T)})

table(res.with_pb_phase$same.bias.with.all.phases)
table(res.with_pb_phase$same.bias.with.any.phase)

head(res.with_pb_phase)

write.csv(res.with_pb_phase, '../results/selected_markers/markers_with_filters_and_pb_phases.nohighmito.csv')

The following `from` values were not present in `x`: NA




 FALSE   TRUE 
119184 107214 


 FALSE   TRUE 
 56207 170191 

Unnamed: 0_level_0,gene,p_val,avg_log2FC,pct.1,pct.2,p_val_adj,ident.1,ident.2,ncells.1,ncells.2,⋯,bias.up.down,bias.G1,bias.G2M,bias.S,bias.per.phase,is.biased.in.any.phase,same.bias.in.all.phases,phase.bias,same.bias.with.all.phases,same.bias.with.any.phase
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<int>,<int>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>
1,A1BG,0.00273237,-0.03120904,0.103,0.146,1,ASD,Ctrl,4754,2302,⋯,DOWN,DOWN,UP,UP,bias.G1:DOWN|bias.G2M:UP|bias.S:UP,True,False,"DOWN,UP",False,True
2,A1BG,0.81236035,-0.00357126,0.098,0.102,1,ASD,Ctrl,17534,15039,⋯,DOWN,DOWN,UP,UP,bias.G1:DOWN|bias.G2M:UP|bias.S:UP,True,False,"DOWN,UP",False,True
3,A1BG,0.96515642,0.01174854,0.078,0.062,1,ASD,Ctrl,256,32,⋯,UP,DOWN,UP,UP,bias.G1:DOWN|bias.G2M:UP|bias.S:UP,True,False,"DOWN,UP",False,True
4,A1BG,0.09186323,-0.0817377,0.057,0.072,1,ASD,Ctrl,2136,1326,⋯,DOWN,DOWN,UP,UP,bias.G1:DOWN|bias.G2M:UP|bias.S:UP,True,False,"DOWN,UP",False,True
5,A1BG,0.14248611,0.13532952,0.087,0.034,1,ASD,Ctrl,126,89,⋯,UP,DOWN,UP,UP,bias.G1:DOWN|bias.G2M:UP|bias.S:UP,True,False,"DOWN,UP",False,True
6,A1BG,0.23931497,-0.07603715,0.121,0.163,1,ASD,Ctrl,447,270,⋯,DOWN,DOWN,UP,UP,bias.G1:DOWN|bias.G2M:UP|bias.S:UP,True,False,"DOWN,UP",False,True


## Phases from seurat

In [179]:
# Label each gene/Phase row of the single-cell RG early per-Phase table with
# the direction of its avg_log2FC: 'UP', 'DOWN', or 'None' for zero or missing
# values.
scphase.gene.bias <- mutate(
    filt.res.rge,
    bias.up.down = plyr::mapvalues(sign(avg_log2FC),
                                   from = c(1, -1, 0, NA),
                                   to = c('UP', 'DOWN', 'None', 'None')))

# One row per gene, one 'bias.<Phase>' column per Phase
scphase.gene.bias <- reshape2::dcast(scphase.gene.bias, gene ~ Phase, value.var = 'bias.up.down')
phase.cols <- colnames(scphase.gene.bias)[-1]
colnames(scphase.gene.bias)[-1] <- paste0('bias.', phase.cols)

# Summarise one gene row. The first element is the gene, the rest are the
# per-Phase labels. apply() hands each row over as a character vector, so the
# logical summaries end up stored as the strings "TRUE"/"FALSE".
describe.phase.bias <- function(gene.row){
    per.phase <- gene.row[-1]
    labelled <- paste(names(per.phase), per.phase, sep = ':')
    directions <- unique(per.phase)
    gene.row['bias.per.phase'] <- paste(labelled, collapse = '|')
    gene.row['is.biased.in.any.phase'] <- any(per.phase != 'None')
    gene.row['same.bias.in.all.phases'] <- (length(directions) == 1) & (!('None' %in% per.phase))
    gene.row['phase.bias'] <- paste(sort(directions), collapse = ',')
    gene.row
}

# apply() returns one column per gene; transpose back to one row per gene
scphase.gene.bias <- t(apply(scphase.gene.bias, 1, describe.phase.bias))

head(scphase.gene.bias)

The following `from` values were not present in `x`: NA



gene,bias.G1,bias.G2M,bias.S,bias.per.phase,is.biased.in.any.phase,same.bias.in.all.phases,phase.bias
A1BG,DOWN,UP,DOWN,bias.G1:DOWN|bias.G2M:UP|bias.S:DOWN,True,False,"DOWN,UP"
A1BG-AS1,DOWN,DOWN,DOWN,bias.G1:DOWN|bias.G2M:DOWN|bias.S:DOWN,True,True,DOWN
A1CF,UP,DOWN,DOWN,bias.G1:UP|bias.G2M:DOWN|bias.S:DOWN,True,False,"DOWN,UP"
A2M,DOWN,DOWN,DOWN,bias.G1:DOWN|bias.G2M:DOWN|bias.S:DOWN,True,True,DOWN
A2M-AS1,UP,UP,UP,bias.G1:UP|bias.G2M:UP|bias.S:UP,True,True,UP
A2ML1,DOWN,DOWN,DOWN,bias.G1:DOWN|bias.G2M:DOWN|bias.S:DOWN,True,True,DOWN


#### Results of SC all

In [180]:
# Annotate phase bias in result of all markers
res.with_sc_phase <- merge(
    filt.res%>% mutate(bias.up.down=plyr::mapvalues(sign(avg_log2FC), from=c(1,-1,0, NA), to=c('UP', 'DOWN', 'None', 'None'))), 
    scphase.gene.bias, 
    by='gene', all.x=T, all.y=F
)

# Annotate coincidence of bias with previous results
res.with_sc_phase$phase.bias.coincident.all <- with(res.with_sc_phase, {phase.bias == bias.up.down})
res.with_sc_phase$phase.bias.coincident.any <- apply(res.with_sc_phase, MARGIN=1, 
                                                     function(x){grepl(pattern=x['bias.up.down'], x=x['phase.bias'], fixed=T)})

table(res.with_sc_phase$phase.bias.coincident.all)
table(res.with_sc_phase$phase.bias.coincident.any)

head(res.with_sc_phase)

write.csv(res.with_sc_phase, '../results/selected_markers/markers_with_filters_and_sc_phases.nohighmito.csv')

The following `from` values were not present in `x`: NA




 FALSE   TRUE 
122454 103944 


 FALSE   TRUE 
 43223 183175 

Unnamed: 0_level_0,gene,p_val,avg_log2FC,pct.1,pct.2,p_val_adj,ident.1,ident.2,ncells.1,ncells.2,⋯,bias.up.down,bias.G1,bias.G2M,bias.S,bias.per.phase,is.biased.in.any.phase,same.bias.in.all.phases,phase.bias,phase.bias.coincident.all,phase.bias.coincident.any
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<int>,<int>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>
1,A1BG,0.00273237,-0.03120904,0.103,0.146,1,ASD,Ctrl,4754,2302,⋯,DOWN,DOWN,UP,DOWN,bias.G1:DOWN|bias.G2M:UP|bias.S:DOWN,True,False,"DOWN,UP",False,True
2,A1BG,0.81236035,-0.00357126,0.098,0.102,1,ASD,Ctrl,17534,15039,⋯,DOWN,DOWN,UP,DOWN,bias.G1:DOWN|bias.G2M:UP|bias.S:DOWN,True,False,"DOWN,UP",False,True
3,A1BG,0.96515642,0.01174854,0.078,0.062,1,ASD,Ctrl,256,32,⋯,UP,DOWN,UP,DOWN,bias.G1:DOWN|bias.G2M:UP|bias.S:DOWN,True,False,"DOWN,UP",False,True
4,A1BG,0.09186323,-0.0817377,0.057,0.072,1,ASD,Ctrl,2136,1326,⋯,DOWN,DOWN,UP,DOWN,bias.G1:DOWN|bias.G2M:UP|bias.S:DOWN,True,False,"DOWN,UP",False,True
5,A1BG,0.14248611,0.13532952,0.087,0.034,1,ASD,Ctrl,126,89,⋯,UP,DOWN,UP,DOWN,bias.G1:DOWN|bias.G2M:UP|bias.S:DOWN,True,False,"DOWN,UP",False,True
6,A1BG,0.23931497,-0.07603715,0.121,0.163,1,ASD,Ctrl,447,270,⋯,DOWN,DOWN,UP,DOWN,bias.G1:DOWN|bias.G2M:UP|bias.S:DOWN,True,False,"DOWN,UP",False,True


## Export

In [181]:
# List the columns whose name contains the literal text 'filter.' in each of
# the three tables that are exported below.
colnames(res.with_pb_phase)[grepl('filter.', colnames(res.with_pb_phase), fixed = TRUE)]
colnames(pb_rge.with_pb_phase)[grepl('filter.', colnames(pb_rge.with_pb_phase), fixed = TRUE)]
colnames(filt.pb.phase)[grepl('filter.', colnames(filt.pb.phase), fixed = TRUE)]

In [182]:
# Names of the four sample-consistency filter columns, built as
# 'filter.<fraction of one group>.over.<reference in the other group>':
#   filter.2of3.over.2of3, filter.2of3.over.max,
#   filter.all.over.2of3,  filter.all.over.max
filters <- paste0(
    'filter.',
    rep(c('2of3', 'all'), each = 2),
    '.over.',
    rep(c('2of3', 'max'), times = 2)
)

In [183]:
# Write one Excel workbook per filter column, plus an unfiltered one (the NA
# entry, saved as 'Filter_None'). Each workbook has six sheets: every source
# table followed by its TF-only subset. Sheet names get a '.Filt' suffix when a
# filter is applied.
for (filt in c(NA, filters)){

    message(filt)

    # Source tables, in the order their sheets appear in the workbook
    # res.with_sc_phase or res.with_pb_phase
    src.tables <- list(
        Seurat.All = res.with_pb_phase,
        DESeq2.RGe = pb_rge.with_pb_phase,
        DESeq2.RGe.PerPhase = filt.pb.phase
    )
    unfiltered <- is.na(filt)

    dfs <- list()
    for (tbl.name in names(src.tables)){
        tbl <- src.tables[[tbl.name]]
        is.tf <- tbl[['gene.is.tf']]
        # which() drops rows whose flag is NA; plain logical indexing would
        # instead emit an all-NA row for each of them.
        if (unfiltered){
            dfs[[tbl.name]] <- tbl
            dfs[[paste0(tbl.name, '.TFs')]] <- tbl[which(is.tf), ]
        } else {
            passes <- tbl[[filt]]
            dfs[[paste0(tbl.name, '.Filt')]] <- tbl[which(passes), ]
            dfs[[paste0(tbl.name, '.TFs.Filt')]] <- tbl[which(passes & is.tf), ]
        }
    }

    # Label used in the file name of the unfiltered workbook
    if (unfiltered){
        filt <- 'None'
    }

    openxlsx::write.xlsx(dfs, file=glue::glue(
        '../results/selected_markers/ASD_vs_Ctrl_markers.Annotated.Filter_{filt}.xlsx'))
}

NA

filter.2of3.over.2of3

filter.2of3.over.max

filter.all.over.2of3

filter.all.over.max



### Available filters

In [184]:
# Catalogues of the available filters. In each list the element name is the
# filter key and the value is its human-readable description.
# The key 'NA' is the literal string "NA" and stands for "do not filter on
# this criterion".

# Sample-consistency filters; the keys match the filter.* column names
exp.filt.opts <- list(
    'filter.2of3.over.2of3' = 'At least 2/3 of one group has higher expression than 2/3 of the opposit group',
    'filter.2of3.over.max' = 'At least 2/3 of one group has higher expression than any of the opposit group',
    'filter.all.over.2of3' = 'All of one group has higher expression than 2/3 of the opposit group',
    'filter.all.over.max' = 'All of one group has higher expression than any of the opposit group'
    # 'NA' = 'Not filtered by number of overexpressing samples'
    )

# Cell-cycle (CC) phase bias filters; only the "no filter" option is enabled
phase.filt.opts <- list(
    # 'same.bias.with.all.phases'='All CC phases in RGe show the same bias',
    # 'same.bias.with.any.phase'='At least 1 CC phase in RGe show the same bias',
    'NA'='Not filtered by CC phase bias'
)

# P-value filters; only the adjusted p-value option is enabled
pval.filt.opts <- list(
    'p_val_adj'='Adjusted p-val < 0.05'#,
    # 'NA'='Not filtered by p-value'
)


# Transcription factor (TF) restriction: TFs only, or all genes
tf.filt.opts <- list('gene.is.tf'='Only TFs', 'NA'='Not only TFs')

# Every combination of the enabled options (4 x 1 x 1 x 2 = 8 rows),
# expressed with the descriptions ...
filt.grid.desc <- expand.grid(
    exp.filt=exp.filt.opts,
    phase.filt=phase.filt.opts,
    pval.filt=pval.filt.opts,
    tf.filt=tf.filt.opts)

# ... and the same combinations, in the same order, expressed with the keys
filt.grid <- expand.grid(
    exp.filt=names(exp.filt.opts),
    phase.filt=names(phase.filt.opts),
    pval.filt=names(pval.filt.opts),
    tf.filt=names(tf.filt.opts))

In [185]:
# Persist the four option lists and both filter grids in a single .rda file.
save(exp.filt.opts, phase.filt.opts, pval.filt.opts, tf.filt.opts,
     filt.grid, filt.grid.desc,
     file = '../data/filter.list.rda')

# Per donor

## Functions to build intersections

In [186]:
# Build a membership table for a list of sets.
#
# sets: list of vectors (one vector per set).
#
# Returns a data.frame with one row per distinct element found in any set
# (row names are the elements) and one logical column per set, TRUE where the
# element belongs to that set.
get_presence_table <- function(sets){

    universe <- unique(unlist(sets))
    membership <- lapply(sets, function(members){universe %in% members})
    presence <- as.data.frame(do.call(what='cbind', args=membership))
    rownames(presence) <- universe
    presence
}

In [187]:
# Label every row of a presence table with the '&'-joined names of the columns
# that are TRUE for it, i.e. the exact combination of sets containing it.
#
# bool_tbl:  logical table with elements as rows and sets as columns (the
#            layout produced by get_presence_table()).
# return.as: one of
#              'list'       - named list: combination label -> row names;
#              'data.frame' - bool_tbl plus an `int.stat` label column;
#              'table'      - counts of rows per combination label.
#            Any other value is an error (it used to return NULL silently).
#
# Labels list the set names in column order; a row with no TRUE gets ''.
get_intersections <- function(bool_tbl, return.as='list'){

    return.as <- match.arg(return.as, c('list', 'data.frame', 'table'))

    set.names <- colnames(bool_tbl)
    # Convert explicitly instead of relying on apply() coercing a data.frame.
    int.stat <- apply(as.matrix(bool_tbl), 1, function(present){
        paste(set.names[present], collapse='&')
    })

    switch(return.as,
        'data.frame' = as.data.frame(cbind(bool_tbl, int.stat = int.stat)),
        'table' = table(int.stat),
        'list' = split(rownames(bool_tbl), int.stat))
}

## filters

In [188]:
## Tests that we want to run

# Run each filter configuration through filter.marker.genes() and record the
# outcome of each one in a column named after the configuration.
#
# res: marker table handed to filter.marker.genes() as `res`.
# ...: named arguments that override, or add to, the ones assembled here
#      before filter.marker.genes() is called.
#
# Returns the table returned by the last filter.marker.genes() call, with its
# 'filter.ok' column renamed to the configuration name.
all_tests.donors <- function(res, ...){

    # Arguments common to every configuration. `id_conditions` is not defined
    # in this function; it is resolved from the enclosing environment.
    shared.args <- list(
        group.var= 'anno_cluster_fct',

        p_val_thres = 0.05,
        pct.exp.thres = .1,
        pct.exp.diff.thres = 0.05,
        topby='logfc',
        logfc_thres = .4,

        compare.conditions = TRUE,
        id.sample = 'cell_origin',
        control.cond = 'Ctrl',
        id_conditions=id_conditions,
        test.cond = 'ASD',
        comp.topn=Inf,
        comp.topby='logfc',
        consistent.logfc = TRUE,

        summary.function = max, fr.cond.over.control = 3/3,

        return_full = TRUE
    )

    # Configuration-specific arguments, keyed by the output column name.
    per.filter.args <- list(
        filter.over.max = list(p_val_var = 'p_val_adj', topn=Inf)
    )

    overrides <- list(...)

    banner <- paste(rep('#', 60), collapse='')

    for (filt in names(per.filter.args)){
        for (k in seq_len(3)){print(banner)}
        message(filt)
        for (k in seq_len(3)){print(banner)}

        call.args <- c(per.filter.args[[filt]], shared.args)
        # Caller-supplied values win over the defaults above.
        for (arg.name in names(overrides)){
            call.args[arg.name] <- overrides[arg.name]
        }

        # Debugging aids:
        # return(call.args)
        # return(paste(paste(names(call.args), as.character(call.args), sep="="), collapse=", "))
        res <- rlang::exec(.fn = filter.marker.genes, res=res, !!!call.args)

        current.cols <- colnames(res)
        colnames(res) <- ifelse(current.cols=='filter.ok', filt, current.cols)
    }

    res
}


In [189]:
# Interactive check: is there already an object called `dseus`?
is.element('dseus', ls())

In [190]:
# Cache of per-donor objects, indexed by donor id. It is only created when no
# object called `dseus` exists yet, so re-running this cell keeps the cache.
if (!is.element('dseus', ls())) {
    dseus <- list()
}

# Subset `seu` to the cells whose cell_origin equals `donor` plus every cell
# whose condition is 'Ctrl', then re-run ScaleData() on that subset.
get_donor_seu <- function(seu, donor){
    donor.seu <- subset(seu, (cell_origin == donor) | (condition=='Ctrl'))
    ScaleData(donor.seu, verbose=TRUE)
}

In [191]:
res.donors.annot <- annotate_markers(res.donors)

# Apply all_tests.donors() to each donor's marker table separately and stack
# the per-donor results into filt.res.donors.
#
# Changes with respect to the previous version of this cell:
#  * the donor object cache `dseus` is now really updated (see `<<-` below);
#  * the `cond=` handler was removed: tryCatch() handlers are matched by
#    condition class and no condition has class "cond", so it never ran;
#  * a failing donor is reported and contributes nothing to the result,
#    instead of a condition object being passed on to rbind().
filt.res.donors <- lapply(
    # drop = TRUE skips unused donor levels, which would yield empty groups.
    split(res.donors.annot, res.donors.annot$donor, drop = TRUE), function(filt.res){

        donor <- unique(as.character(filt.res$donor))
        if (donor %in% names(dseus)){
            dseu <- dseus[[donor]]
        } else{
            dseu <- get_donor_seu(seu, donor)
            # `<-` here would only modify a copy local to this function and
            # the cache would stay empty; `<<-` updates the `dseus` object
            # defined outside of it.
            dseus[[donor]] <<- dseu
        }
        message(donor)
        tryCatch(
            all_tests.donors(seu = dseu, filt.res, by.group = 'anno_cluster_fct'),
            error = function(cond){
                message('all_tests.donors failed for donor ', donor, ': ',
                        conditionMessage(cond))
                NULL  # NULL entries are ignored by rbind()
            })
    }) %>% do.call(what='rbind') %>% as.data.frame()

Centering and scaling data matrix

D8_ASD_375



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in FGF17-like: 37733

Number of genes in Mes prog: 37733

Number of genes in Neral crest/Mes: 37733

Number of genes in Neuron: 37733

Number of genes in RG early: 37733

Number of genes in RG late: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in FGF17-like: 1

Number of genes in Mes prog: 265

Number of genes in Neuron: 5

Number of genes in RG early: 1461

Number of genes in RG late: 74



[1] "Filtering by logFC: 0.4"


Number of genes in FGF17-like: 1

Number of genes in Mes prog: 26

Number of genes in Neuron: 5

Number of genes in RG early: 27

Number of genes in RG late: 34



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in FGF17-like: 1

Number of genes in Mes prog: 26

Number of genes in Neuron: 5

Number of genes in RG early: 27

Number of genes in RG late: 34



[1] "Filtering by difference of pct of expressing cells: 0.05"


Number of genes in FGF17-like: 1

Number of genes in Mes prog: 23

Number of genes in Neuron: 4

Number of genes in RG early: 25

Number of genes in RG late: 34



[1] "Comparison of conditions: ASD Ctrl"


FGF17-like

“Scaling data with a low number of groups may produce misleading results”
Mes prog

“Scaling data with a low number of groups may produce misleading results”
RG early

“Scaling data with a low number of groups may produce misleading results”
RG late

“Scaling data with a low number of groups may produce misleading results”
Neuron

“Scaling data with a low number of groups may produce misleading results”
The following `from` values were not present in `x`: D8_ASD_384, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_384, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_384, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_384, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_384, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_384, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_384, D8_ASD_494

The following `from` values were not prese

[1] "ASD markers"


Number of genes in Mes prog: 12

Number of genes in Neuron: 2

Number of genes in RG early: 11

Number of genes in RG late: 16



[1] "Ctrl markers"


Number of genes in FGF17-like: 1

Number of genes in Mes prog: 5

Number of genes in Neuron: 2

Number of genes in RG early: 8

Number of genes in RG late: 2



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in Mes prog: 12

Number of genes in Neuron: 2

Number of genes in RG early: 11

Number of genes in RG late: 16



[1] "Control condition"


Number of genes in FGF17-like: 1

Number of genes in Mes prog: 5

Number of genes in Neuron: 2

Number of genes in RG early: 8

Number of genes in RG late: 2



[1] "After condition comparison"


Number of genes in FGF17-like: 1

Number of genes in Mes prog: 17

Number of genes in Neuron: 4

Number of genes in RG early: 19

Number of genes in RG late: 18

Centering and scaling data matrix

D8_ASD_384



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in FGF17-like: 37733

Number of genes in Mes prog: 37733

Number of genes in Neral crest/Mes: 37733

Number of genes in Neuron: 37733

Number of genes in RG early: 37733

Number of genes in RG late: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in FGF17-like: 7

Number of genes in Mes prog: 1247

Number of genes in Neuron: 1

Number of genes in RG early: 3329

Number of genes in RG late: 335



[1] "Filtering by logFC: 0.4"


Number of genes in FGF17-like: 7

Number of genes in Mes prog: 85

Number of genes in Neuron: 1

Number of genes in RG early: 115

Number of genes in RG late: 124



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in FGF17-like: 7

Number of genes in Mes prog: 85

Number of genes in Neuron: 1

Number of genes in RG early: 115

Number of genes in RG late: 123



[1] "Filtering by difference of pct of expressing cells: 0.05"


Number of genes in FGF17-like: 7

Number of genes in Mes prog: 74

Number of genes in RG early: 106

Number of genes in RG late: 119



[1] "Comparison of conditions: ASD Ctrl"


FGF17-like

“Scaling data with a low number of groups may produce misleading results”
Mes prog

“Scaling data with a low number of groups may produce misleading results”
RG early

“Scaling data with a low number of groups may produce misleading results”
RG late

“Scaling data with a low number of groups may produce misleading results”
The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_494

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_494



[1] "ASD markers"


Number of genes in FGF17-like: 6

Number of genes in Mes prog: 36

Number of genes in RG early: 60

Number of genes in RG late: 65



[1] "Ctrl markers"


Number of genes in FGF17-like: 1

Number of genes in Mes prog: 26

Number of genes in RG early: 39

Number of genes in RG late: 27



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in FGF17-like: 6

Number of genes in Mes prog: 36

Number of genes in RG early: 60

Number of genes in RG late: 65



[1] "Control condition"


Number of genes in FGF17-like: 1

Number of genes in Mes prog: 26

Number of genes in RG early: 39

Number of genes in RG late: 27



[1] "After condition comparison"


Number of genes in FGF17-like: 7

Number of genes in Mes prog: 62

Number of genes in RG early: 99

Number of genes in RG late: 92

Centering and scaling data matrix

D8_ASD_494



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


filter.over.max



[1] "############################################################"
[1] "############################################################"
[1] "############################################################"


Number of genes in FGF17-like: 37733

Number of genes in Mes prog: 37733

Number of genes in Neral crest/Mes: 37733

Number of genes in Neuron: 37733

Number of genes in RG early: 37733

Number of genes in RG late: 37733



[1] "Filtering by p_val: 0.05"


Number of genes in FGF17-like: 3

Number of genes in Mes prog: 2458

Number of genes in Neral crest/Mes: 63

Number of genes in Neuron: 104

Number of genes in RG early: 2833

Number of genes in RG late: 306



[1] "Filtering by logFC: 0.4"


Number of genes in FGF17-like: 3

Number of genes in Mes prog: 138

Number of genes in Neral crest/Mes: 49

Number of genes in Neuron: 69

Number of genes in RG early: 59

Number of genes in RG late: 86



[1] "Filtering by pct of expressing cells: 0.1"


Number of genes in FGF17-like: 3

Number of genes in Mes prog: 138

Number of genes in Neral crest/Mes: 49

Number of genes in Neuron: 69

Number of genes in RG early: 59

Number of genes in RG late: 86



[1] "Filtering by difference of pct of expressing cells: 0.05"


Number of genes in Mes prog: 110

Number of genes in Neral crest/Mes: 20

Number of genes in Neuron: 54

Number of genes in RG early: 40

Number of genes in RG late: 73



[1] "Comparison of conditions: ASD Ctrl"


Mes prog

“Scaling data with a low number of groups may produce misleading results”
RG early

“Scaling data with a low number of groups may produce misleading results”
RG late

“Scaling data with a low number of groups may produce misleading results”
Neral crest/Mes

“Scaling data with a low number of groups may produce misleading results”
Neuron

“Scaling data with a low number of groups may produce misleading results”
The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_384

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_384

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_384

The following `from` values were not present in `x`: D8_Ctrl_317, D8_ASD_375, D8_ASD_384

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_384

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_384

The following `from` values were not present in `x`: D8_ASD_375, D8_ASD_384

The following `from` val

[1] "ASD markers"


Number of genes in Mes prog: 32

Number of genes in Neral crest/Mes: 5

Number of genes in Neuron: 12

Number of genes in RG early: 17

Number of genes in RG late: 41



[1] "Ctrl markers"


Number of genes in Mes prog: 68

Number of genes in Neral crest/Mes: 12

Number of genes in Neuron: 31

Number of genes in RG early: 16

Number of genes in RG late: 13



[1] "Filtering genes with consistent test/control logFC"
[1] "Test condition"


Number of genes in Mes prog: 32

Number of genes in Neral crest/Mes: 5

Number of genes in Neuron: 12

Number of genes in RG early: 17

Number of genes in RG late: 41



[1] "Control condition"


Number of genes in Mes prog: 68

Number of genes in Neral crest/Mes: 12

Number of genes in Neuron: 31

Number of genes in RG early: 16

Number of genes in RG late: 13



[1] "After condition comparison"


Number of genes in Mes prog: 100

Number of genes in Neral crest/Mes: 17

Number of genes in Neuron: 43

Number of genes in RG early: 33

Number of genes in RG late: 54



## cross results

In [192]:
# How many gene/cluster/donor rows pass the filter.over.max filter.
table(filt.res.donors[['filter.over.max']])


 FALSE   TRUE 
678628    566 

In [193]:
# Donors present in the filtered results, in the order given by `samples`.
donors <- samples[samples%in%unique(filt.res.donors$donor)]
donors

# Labels of every possible donor combination, largest combinations first and
# single donors last. Within a label the donors keep the order of `donors`,
# which is how get_intersections() builds its labels (column order).
# combn() yields each combination exactly once, so this also works for two
# donors (no duplicated label) and for more than three (no missing sizes).
donors.ints <- c(
    unlist(lapply(rev(seq_along(donors)[-1]), function(k){
        combn(donors, k, FUN = paste, collapse = '&')
    })),
    donors)
donors.ints

In [196]:
# Cross the per-donor results: for every gene passing `filt`, within the same
# cell type and direction of bias, record which donors share it.
crossed.res <- filt.res.donors

filt <- 'filter.over.max'
crossed.col <- paste0(filt, '.crossed')
# Order in which donor combinations are sorted in the final table. The labels
# must be written in the same donor order as `donors`.
int.levels <- c( 'D8_ASD_375&D8_ASD_384&D8_ASD_494', 'D8_ASD_375&D8_ASD_384', 'D8_ASD_375&D8_ASD_494', 'D8_ASD_384&D8_ASD_494', 'D8_ASD_375', 'D8_ASD_384', 'D8_ASD_494')

# which() keeps only TRUE rows; indexing with a logical containing NA would
# add all-NA rows.
filt.dat <- filt.res.donors[which(filt.res.donors[, filt]),] %>%
    mutate(donor=factor(as.character(donor), donors))

# One group per (direction of bias, cell type) pair.
bias_ct_donor.markers <- split(
    filt.dat,
    paste(filt.dat$overexpressed.in, filt.dat$anno_cluster_fct, sep='...'))

# Build one intersection table per group and bind them once at the end
# (instead of growing a data.frame inside a loop).
all_intersections <- lapply(bias_ct_donor.markers, function(group.dat){

    # split() on the donor factor keeps every level of `donors`, so the
    # presence table always has one column per donor.
    donor.marker.split <- split(group.dat$gene, group.dat$donor)
    intersections <- get_intersections(get_presence_table(donor.marker.split),
                                       return.as='data.frame')
    colnames(intersections)[colnames(intersections)=='int.stat'] <- crossed.col
    intersections$gene <- rownames(intersections)
    # Both values are constant within a group; read them from the data
    # rather than parsing them back out of the group label.
    intersections$overexpressed.in <- as.character(group.dat$overexpressed.in[1])
    intersections$anno_cluster_fct <- as.character(group.dat$anno_cluster_fct[1])
    intersections
})
all_intersections <- as.data.frame(do.call(rbind, unname(all_intersections)))

print(dim(all_intersections))

if (nrow(all_intersections) > 0){
    # Drop the per-donor presence columns by name, not by position, so this
    # does not depend on the number of donors.
    crossed.res <- merge(
        crossed.res,
        all_intersections[, setdiff(colnames(all_intersections), donors), drop=FALSE],
        by=c('gene', 'anno_cluster_fct', 'overexpressed.in'), all.x=TRUE)
} else {
    # No gene passed the filter: nothing to merge.
    crossed.res[[crossed.col]] <- NA_character_
}

crossed.res$matching.donors <- factor(crossed.res[[crossed.col]], int.levels)
crossed.res[[crossed.col]] <- NULL

# Sort by gene/donor first, then by cell type and donor combination; the
# earlier order is kept among ties.
crossed.res <- crossed.res %>%
    select(matching.donors, donor, everything()) %>%
    arrange(gene, donor) %>%
    arrange(anno_cluster_fct, matching.donors) %>%
    as.data.frame()

subset(crossed.res, !is.na(matching.donors))

[1] 507   7


Unnamed: 0_level_0,matching.donors,donor,gene,anno_cluster_fct,overexpressed.in,p_val,avg_log2FC,pct.1,pct.2,p_val_adj,⋯,Imprinted.Status,Expressed.Allele,gene_biotype,protein_coding,rank.ASD,rank.Ctrl,chromosome,in.sex_chr,logfc,filter.over.max
Unnamed: 0_level_1,<fct>,<fct>,<chr>,<fct>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<chr>,<chr>,<chr>,<lgl>,<dbl>,<dbl>,<chr>,<lgl>,<dbl>,<lgl>
1,D8_ASD_375,D8_ASD_375,NNAT,FGF17-like,Ctrl,9.786744e-12,-1.88104826,0.000,0.750,3.692832e-07,⋯,Imprinted,Paternal,protein_coding,TRUE,Inf,9,20,FALSE,-1.88104826,TRUE
2,D8_ASD_384,D8_ASD_375,CSRP2,FGF17-like,ASD,5.039917e-02,0.63652145,0.839,0.781,1.000000e+00,⋯,Not in DB,Not in DB,protein_coding,TRUE,1130,Inf,12,FALSE,0.63652145,FALSE
3,D8_ASD_384,D8_ASD_384,CSRP2,FGF17-like,ASD,5.248847e-07,0.94249968,0.966,0.781,1.980548e-02,⋯,Not in DB,Not in DB,protein_coding,TRUE,272,Inf,12,FALSE,0.94249968,TRUE
4,D8_ASD_384,D8_ASD_494,CSRP2,FGF17-like,ASD,5.538582e-01,0.01286255,0.808,0.781,1.000000e+00,⋯,Not in DB,Not in DB,protein_coding,TRUE,95527,Inf,12,FALSE,0.01286255,FALSE
5,D8_ASD_384,D8_ASD_375,FGF8,FGF17-like,ASD,2.668402e-05,0.92950781,0.355,0.000,1.000000e+00,⋯,Not in DB,Not in DB,protein_coding,TRUE,292,Inf,10,FALSE,0.92950781,FALSE
6,D8_ASD_384,D8_ASD_384,FGF8,FGF17-like,ASD,1.399141e-08,1.54070324,0.558,0.000,5.279377e-04,⋯,Not in DB,Not in DB,protein_coding,TRUE,34,Inf,10,FALSE,1.54070324,TRUE
7,D8_ASD_384,D8_ASD_494,FGF8,FGF17-like,ASD,3.796737e-02,0.11542748,0.051,0.000,1.000000e+00,⋯,Not in DB,Not in DB,protein_coding,TRUE,28046,Inf,10,FALSE,0.11542748,FALSE
8,D8_ASD_384,D8_ASD_375,FZD5,FGF17-like,ASD,4.050033e-03,0.58516809,0.419,0.125,1.000000e+00,⋯,Not in DB,Not in DB,protein_coding,TRUE,1446,Inf,2,FALSE,0.58516809,FALSE
9,D8_ASD_384,D8_ASD_384,FZD5,FGF17-like,ASD,2.885716e-10,1.54827818,0.694,0.125,1.088867e-05,⋯,Not in DB,Not in DB,protein_coding,TRUE,32,Inf,2,FALSE,1.54827818,TRUE
10,D8_ASD_384,D8_ASD_375,SFRP1,FGF17-like,ASD,7.200298e-02,0.37547466,0.871,0.719,1.000000e+00,⋯,Not in DB,Not in DB,protein_coding,TRUE,4780,Inf,8,FALSE,0.37547466,FALSE


In [197]:
# Export the crossed per-donor marker table.
write.csv(
    x = crossed.res,
    file = '../results/selected_markers/markers_per_donor.nohighmito.with_filters_and_crossed.csv')