In [None]:
# Report the R home (installation) directory of the running session.
# NOTE(review): presumably a sanity check on which R the kernel/rule uses — confirm.
R.home()

In [None]:
baizer::pkglib(baizer, tidyverse, jsonlite, yaml, ComplexHeatmap)

In [None]:
# Sample name comes from the `sample` wildcard of the running Snakemake rule.
sample <- snakemake@wildcards[['sample']]

# replace default configs with sample configs
# The entry of the Snakemake config keyed by the sample name is passed as the
# replacement set.
# NOTE(review): replace_item() is not defined in this file (presumably from
# baizer); how it handles a sample without its own config entry (NULL second
# argument) cannot be seen from here — confirm.
config <- replace_item(snakemake@config, snakemake@config[[sample]])

In [None]:
# Filter settings pulled from the merged config.
# Lmode: named collection; each entry lists the libraries that are combined
# into one joint output in the output step.
Lmode <- config[['flt_mode']]
# mRNA thresholds: cells need nFeature_RNA and nCount_RNA strictly above the
# gene/umi cut-offs and mt_percent strictly below the mt cut-off.
mRNA_gene_flt <- config[['mRNA_gene_flt']]
mRNA_umi_flt <- config[['mRNA_umi_flt']]
mRNA_mt_percent_flt <- config[['mRNA_mt_percent_flt']]
# FB threshold: sum_BD must be strictly above this value.
FB_umi_flt <- config[['FB_umi_flt']]
# VDJB / VDJT thresholds: both umis_H and umis_L must be strictly above them.
VDJB_umi_flt <- config[['VDJB_umi_flt']]
VDJT_umi_flt <- config[['VDJT_umi_flt']]

In [None]:
# Paths supplied by the Snakemake rule.
# Pstat:       YAML file that receives the raw/filtered cell counts.
# Pstat_dir:   directory holding one sub-directory of stat tables per library.
# Pfilter_dir: directory that receives every filtered table written below.
Pstat <- snakemake@output[['filter_stat']]
Pstat_dir <- snakemake@params[['stat_dir']]
Pfilter_dir <- snakemake@output[['filter_dir']]
# Create the output directory only when it is missing: dir.create() warns on
# an existing directory, which happens on re-runs or when the workflow engine
# has already created it. Genuine creation failures still warn.
if (!dir.exists(Pfilter_dir)) {
    dir.create(Pfilter_dir, recursive = TRUE)
}

In [None]:
# Entries found in the stat directory; each library cell below runs only when
# its name appears here.
subdirs <- list.files(Pstat_dir)

In [None]:
# Accumulators filled by the per-library cells:
# Lflt  - per-library tables including their *_keep flag columns
# Lstat - raw / filtered row counts, written out as YAML at the end
Lflt <- list()
Lstat <- list()

In [None]:
# mRNA: flag every cell against the nFeature_RNA / nCount_RNA / mt_percent
# thresholds, write the passing cells, and record the counts.
if ('mRNA' %in% subdirs) {
    mRNA <- read_csv(str_glue('{Pstat_dir}/mRNA/mRNA.csv'))

    # missing metrics are set to zero before thresholding
    mRNA <- mutate(
        mRNA,
        across(c(nFeature_RNA, nCount_RNA, mt_percent), ~ifelse(is.na(.x), 0, .x))
    )

    # one flag per criterion; the product is 1 only when all criteria pass
    mRNA <- mutate(
        mRNA,
        mRNA_gene_keep = nFeature_RNA > mRNA_gene_flt,
        mRNA_umi_keep = nCount_RNA > mRNA_umi_flt,
        mRNA_mt_percent_keep = mt_percent < mRNA_mt_percent_flt,
        mRNA_keep = mRNA_gene_keep * mRNA_umi_keep * mRNA_mt_percent_keep
    )
    # store all flags as 0/1 numerics
    mRNA <- mutate(mRNA, across(matches('_keep$'), as.numeric))

    # passing cells only
    mRNA_flt <- filter(mRNA, mRNA_keep == 1)
    write_excel_csv(mRNA_flt, str_glue('{Pfilter_dir}/mRNA_flt.csv'))

    # keep the full flagged table for the combined modes; log the counts
    Lflt$mRNA <- mRNA
    Lstat[['mRNA']][['raw']] <- nrow(mRNA)
    Lstat[['mRNA']][['flt']] <- nrow(mRNA_flt)
}

In [None]:
# FB: flag every cell on NC_load and on the sum_BD threshold, write the
# passing cells, and record the counts.
if ('FB' %in% subdirs) {
    FB <- read_csv(str_glue('{Pstat_dir}/FB/FB.csv'))

    # a cell passes when NC_load is FALSE and sum_BD exceeds the cut-off
    FB <- mutate(
        FB,
        FB_NC_keep = NC_load == FALSE,
        FB_umi_keep = sum_BD > FB_umi_flt,
        FB_keep = FB_NC_keep * FB_umi_keep
    )
    # store all flags as 0/1 numerics
    FB <- mutate(FB, across(matches('_keep$'), as.numeric))

    # passing cells only
    FB_flt <- filter(FB, FB_keep == 1)
    write_excel_csv(FB_flt, str_glue('{Pfilter_dir}/FB_flt.csv'))

    # keep the full flagged table for the combined modes; log the counts
    Lflt$FB <- FB
    Lstat[['FB']][['raw']] <- nrow(FB)
    Lstat[['FB']][['flt']] <- nrow(FB_flt)
}

In [None]:
# VDJB: flag every cell on productive_cellranger, unique, and the umis_H /
# umis_L thresholds, write the passing cells, and record the counts.
if ('VDJB' %in% subdirs) {
    VDJB <- read_csv(str_glue('{Pstat_dir}/VDJB/VDJB.csv'))

    # missing UMI counts are set to zero before thresholding
    VDJB <- mutate(VDJB, across(c(umis_H, umis_L), ~ifelse(is.na(.x), 0, .x)))

    # one flag per criterion; the product is 1 only when all criteria pass
    VDJB <- mutate(
        VDJB,
        VDJB_productive_cellranger_keep = productive_cellranger,
        VDJB_unique_keep = unique,
        VDJB_umi_H_keep = umis_H > VDJB_umi_flt,
        VDJB_umi_L_keep = umis_L > VDJB_umi_flt,
        VDJB_keep = VDJB_productive_cellranger_keep * VDJB_unique_keep * VDJB_umi_H_keep * VDJB_umi_L_keep
    )
    # store all flags as 0/1 numerics
    VDJB <- mutate(VDJB, across(matches('_keep$'), as.numeric))

    # passing cells only
    VDJB_flt <- filter(VDJB, VDJB_keep == 1)
    write_excel_csv(VDJB_flt, str_glue('{Pfilter_dir}/VDJB_flt.csv'))

    # keep the full flagged table for the combined modes; log the counts
    Lflt$VDJB <- VDJB
    Lstat[['VDJB']][['raw']] <- nrow(VDJB)
    Lstat[['VDJB']][['flt']] <- nrow(VDJB_flt)
}

In [None]:
# VDJT: flag every cell on productive_cellranger, unique, and the umis_H /
# umis_L thresholds, write the passing cells, and record the counts.
if ('VDJT' %in% subdirs) {
    VDJT <- read_csv(str_glue('{Pstat_dir}/VDJT/VDJT.csv'))

    # missing UMI counts are set to zero before thresholding
    VDJT <- mutate(VDJT, across(c(umis_H, umis_L), ~ifelse(is.na(.x), 0, .x)))

    # one flag per criterion; the product is 1 only when all criteria pass
    VDJT <- mutate(
        VDJT,
        VDJT_productive_cellranger_keep = productive_cellranger,
        VDJT_unique_keep = unique,
        VDJT_umi_H_keep = umis_H > VDJT_umi_flt,
        VDJT_umi_L_keep = umis_L > VDJT_umi_flt,
        VDJT_keep = VDJT_productive_cellranger_keep * VDJT_unique_keep * VDJT_umi_H_keep * VDJT_umi_L_keep
    )
    # store all flags as 0/1 numerics
    VDJT <- mutate(VDJT, across(matches('_keep$'), as.numeric))

    # passing cells only
    VDJT_flt <- filter(VDJT, VDJT_keep == 1)
    write_excel_csv(VDJT_flt, str_glue('{Pfilter_dir}/VDJT_flt.csv'))

    # keep the full flagged table for the combined modes; log the counts
    Lflt$VDJT <- VDJT
    Lstat[['VDJT']][['raw']] <- nrow(VDJT)
    Lstat[['VDJT']][['flt']] <- nrow(VDJT_flt)
}

In [None]:
# output
#
# For every configured filter mode, combine the per-library tables of the
# libraries that mode lists and that are present in Pstat_dir. Modes with
# fewer than two available libraries are skipped. Per mode this writes
#   {mode_name}_unflt.csv  joined table plus a combined `keep` column
#   {mode_name}_keep.csv   `cell` plus every *_keep flag column
#   {mode_name}_flt.csv    rows with keep == TRUE, *_keep columns removed
# into Pfilter_dir, copies the keep/flt files into Pstat_dir, and records the
# row counts in Lstat.

for (mode_name in names(Lmode)) {

    # libraries requested by this mode that actually exist for the sample
    mode <- intersect(Lmode[[mode_name]], subdirs)
    if (length(mode) < 2) {
        next
    }

    # common unflt: only cells present in every selected library
    TBunflt <- Lflt[mode] %>% reduce(inner_join, by='cell')

    # A cell is kept only when every library-level flag is set. The flags are
    # stored as 0/1 numerics, so they are converted explicitly and combined
    # column-wise with `&`. This yields the same TRUE/FALSE/NA values as a
    # row-wise all(), without coercing the data frame to a matrix and without
    # the double-to-logical coercion warning raised for each row.
    Vkeep <- TBunflt %>%
        select(str_c(mode, '_keep')) %>%
        map(as.logical) %>%
        reduce(`&`)
    TBunflt$keep <- Vkeep

    # unflt
    TBunflt %>%
        write_excel_csv(str_glue('{Pfilter_dir}/{mode_name}_unflt.csv'))

    # keep matrix
    TBkeep <- TBunflt %>% select(cell, matches('_keep$'))
    TBkeep %>% write_excel_csv(str_glue('{Pfilter_dir}/{mode_name}_keep.csv'))

    # flt
    TBflt <- TBunflt %>% filter(keep==TRUE) %>% select(!matches('_keep$'))

    TBflt %>% write_excel_csv(str_glue('{Pfilter_dir}/{mode_name}_flt.csv'))

    Lstat[[mode_name]][['raw']] <- nrow(TBunflt)
    Lstat[[mode_name]][['flt']] <- nrow(TBflt)

    # copy
    # overwrite = TRUE so that a re-run replaces copies left by an earlier
    # run; without it file.copy() silently keeps the stale file.
    file.copy(str_glue('{Pfilter_dir}/{mode_name}_keep.csv'),
              str_glue('{Pstat_dir}/{mode_name}_keep.csv'),
              overwrite = TRUE)
    file.copy(str_glue('{Pfilter_dir}/{mode_name}_flt.csv'),
              str_glue('{Pstat_dir}/{mode_name}_flt.csv'),
              overwrite = TRUE)

}

In [None]:
# write
# Dump the collected counts as YAML, then mirror that file into the stat
# directory under the same file name, replacing any earlier copy.
write_yaml(Lstat, file = Pstat)
file.copy(
    from = Pstat,
    to = file.path(Pstat_dir, basename(Pstat)),
    overwrite = TRUE
)