In [None]:
# Show the R home directory of the interpreter executing this notebook.
R.home()

In [None]:
baizer::pkglib(Seurat, tidyverse, baizer, yaml)

In [None]:
# Name of the sample handled by this job, taken from the Snakemake wildcards.
sample <- snakemake@wildcards[['sample']]

# replace default configs with sample configs
# NOTE(review): replace_item() is not defined in this file (presumably provided by
# baizer); confirm how it merges the per-sample entry into the global config.
config <- replace_item(snakemake@config, snakemake@config[[sample]])

In [None]:
# Seurat readin
# Load the filtered and the raw feature-barcode count matrices with Read10X() and
# reshape each into a table with one row per cell (`TBfb`, `TBfb_raw`).

# Matrix directories below the count directory. str_glue() stays at top level so the
# config templates are interpolated in the same environment as before.
Sdir_FB <- str_c(snakemake@input[['count_dir']], str_glue(config[['count_FB_dir']]), sep='/')
Sdir_FB_raw <- str_c(snakemake@input[['count_dir']], str_glue(config[['count_FB_raw_dir']]), sep='/')

if (config[[sample]][['mRNA']]) {
    # With mRNA enabled the Read10X() result is treated as a list and the
    # feature-barcode matrix is picked by the configured `FB_type`.
    Lcount <- Read10X(data.dir = Sdir_FB)
    Mcount <- Lcount[[config[['FB_type']]]]
    Lcount_raw <- Read10X(data.dir = Sdir_FB_raw)
    # Fix: the raw matrix must come from `Lcount_raw`; it was previously read from
    # `Lcount`, which made `Mcount_raw` a copy of the filtered matrix.
    Mcount_raw <- Lcount_raw[[config[['FB_type']]]]
} else {
    # Without mRNA the Read10X() result is used directly as the count matrix.
    Mcount <- Read10X(data.dir = Sdir_FB)
    Mcount_raw <- Read10X(data.dir = Sdir_FB_raw)
}

# Transpose so that cells are rows.
# NOTE(review): r2c('cell') presumably moves the row names into a `cell` column
# (later code reads TBfb$cell) - confirm against the baizer documentation.
TBfb <- as.matrix(Mcount) %>% t %>% as.data.frame %>% r2c('cell')
TBfb_raw <- as.matrix(Mcount_raw) %>% t %>% as.data.frame %>% r2c('cell')

In [None]:
# save cells in FB library
# Start from the cells of the filtered FB table, add the cell ids found in each
# enabled VDJ library, then keep exactly those cells from the raw FB table.
Vcells <- TBfb$cell

# sample-config flag -> config key holding the path template of the AIRR table
Vairr_key <- c(VDJB = 'count_VDJB_airr', VDJT = 'count_VDJT_airr')
for (Svdj in names(Vairr_key)) {
    if (config[[sample]][[Svdj]]) {
        Sairr <- str_c(snakemake@input[['count_dir']], str_glue(config[[Vairr_key[[Svdj]]]]), sep='/')
        TBairr <- read_tsv(Sairr)
        Vcells <- union(Vcells, unique(pull(TBairr, cell_id)))
    }
}

TBfb <- filter(TBfb_raw, cell %in% Vcells)

In [None]:
# fb corr
# Multiply the counts of selected feature barcodes by the per-sample factors listed
# under `FB_corr` (barcode id -> factor). Nothing happens when `FB_corr` is absent.
Lcorr <- config[[sample]][['FB_corr']]
for (fb in names(Lcorr)) {
    # The id is used as a regex that must match the end of the column name.
    fb_col <- str_subset(colnames(TBfb), str_glue('{fb}$'))
    # `[[` needs exactly one column: report a missing or ambiguous id explicitly
    # instead of failing with a subscript error.
    if (length(fb_col) != 1) {
        stop(
            str_glue('FB_corr id "{fb}" matches {length(fb_col)} columns of TBfb, expected exactly 1'),
            call. = FALSE
        )
    }
    TBfb[[fb_col]] <- TBfb[[fb_col]] * Lcorr[[fb]]
}

In [None]:
# sum
# Per-cell totals over the HT_, NC_ and BD_ count columns, appended as
# sum_HT, sum_NC and sum_BD.
Vsum_HT <- rowSums(select(TBfb, matches('^HT_')))
Vsum_NC <- rowSums(select(TBfb, matches('^NC_')))
Vsum_BD <- rowSums(select(TBfb, matches('^BD_')))

TBsum <- tibble(sum_HT = Vsum_HT, sum_NC = Vsum_NC, sum_BD = Vsum_BD)
TBfb <- bind_cols(TBfb, TBsum)

In [None]:
# Per-sample `id2seq` config entry; its BD, HT and NC elements are read further down.
Lfb <- config[[sample]][['id2seq']]

In [None]:
# BD
# For every BD barcode family: pool its count columns, compute the family's share of
# the cell's total BD counts, and label each cell with the families whose share
# reaches `Nratio_BD`.

if (Lfb$BD %>% length > 0) {
    # BD list: map each family (base name) to its count columns. Columns that differ
    # only by a trailing '#<digits>' suffix share one base name.
    Vcol_BD <- Lfb$BD %>% names %>% str_c('BD_', .)
    Vbase_BD <- Vcol_BD %>% str_replace('#\\d+$', '')
    # Group on the exact base name. The earlier regex-substring lookup also pulled in
    # columns of other ids that merely contain the name (e.g. 'BD_A' matched
    # 'BD_AB#1'). Explicit factor levels keep the families in order of first
    # appearance instead of alphabetical order.
    Lcol_BD <- split(Vcol_BD, factor(Vbase_BD, levels = unique(Vbase_BD)))

    # ratio
    for (n in names(Lcol_BD)){
        # pooled count of the family ...
        V <- TBfb %>% select(all_of(Lcol_BD[[n]])) %>% rowSums
        TBfb[, n] <- V
        # ... and its fraction of all BD counts of the cell, rounded to 2 digits
        TBfb <- TBfb %>% mutate('{n}_ratio' := (.data[[n]] / sum_BD) %>% round(2))
    }

    # BD_type
    # Comma-separated names (without the 'BD_' prefix) of the families whose ratio is
    # at least `Nratio_BD`.
    # NOTE(review): rows with sum_BD == 0 have NaN ratios and appear to end up with
    # BD_type NA - confirm this is the wanted label for such cells.
    make_comma_col <- function(row, Vname){
        Vname[c(row >= config[['Nratio_BD']])] %>% str_replace('BD_', '') %>% str_c(collapse=',')
    }

    TBfb$BD_type <- TBfb %>%
        select(str_c(names(Lcol_BD), '_ratio')) %>%
        apply(1, make_comma_col, names(Lcol_BD))
}

In [None]:
# HT
# For each cell pick the hashtag with the largest share of the cell's hashtag counts
# and classify the call. Adds HT_group (top hashtag id), HT_ratio (its share) and
# HT_type (classification) to TBfb.
if (Lfb$HT %>% length > 0) {
    TBhash_group <- TBfb %>% select(cell, starts_with('HT_'), sum_HT) %>% 
        # long format: one row per cell and hashtag, count in `HT`, id in `HT_group`
        pivot_longer(-c(cell, sum_HT), names_to=c('.value', 'HT_group'), names_sep='_') %>%
        group_by(cell) %>% mutate(HT_ratio=round(HT/sum_HT, 2)) %>%
        # keep only the hashtag with the highest ratio per cell
        arrange(cell, desc(HT_ratio)) %>% dplyr::slice(1) %>% ungroup %>%
        # Fix: this classification used to be stored in a column `group` that the
        # following select() dropped, so `Nlim_HT` / `Nratio_HT` had no effect on the
        # output. It is now kept as `HT_type` (named after `BD_type`).
        mutate(HT_type=case_when(sum_HT < 0.0001 ~ 'no hashtag',
                       sum_HT < config[['Nlim_HT']] ~ 'low hashtag',
                       HT_ratio > config[['Nratio_HT']] ~ HT_group, 
                       TRUE ~ 'mixed')) %>%
        select(cell, HT_group, HT_ratio, HT_type) 
    TBfb <- TBfb %>% left_join(TBhash_group, by='cell')
}

In [None]:
# NC
# Flag cells whose NC share of the combined BD + NC counts exceeds `Nratio_NC`.
# Without configured NC barcodes the flag is FALSE for every cell.
if (length(Lfb$NC) == 0) {
    TBfb <- mutate(TBfb, NC_load = FALSE)
} else {
    TBfb <- mutate(
        TBfb,
        # cells without any NC count are never flagged
        NC_load = ifelse(
            sum_NC == 0,
            FALSE,
            sum_NC / (sum_BD + sum_NC) > config[['Nratio_NC']]
        )
    )
}

In [None]:
# Tag every row with the sample name in a `batch` column placed right after `cell`.
TBfb <- TBfb %>% mutate(batch = sample, .after=cell)

In [None]:
# stat
# Read the metrics table, shorten the metric name/value column names to
# `item`/`value`, and keep the rows whose library type equals the configured `FB_type`.
Smtx_path <- str_c(snakemake@input[['count_dir']], str_glue(config[['count_FB_mtx']]), sep='/')

TBmtx <- read_csv(Smtx_path)
TBmtx <- dplyr::rename(TBmtx, item=`Metric Name`, value=`Metric Value`)
TBmtx <- filter(TBmtx, `Library Type` == config[['FB_type']])

In [None]:
# Metrics to export: names are the output keys, values the metric names as they
# appear in the `item` column.
Vmetric <- c(
    'cells'='Cells', 
    'mean_reads'='Mean reads per cell', 
    'median_umis'='Median UMI counts per cell',
    'total_reads'='Number of reads', 
    'FB_reads'='Fraction antibody reads usable', 
    'aggregate_reads'='Fraction antibody reads in aggregate barcodes', 
    'Q30_barcode'='Q30 barcodes', 
    'Q30_umi'='Q30 UMI', 
    'Q30_R2'='Q30 RNA read'
)

# One column per metric, restricted to (and renamed by) the metrics above.
TBstat <- distinct(TBmtx, item, value)
TBstat <- pivot_wider(TBstat, names_from='item', values_from='value')
TBstat <- select(TBstat, any_of(Vmetric))

# Record the number of rows kept in TBfb right after the `cells` metric.
TBstat <- mutate(TBstat, cells_saved=as.character(nrow(TBfb)), .after=cells)
Lstat <- as.list(TBstat)

In [None]:
# write
# Save the per-cell table as CSV and the summary metrics as YAML at the
# Snakemake output paths, creating the CSV's directory first.
Sout_csv <- snakemake@output[['FB_csv']]
Sout_stat <- snakemake@output[['FB_stat']]

dir.create(dirname(Sout_csv), recursive = TRUE)
write_excel_csv(TBfb, Sout_csv)
write_yaml(Lstat, file=Sout_stat)

In [None]:
# Copy both output files into the stat directory under their original file names,
# overwriting earlier copies.
Sstat_dir <- snakemake@output[['stat_dir']]
Ssrc_csv <- snakemake@output[['FB_csv']]
Ssrc_stat <- snakemake@output[['FB_stat']]

dir.create(Sstat_dir, recursive = TRUE)
file.copy(Ssrc_csv, str_c(Sstat_dir, '/', basename(Ssrc_csv)), overwrite=TRUE)
file.copy(Ssrc_stat, str_c(Sstat_dir, '/', basename(Ssrc_stat)), overwrite=TRUE)