In [None]:
baizer::pkglib(Seurat, tidyverse, baizer, yaml)

In [None]:
# the sample handled by this job is given by the Snakemake wildcard
sample <- snakemake@wildcards[['sample']]

# replace default configs with the configs stored under this sample's key
Lconfig_all <- snakemake@config
config <- replace_item(Lconfig_all, Lconfig_all[[sample]])

In [None]:
# input: count directory and the filtered feature-barcode matrix inside it
Pcount <- snakemake@input[['FB_count_dir']]
Pmtx <- str_glue('{Pcount}/filtered_feature_bc_matrix')

# outputs: per-cell table, stat file and the directory both are copied to
Lout <- snakemake@output
Pcsv <- Lout[['FB_csv']]
Pstat <- Lout[['FB_stat']]
Pstat_dir <- Lout[['stat_dir']]

# per-sample feature barcode definitions and correction factors
Lsample <- config[[sample]]
Lfb <- Lsample[['id2seq']]
Lcorr <- Lsample[['FB_corr']]

# thresholds used by the HT / BD / NC calls below
Nlim_HT <- config[['Nlim_HT']]
Nratio_HT <- config[['Nratio_HT']]
Nratio_BD <- config[['Nratio_BD']]
Nratio_NC <- config[['Nratio_NC']]

In [None]:
# Read the count matrix with Seurat, transpose it, and convert it to a
# data frame whose former row names are handled by r2c('cell')
# (presumably moved into a `cell` column -- confirm against baizer::r2c).
Mcount <- Read10X(data.dir = Pmtx)
Mtransposed <- t(as.matrix(Mcount))
TBfb <- r2c(as.data.frame(Mtransposed), 'cell')

In [None]:
# fb corr
# Multiply the counts of each feature barcode listed in Lcorr by its
# correction factor.
for (fb in names(Lcorr)) {
    # `fb` is used as a regex anchored at the end of the column name
    fb_col <- str_subset(colnames(TBfb), str_glue('{fb}$'))
    # `[[` needs exactly one column name; zero or several matches would
    # otherwise fail with a cryptic subscript error
    if (length(fb_col) != 1) {
        stop(
            str_glue('FB_corr key "{fb}" must match exactly one column, but matched {length(fb_col)}'),
            call. = FALSE
        )
    }
    TBfb[[fb_col]] <- TBfb[[fb_col]] * Lcorr[[fb]]
}

In [None]:
# sum
# Per-cell totals over the columns of each feature barcode family.
Vsum_HT <- rowSums(select(TBfb, matches('^HT_')))
Vsum_NC <- rowSums(select(TBfb, matches('^NC_')))
Vsum_BD <- rowSums(select(TBfb, matches('^BD_')))

TBfb <- bind_cols(
    TBfb,
    tibble(sum_HT=Vsum_HT, sum_NC=Vsum_NC, sum_BD=Vsum_BD)
)

In [None]:
# BD

# BD list
# Group the BD columns by base name: ids that differ only by a trailing
# '#<number>' suffix are collected under the same base name.
Vcol_BD <- Lfb$BD %>% names %>% str_c('BD_', .)
Vbase_BD <- str_replace(Vcol_BD, '#\\d+$', '')
Lcol_BD <- list()
for (fb in unique(Vbase_BD)) {
    # exact comparison: a regex/substring match on the base name would also
    # collect the columns of any other barcode whose name contains it
    # (e.g. BD_S1 vs BD_S10)
    Lcol_BD[[fb]] <- Vcol_BD[Vbase_BD == fb]
}


# ratio
# For each base name, store the per-cell total over its columns and that
# total divided by sum_BD, rounded to 2 digits (NaN where sum_BD is 0).
for (n in names(Lcol_BD)) {
    TBfb[, n] <- TBfb %>% select(all_of(Lcol_BD[[n]])) %>% rowSums
    TBfb <- TBfb %>% mutate('{n}_ratio' := round(.data[[n]] / sum_BD, 2))
}

# BD_type
# Collapse the names (without the 'BD_' prefix) of all barcodes whose ratio
# is at least Nratio_BD into one comma-separated string.
# row:   ratio values of one cell, in the order of Vname
# Vname: barcode names matching the entries of `row`
# A row of NaN ratios produces NA.
make_comma_col <- function(row, Vname) {
    Vname[row >= Nratio_BD] %>% str_replace('^BD_', '') %>% str_c(collapse=',')
}

Vcol_ratio <- str_c(names(Lcol_BD), '_ratio')
TBfb$BD_type <- TBfb %>% select(all_of(Vcol_ratio)) %>% apply(1, make_comma_col, names(Lcol_BD))

In [None]:
# HT
# For every cell keep the hashtag with the highest share of sum_HT
# (HT_group, HT_ratio) and classify the cell (HT_type).
if (length(Lfb$HT) > 0) {
    TBhash_group <- TBfb %>%
        select(cell, starts_with('HT_'), sum_HT) %>%
        # split the column names at the FIRST underscore only, so hashtag
        # names that themselves contain '_' are kept whole
        pivot_longer(
            -c(cell, sum_HT),
            names_to=c('.value', 'HT_group'),
            names_pattern='^([^_]+)_(.*)$'
        ) %>%
        group_by(cell) %>%
        mutate(HT_ratio=round(HT/sum_HT, 2)) %>%
        # first row per cell after sorting = hashtag with the largest ratio
        arrange(cell, desc(HT_ratio)) %>%
        dplyr::slice(1) %>%
        ungroup %>%
        # NOTE(review): the original computed this classification into a
        # column named `group` and then dropped it in select(), so Nlim_HT and
        # Nratio_HT had no effect on the output. It is kept here as an extra
        # column; HT_group and HT_ratio are unchanged.
        mutate(HT_type=case_when(
            sum_HT < 0.0001 ~ 'no hashtag',
            sum_HT < Nlim_HT ~ 'low hashtag',
            HT_ratio > Nratio_HT ~ HT_group,
            TRUE ~ 'mixed'
        )) %>%
        select(cell, HT_group, HT_ratio, HT_type)
    TBfb <- TBfb %>% left_join(TBhash_group, by='cell')
}

In [None]:
# NC
# Flag cells whose NC counts make up more than Nratio_NC of the combined
# BD + NC counts; cells without any NC count are never flagged.
if (length(Lfb$NC) > 0) {
    Vfrac_NC <- TBfb$sum_NC / (TBfb$sum_BD + TBfb$sum_NC)
    TBfb$NC_load <- ifelse(TBfb$sum_NC == 0, FALSE, Vfrac_NC > Nratio_NC)
}

In [None]:
# stat
# No statistics are filled in here; this empty list is what is later
# written to the stat YAML file.
Lstat <- list()

In [None]:
# write
# Create the parent directory of each output file first.
# showWarnings = FALSE because the directories may already exist.
dir.create(dirname(Pcsv), recursive = TRUE, showWarnings = FALSE)
dir.create(dirname(Pstat), recursive = TRUE, showWarnings = FALSE)
write_excel_csv(TBfb, Pcsv)
write_yaml(Lstat, file=Pstat)

In [None]:
# Copy both result files into the stat directory.
# showWarnings = FALSE because the directory may already exist.
dir.create(Pstat_dir, recursive = TRUE, showWarnings = FALSE)
for (Psrc in c(Pcsv, Pstat)) {
    Pdest <- str_c(Pstat_dir, '/', basename(Psrc))
    # file.copy() only returns FALSE on failure, so check it explicitly
    if (!file.copy(Psrc, Pdest, overwrite=TRUE)) {
        stop(str_glue('failed to copy {Psrc} to {Pdest}'), call. = FALSE)
    }
}