In [None]:
# Show the R installation directory of the session running this notebook.
R.home()

In [None]:
baizer::pkglib(baizer, tidyverse, ggsci, ggbeeswarm, plutor, ComplexHeatmap, 
               plotly, ggplotify, data.tree, echarts4r, jsonlite, knitr, yaml,
               shazam, stringdist, igraph)

In [None]:
# Name of the sample this job is running for (Snakemake wildcard).
sample <- snakemake@wildcards[["sample"]]

# Effective config: the full Snakemake config with the entries found under
# this sample's own key substituted in.
# NOTE(review): exact merge rules are those of replace_item() - confirm.
config <- snakemake@config %>%
  replace_item(snakemake@config[[sample]])

In [None]:
# Directories handed over by the Snakemake rule:
#   stat_dir   - rule parameter; filter_stat.yaml is read from it further down
#   filter_dir - rule input; the per-mode *_keep.csv / *_flt.csv tables are
#                read from it
stat_dir <- snakemake@params[["stat_dir"]]
filter_dir <- snakemake@input[["filter_dir"]]

In [None]:
# Theme and colours.
# The same ECharts theme file is used in two forms: collapsed into a single
# string (handed to e_theme_custom() by the charts) and parsed from JSON so
# that its colour palette can be reused.
mytheme <- str_c(readLines(snakemake@params[["echarts_theme"]]), collapse = "")
theme_list <- snakemake@params[["echarts_theme"]] %>% fromJSON()
Vcol <- theme_list$color

# Light grey used as the fallback colour in the clone pie chart.
grey_col <- "#f5f5f5"

In [None]:
# list to save all results
# Filled in by the cells below: one entry per filter mode (the loaded tables
# plus the charts built from them) and the top-level 'metadata' and 'cells'
# tables. The whole list is written out with saveRDS() at the end.
Lres <- list()

In [None]:
# Load the filter tables of every configured filter mode.
#
# For each mode named in config[['flt_mode']] two CSV files are read from
# filter_dir:
#   {mode}_keep.csv -> Lres[[mode]]$keep
#   {mode}_flt.csv  -> Lres[[mode]]$df
# Modes whose files are missing are skipped; the later cells only iterate
# over names(Lres).
#
# The existence check is made in filter_dir - the directory the files are
# read from - and covers both files, so a mode is never half-loaded.
for (flt_mode in names(config[['flt_mode']])) {
    keep_path <- file.path(filter_dir, str_c(flt_mode, '_keep.csv'))
    flt_path <- file.path(filter_dir, str_c(flt_mode, '_flt.csv'))

    if (file.exists(keep_path) && file.exists(flt_path)) {
        TBkeep <- read_csv(keep_path)
        TBflt <- read_csv(flt_path)

        Lres[[flt_mode]] <- list(df=TBflt, keep=TBkeep)
    }
}

In [None]:
# filter

In [None]:
# Per filter mode: show which cells each filter term kept.
#   left  - cell x term heatmap of the keep flags (0 = removed, 1 = kept)
#   right - number of kept cells per term, with a reference line at the
#           number of rows of the filtered table
for (flt_mode in names(Lres)) {
  TBflt <- Lres[[flt_mode]][["df"]]

  # seriate_df() works on a table with cells as row names; the 'cell' column
  # is restored afterwards.
  TBkeep <- Lres[[flt_mode]][["keep"]] %>%
    c2r("cell") %>%
    seriate_df() %>%
    r2c("cell")

  # Long form: one row per cell / term pair.
  TBkeep_longer <- pivot_longer(
    TBkeep, -cell,
    names_to = "term", values_to = "value"
  )

  keep_heatmap <- TBkeep_longer %>%
    mutate(term = str_replace(term, "_keep$", "")) %>%
    e_charts(term) %>%
    e_heatmap(cell, value) %>%
    e_x_axis(axisLabel = list(interval = 0, rotate = 45)) %>%
    e_y_axis(
      name = "Cell", nameLocation = "center",
      axisLabel = list(show = FALSE), axisTick = list(show = FALSE)
    ) %>%
    e_visual_map(
      value,
      type = "piecewise", splitNumber = 2,
      orient = "horizontal", top = "top", left = "center",
      pieces = list(
        list(min = 0, max = 0, label = "removed"),
        list(min = 1, max = 1, label = "kept")
      )
    ) %>%
    e_theme_custom(mytheme) %>%
    e_grid(containLabel = TRUE)

  # Column sums of the keep flags = number of cells kept by each term.
  kept_per_term <- TBkeep %>%
    select(-cell) %>%
    apply(2, sum)

  keep_bar <- as_tibble(kept_per_term, rownames = "term") %>%
    mutate(term = str_replace(term, "_keep$", "")) %>%
    e_charts(term) %>%
    e_bar(value) %>%
    e_x_axis(axisLabel = list(interval = 0, rotate = 45)) %>%
    e_y_axis(name = "Kept cells", nameLocation = "middle", nameGap = 30) %>%
    e_mark_line(data = list(yAxis = nrow(TBflt)), title = "filtered") %>%
    e_legend(show = FALSE) %>%
    e_theme_custom(mytheme) %>%
    e_grid(containLabel = TRUE)

  Lres[[flt_mode]][["filter"]] <- e_arrange(keep_heatmap, keep_bar, cols = 2)
}

In [None]:
# mRNA

In [None]:
# Per filter mode: mRNA overview charts - UMAP coloured by cell type and by
# main cell type, UMI and gene counts per main cell type, and a sunburst of
# the cell-type hierarchy.
#
# A mode is processed only when 'mRNA' is one of its filter terms and the
# sample's own config has mRNA switched on. The switch is wrapped in isTRUE()
# so that a sample without an 'mRNA' entry (NULL) or with an NA value is
# skipped instead of stopping the loop with an invalid `if` condition.
for (flt_mode in names(Lres)) {
    if (('mRNA' %in% config[['flt_mode']][[flt_mode]]) &&
        isTRUE(config[[sample]][['mRNA']] == TRUE)) {
        TBflt <- Lres[[flt_mode]][['df']]

        # Cell types that get their own legend entry: the first
        # config[['show_celltypes']] / config[['show_celltypes_main']] rows
        # of the sorted count tables. All other cells are labelled 'Other'.
        celltypes_in_plot <- TBflt %>% fancy_count(celltype, sort=TRUE) %>% 
            slice(seq_len(config[['show_celltypes']] )) %>% pull(celltype)
        celltypes_main_in_plot <- TBflt %>% fancy_count(celltype_main, sort=TRUE) %>% 
            slice(seq_len(config[['show_celltypes_main']] )) %>% pull(celltype_main)
        
        # mutate celltype
        # Levels are ordered by frequency, with 'Other' moved to the end.
        TBflt <- TBflt %>% 
            mutate(ct_plot = ifelse(celltype %in% celltypes_in_plot, celltype, 'Other')) %>%
            mutate(ct_main_plot = ifelse(celltype_main %in% celltypes_main_in_plot, celltype_main, 'Other')) %>% 
            mutate(
                ct_plot=fct_infreq(ct_plot) %>% fct_relevel('Other', after=Inf),
                ct_main_plot=fct_infreq(ct_main_plot) %>% fct_relevel('Other', after=Inf),
            )

        # UMAP scatter, one series per cell type
        e1 <- TBflt %>%
          group_by(ct_plot) %>%
          e_charts(UMAP_1) %>%
          e_scatter(UMAP_2, symbol_size = 8) %>%
          e_legend(type='scroll') %>%
          e_theme_custom(mytheme) %>%
          e_y_axis(axisLine=list(show=FALSE), axisTick=list(show=FALSE)) %>%
          e_x_axis(axisLine=list(show=FALSE), axisTick=list(show=FALSE)) %>%
          e_tooltip()

        # UMAP scatter, one series per main cell type
        e2 <- TBflt %>%
          group_by(ct_main_plot) %>%
          e_charts(UMAP_1) %>%
          e_scatter(UMAP_2, symbol_size = 8) %>%
          e_legend(type='scroll') %>%
          e_theme_custom(mytheme) %>%
          e_y_axis(axisLine=list(show=FALSE), axisTick=list(show=FALSE)) %>%
          e_x_axis(axisLine=list(show=FALSE), axisTick=list(show=FALSE)) %>%
          e_tooltip()

        # nCount_RNA per main cell type
        e3 <- TBflt %>%
          group_by(ct_main_plot) %>%
          e_charts() %>%
          e_boxplot(nCount_RNA) %>%
          e_y_axis(name='umis', nameLocation='middle', nameGap=30) %>%
          e_tooltip()

        # nFeature_RNA per main cell type
        e4 <- TBflt %>%
          group_by(ct_main_plot) %>%
          e_charts() %>%
          e_boxplot(nFeature_RNA) %>%
          e_y_axis(name='genes', nameLocation='middle', nameGap=30) %>%
          e_tooltip()
        
        # celltype sunburst
        # Edge table (parents, labels, n): cell type under its main type,
        # main types under a single 'Everything' root, and the root itself.
        TBc1 <- TBflt %>% count(parents=celltype_main, labels=celltype)
        TBc2 <- TBflt %>% count(labels=celltype_main) %>%
            mutate(parents='Everything')
        TBc3 <- tibble(parents='', labels='Everything', n=0)

        # A label identical to its parent gets a trailing space so the two
        # nodes keep distinct names.
        TBp <- bind_rows(TBc1, TBc2, TBc3) %>% rename(value=n) %>%
            mutate(
                labels=ifelse(parents==labels, str_c(labels, ' '), labels)
            )

        # clean cell type
        # Drops a trailing '_cell' / '_cells' from the displayed names.
        TBp <- TBp %>% mutate(
            parents=str_replace(parents, '_cells*$', ''),
            labels=str_replace(labels, '_cells*$', ''),
        )

        e5 <- FromDataFrameNetwork(TBp) %>% 
          e_charts() %>%
          e_sunburst(label=list(
              minAngle=10, width=80, overflow='truncate'
          )) %>%
          e_tooltip()
        
        Lres[[flt_mode]][['mRNA']] <- e_arrange(e1, e2, e3, e4, e5, cols=2)

    }
}

In [None]:
# hashtag

In [None]:
# Per filter mode: hashtag (HT) charts. Only built when 'FB' is one of the
# mode's filter terms and the sample's id2seq config lists at least one HT.
#   left  - total hashtag UMIs vs. HT_ratio, one series per HT_group
#   right - number of cells per HT_group
for (flt_mode in names(Lres)) {
  mode_has_fb <- "FB" %in% config[["flt_mode"]][[flt_mode]]
  if (!(mode_has_fb && length(config[[sample]][["id2seq"]][["HT"]]) > 0)) {
    next
  }

  TBflt <- Lres[[flt_mode]][["df"]]

  # Hashtag columns only: everything starting with 'HT_' plus the total.
  TBfb_ht <- select(TBflt, cell, matches("^HT_"), sum_HT)

  ht_scatter <- TBfb_ht %>%
    group_by(HT_group) %>%
    e_charts(sum_HT) %>%
    e_scatter(HT_ratio, symbol_size = 10) %>%
    e_x_axis(name = "All hashtag umi", nameLocation = "middle", nameGap = 30) %>%
    e_y_axis(
      name = "Major hashtag ratio", nameLocation = "middle", nameGap = 30
    ) %>%
    e_tooltip(trigger = "axis") %>%
    e_theme_custom(mytheme)

  ht_pie <- TBfb_ht %>%
    count(HT_group) %>%
    e_charts(HT_group) %>%
    e_pie(n, radius = c("50%", "70%")) %>%
    e_theme_custom(mytheme)

  Lres[[flt_mode]][["HT"]] <- e_arrange(ht_scatter, ht_pie, cols = 2)
}

In [None]:
# binding feature barcode

In [None]:
# Reorder the cells with seriate_df() (cells as row names during the call),
# restore the 'cell' column and reshape to long form: one row per cell /
# column pair, with the column name in 'x' and its value in 'v'.
bd_to_long <- function(tb) {
  tb %>%
    c2r("cell") %>%
    seriate_df() %>%
    r2c("cell") %>%
    pivot_longer(-cell, names_to = "x", values_to = "v")
}

# Cell x barcode heatmap of a long (cell, x, v) table. `legend_text` labels
# the colour scale; `...` is forwarded to e_visual_map().
bd_heatmap <- function(tb_long, legend_text, ...) {
  tb_long %>%
    e_charts(x) %>%
    e_heatmap(cell, v) %>%
    e_x_axis(axisLabel = list(interval = 0)) %>%
    e_y_axis(
      name = "Cell", nameLocation = "center",
      axisLabel = list(show = FALSE), axisTick = list(show = FALSE)
    ) %>%
    e_visual_map(
      v,
      text = c(legend_text, ""),
      orient = "horizontal", top = "top", left = "center",
      ...
    ) %>%
    e_theme_custom(mytheme)
}

# Per filter mode: binding feature barcode (BD) charts. Only built when 'FB'
# is one of the mode's filter terms and the sample's id2seq config lists at
# least one BD.
for (flt_mode in names(Lres)) {
  mode_has_fb <- "FB" %in% config[["flt_mode"]][[flt_mode]]
  if (!(mode_has_fb && length(config[[sample]][["id2seq"]][["BD"]]) > 0)) {
    next
  }

  TBflt <- Lres[[flt_mode]][["df"]]

  # BD ratio: the BD_<name>_ratio columns, labelled by <name>
  TBp <- TBflt %>%
    select(cell, matches("^BD_[^#]+_ratio$")) %>%
    bd_to_long() %>%
    mutate(x = str_replace(x, "BD_(.+?)_ratio", "\\1"))
  e1 <- bd_heatmap(TBp, "ratio", precision = 2)

  # BD type: number of cells per BD_type
  e2 <- TBflt %>%
    count(BD_type) %>%
    e_charts(BD_type) %>%
    e_pie(
      n,
      radius = c("50%", "70%"),
      label = list(show = TRUE, formatter = "{b}({c}, {d}%)")
    )

  # absolute value: every BD_ column without '#', minus the ratio columns
  # and BD_type
  TBp <- TBflt %>%
    select(cell, matches("^BD_[^#]+$"), -matches("_ratio$"), -BD_type) %>%
    bd_to_long() %>%
    mutate(x = str_replace(x, "BD_", ""))
  e3 <- bd_heatmap(TBp, "umi")

  # absolute value, restricted to the barcodes named in the sample's
  # id2seq$BD config
  bd_columns <- str_c("BD_", names(config[[sample]][["id2seq"]][["BD"]]))
  TBp <- TBflt %>%
    select(cell, all_of(bd_columns)) %>%
    bd_to_long() %>%
    mutate(x = str_replace(x, "BD_", ""))
  e4 <- bd_heatmap(TBp, "umi")

  Lres[[flt_mode]][["BD"]] <- e_arrange(e1, e2, e3, e4, cols = 2)
}

In [None]:
# VDJB

In [None]:
# Per filter mode: B-cell receptor (VDJB) summary - a class table, a clone
# pie, heavy / light chain SHM densities, per-site heavy chain mutation
# rates (overall and for the main V genes) and an MDS map of the clones.
#
# A mode is processed only when 'VDJB' is one of its filter terms and the
# sample's own config has VDJB switched on. The switch is wrapped in isTRUE()
# so that a sample without a 'VDJB' entry (NULL) or with an NA value is
# skipped instead of stopping the loop with an invalid `if` condition.
for (flt_mode in names(Lres)) {
    if (('VDJB' %in% config[['flt_mode']][[flt_mode]]) &&
        isTRUE(config[[sample]][['VDJB']] == TRUE)) {
        
        # class
        TBflt <- Lres[[flt_mode]][['df']]
        tbclass <- TBflt %>% 
            fancy_count(class, ext=c_gene_H, ext_fmt='ratio') %>%
            kable(align = "c")

        # Cells per clone. Nclones is the number of clones with more than
        # one cell; the label in the centre of the pie is Nclones divided by
        # the number of cells.
        TBp <- TBflt %>% count(clone=clone_cellranger) 
        Nclones <- TBp %>% filter(n > 1) %>% nrow
        Nclones_percent <- float_to_percent(Nclones / nrow(TBflt))

        e1 <- TBp %>%
          e_charts(clone) %>%
          e_pie(n, radius = c("50%", "70%"), 
                label=list(show=TRUE, position='center', 
                           formatter=Nclones_percent, fontSize=30), 
                itemStyle=list(borderColor='grey')) %>%
          e_legend(show=FALSE) %>% 
          pe_color(gradient_color(Vcol, n=seq_len(Nclones)), default=grey_col)

        # Heavy chain SHM ratio density, one curve per class
        e2 <- TBflt %>% select(class, v_domain_shm_ratio_H) %>%
          filter(!is.na(class)) %>%
          group_by(class) %>%
          e_chart() %>%
          e_density(v_domain_shm_ratio_H, areaStyle = list(opacity = 0.4), smooth = TRUE) %>%
          e_tooltip() %>%
          e_x_axis(name='Heavy Chain SHM', nameLocation='middle', nameGap=30) %>%
          e_y_axis(name='Density', nameLocation='middle', nameGap=30)
        
        # Light chain SHM ratio density, one curve per class
        e3 <- TBflt %>% select(class, v_domain_shm_ratio_L) %>%
          filter(!is.na(class)) %>%
          group_by(class) %>%
          e_chart() %>%
          e_density(v_domain_shm_ratio_L, areaStyle = list(opacity = 0.4), smooth = TRUE) %>%
          e_tooltip() %>%
          e_x_axis(name='Light Chain SHM', nameLocation='middle', nameGap=30) %>%
          e_y_axis(name='Density', nameLocation='middle', nameGap=30)
        
        ##################################
        # mutation site
        # Column sums of the imgt_H* columns divided by the number of cells.
        TBp <- TBflt %>% select(matches('^imgt_H')) %>% colSums %>% 
            as_tibble(rownames='site') %>%
            mutate(site=str_replace(site, 'imgt_H', '')) %>%
            mutate(value = round(value / nrow(TBflt), 3))

        e4 <- TBp %>% e_chart(site) %>% 
            e_line(value, smooth=TRUE) %>%
            e_tooltip() %>%
            e_legend(show=FALSE) %>%
            e_x_axis(name='Heavy Chain IMGT numbering', nameLocation='middle', nameGap=35) %>%
            e_y_axis(name='Mutation Rate', nameLocation='middle', nameGap=35) 
        
        ##################################
        # mutation site of main v genes
        # The top 5 heavy chain V genes keep their name, the rest is grouped
        # as 'Other' (placed last in the factor levels).
        TBhsite <- TBflt %>% 
            mutate(
                v_gene_H_group = ifelse(v_gene_H %in% top_item(v_gene_H, n=5), v_gene_H, 'Other'),
                v_gene_H_group = fct_infreq(v_gene_H_group) %>% fct_relevel('Other', after=Inf)
            ) %>%
            arrange(v_gene_H_group) %>%
            select(v_gene_H_group, matches('^imgt_H'))

        # One site/value table per V gene group.
        # NOTE(review): the denominator is nrow(TBflt) (all cells), not the
        # size of the group - confirm this is the intended rate.
        Lhsite <- TBhsite %>% 
            group_split(v_gene_H_group, .keep=FALSE) %>%
            map(
                ~colSums(.x) %>% as_tibble(rownames='site') %>%
                    mutate(site=str_replace(site, 'imgt_H', '')) %>%
                    mutate(value = round(value / nrow(TBflt), 3))
               )

        # Sites ordered by the number contained in their name.
        site_order <- Lhsite %>% map(~.x[['site']]) %>% reduce(union) %>%
            sortf(~ reg_match(.x, "\\d+") %>% as.double())

        # Wide table: one column per V gene group, joined on site. The
        # columns are named after the factor levels, relying on group_split()
        # returning the groups in level order.
        TBp <- Lhsite %>% reduce(~full_join(.x, .y, by='site'))

        colnames(TBp) <- c('site', levels(TBhsite$v_gene_H_group))

        TBp <- TBp %>% mutate(site=factor(site, site_order)) %>%
            arrange(site)

        e5 <- TBp %>% pivot_longer(-site, names_to='v_gene_H', values_to='value') %>%
            group_by(v_gene_H) %>%
            e_chart(site) %>% 
            e_line(value, smooth=TRUE) %>%
            e_tooltip() %>%
            e_legend(show=TRUE) %>%
            e_x_axis(name='Heavy Chain IMGT numbering', nameLocation='middle', nameGap=35) %>%
            e_y_axis(name='Mutation Rate', nameLocation='middle', nameGap=35) 
        
        ##################################
        # Hchain dist
        # One row per clone_changeo_H clone; v is the clone's gm_align_nt_H
        # sequence, named by clone id.
        TBclone_H <- collapseClones(TBflt, cloneColumn="clone_changeo_H", 
                           sequenceColumn="seq_align_nt_H",
                           germlineColumn="gm_align_nt_H") %>% 
            select(clone_changeo_H, seq_align_nt_H, gm_align_nt_H)
        v <- pull(TBclone_H, gm_align_nt_H, clone_changeo_H)


        # Pairwise string distances between those sequences
        DFdist <- v %>%
            stringdistmatrix %>%
            as.matrix %>%
            as.data.frame

        rownames(DFdist) <- names(v)
        colnames(DFdist) <- names(v)

        # mds
        # NOTE(review): cmdscale(k=2) presumably needs at least three
        # clones - confirm behaviour for very small samples.
        TBmds <- DFdist %>% 
            cmdscale(k=2) %>%
            as_tibble(rownames='clone_changeo_H') %>%
            rename(MDS_1=V1, MDS_2=V2)

        # join
        # Per-clone summary (cell count, genes, classes, median SHM ratio)
        # with the MDS coordinates attached.
        TBnode <- TBflt %>% summarise(n=n(), 
                            v_gene_H = unique(v_gene_H) %>% str_c(collapse=','),
                            j_gene_H = unique(j_gene_H) %>% str_c(collapse=','),
                            class = unique(class) %>% str_c(collapse=','),
                            v_domain_shm_ratio_H = median(v_domain_shm_ratio_H),
                            .by=clone_changeo_H) %>%
                        mutate(clone_changeo_H=as.character(clone_changeo_H)) %>%
                        left_join(TBmds, by='clone_changeo_H')


        e6 <- TBnode %>% 
            group_by(class) %>%
            e_charts(MDS_1) %>%
            e_grid(show=FALSE) %>%
            e_scatter(MDS_2, symbol_size = mm_norm(TBnode$n, low=5, high=20)) %>%
            e_y_axis(axisLine=list(show=FALSE), axisLabel=list(show=FALSE), axisTick=list(show=FALSE)) %>%
            e_x_axis(axisLine=list(show=FALSE), axisLabel=list(show=FALSE), axisTick=list(show=FALSE)) %>%
            e_hide_grid_lines(which = c("x", "y")) %>%
            e_tooltip() %>%
            e_theme_custom(mytheme)


        # NOTE(review): e1 is passed twice, which fills the first row of the
        # two-column layout with the same chart - confirm this is deliberate.
        Lres[[flt_mode]][['VDJB']] <- list(table=tbclass, 
                            plot=e_arrange(e1, e1, e2, e3, e4, e5, e6, cols=2)
                        )
    }
}

In [None]:
# metadata

In [None]:
# Sample sheet: keep the rows of the current sample and, when there are any,
# store them as a two-column items / value table (columns are first passed
# through remove_nacol(), then the table is transposed).
TBmeta <- snakemake@params[["metadata"]] %>% read_csv()

# `.env` makes sure the notebook variable `sample` is compared against the
# `sample` column of the sheet.
TBmeta_smp <- filter(TBmeta, sample == .env[["sample"]])

if (nrow(TBmeta_smp) > 0) {
  meta_items <- TBmeta_smp %>%
    remove_nacol() %>%
    t() %>%
    as_tibble(rownames = "items") %>%
    select(items = 1, value = 2)

  Lres[["metadata"]] <- kable(meta_items, align = "c")
}

In [None]:
# cells

In [None]:
# Filter statistics, read from filter_stat.yaml in stat_dir.
flt_stat <- read_yaml(file.path(stat_dir, "filter_stat.yaml"))

In [None]:
# Cell-count table: one row per entry of flt_stat, with the entry name in
# 'item', the entry's own fields as columns, and 'yield' = flt / raw as
# formatted by float_to_percent().
Lres[["cells"]] <- bind_cols(
  as_tibble(names(flt_stat)),
  map_dfr(flt_stat, as_tibble)
) %>%
  rename(item = value) %>%
  mutate(yield = float_to_percent(flt / raw, 2)) %>%
  kable(align = "c")

In [None]:
# out

In [None]:
# Write the collected results to the RDS file declared as the rule output.
out <- snakemake@output[['visualize_rds']]

# Create the output directory only when it is missing: dir.create() warns
# when the directory already exists.
out_dir <- dirname(out)
if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
}

saveRDS(Lres, out)