In [None]:
# Show the home directory of the R installation that is running this notebook
R.home()

In [None]:
baizer::pkglib(Seurat, SingleR, celldex, tidyverse, baizer, yaml)

In [None]:
# Name of the sample handled by this job, taken from the Snakemake wildcards
sample <- snakemake@wildcards[["sample"]]

# Start from the global config and overwrite its items with the
# sample-specific entries stored under the sample's name
config <- snakemake@config %>%
    replace_item(snakemake@config[[sample]])

In [None]:
# Seurat read-in

In [None]:
# Directory holding the 10x count matrix: the Snakemake input directory joined
# with the (glue-templated) sub-path from the config.
count_path <- str_c(snakemake@input[['count_dir']], str_glue(config[['count_mRNA_dir']]), sep='/')

if (config[[sample]][['FB']]) {
    # Feature-barcoding samples: Read10X result is indexed by data type,
    # only the 'Gene Expression' element is used here
    Lcount <- Read10X(data.dir = count_path)
    Mcount <- Lcount[['Gene Expression']]
} else {
    Mcount <- Read10X(data.dir = count_path)
}

# Keep genes (rows) and barcodes (columns) whose total count exceeds 10.
# drop = FALSE keeps the result two-dimensional even if a single row or
# column survives the filter.
Mcount <- Mcount[rowSums(Mcount) > 10, colSums(Mcount) > 10, drop = FALSE]

# Hand the count matrix to Seurat as is. Converting it to a data.frame first
# materialises a dense genes x cells table, which is needlessly slow and
# memory hungry; CreateSeuratObject accepts the matrix directly.
SE <- CreateSeuratObject(counts = Mcount, project = sample,
                         min.cells = config[['Nmin_cell_to_keep_gene']],
                         min.features = config[['Nmin_gene_to_keep_cell']])

# Expose the project name (stored by Seurat in orig.ident) as 'batch'
SE@meta.data <- SE@meta.data %>% dplyr::rename(batch = orig.ident)

In [None]:
# mitochondria umi percent
# Match the mitochondrial gene prefix in either case ("mt-" as well as "MT-"),
# so the percentage is not silently 0 when gene symbols are upper-case.
SE@meta.data[, 'mt_percent'] <- PercentageFeatureSet(SE, pattern = "^[Mm][Tt]-")

In [None]:
# cell type

In [None]:
# Path of the cell type reference file (.rds) for the configured species
celltype_ref <- str_glue(
    "src/celltype-ref/{config[['species']]}.rds"
)

In [None]:
# Load the reference file and pick the entry for the configured tissue
SEref <- celltype_ref %>%
    readRDS() %>%
    .[[config[['celltype_ref_tissue']]]]

In [None]:
# Log-normalize the counts (scale factor 10000) before cell type annotation
SE <- SE %>%
  Seurat::NormalizeData(
    normalization.method = "LogNormalize",
    scale.factor = 10000
  )

In [None]:
#' Reshape the output of SingleR::SingleR() into one row per cell
#'
#' The score columns are pivoted to long form, only the score belonging to
#' the assigned label is kept, and the columns are renamed.
#'
#' @param x A `DFrame`, expected to be the result of `SingleR::SingleR()`.
#' @return A tibble with the columns `cell`, `celltype`, `celltype_score`,
#'   `delta_next` and `celltype_pruned`.
parse_SingleR <- function(x) {
  is_singler_output <- is(x, "DFrame")
  if (!is_singler_output) {
    stop("x should be the output of SingleR::SingleR()")
  }

  # columns that are kept as identifiers while the score columns are pivoted
  id_cols <- c("cell", "labels", "delta.next", "pruned.labels")
  # output columns, written as new_name = old_name
  out_cols <- c(
    "cell",
    "celltype" = "labels",
    "celltype_score" = "s",
    "delta_next" = "delta.next",
    "celltype_pruned" = "pruned.labels"
  )

  x %>%
    as_tibble(rownames = "cell") %>%
    # splitting a name such as "scores.<celltype>" on "cores." leaves "s" as
    # the value column and "<celltype>" as the celltype
    pivot_longer(
      -dplyr::all_of(id_cols),
      names_to = c(".value", "celltype"),
      names_sep = "cores."
    ) %>%
    # celltype in colnames and labels differ in some symbols, so compare
    # only their alphanumeric characters
    filter(
      baizer::reg_join(.data[["labels"]], "[\\da-zA-Z]") ==
        baizer::reg_join(.data[["celltype"]], "[\\da-zA-Z]")
    ) %>%
    # celltype in colnames have some duplication,
    # e.g. "scores.NK.cells..NK.H..MCMV1." and
    # "scores.NK.cells..NK.H.MCMV1." in mouse reference;
    # keep the highest score per cell
    dplyr::arrange(.data[["cell"]], dplyr::desc(.data[["s"]])) %>%
    dplyr::distinct(.data[["cell"]], .keep_all = TRUE) %>%
    # drop the column-derived celltype, then select and rename
    dplyr::select(-"celltype") %>%
    dplyr::select(dplyr::all_of(out_cols))
}

In [None]:
#' Annotate the cells of a Seurat object with SingleR
#'
#' Runs `SingleR::SingleR()` against the fine labels of `ref`, tidies the
#' result with `parse_SingleR()`, maps every fine label to its main label and
#' attaches everything to the meta data of `x`.
#'
#' @param x A Seurat object.
#' @param ref A SummarizedExperiment whose column data has `label.fine` and
#'   `label.main`.
#' @return `x` with the annotation columns added to its meta data.
SingleR_SE <- function(x, ref) {
  if (!is(x, "Seurat")) {
    stop("x should be a Seurat object")
  }

  if (!is(ref, "SummarizedExperiment")) {
    stop("ref should be a SummarizedExperiment object")
  }

  # lookup vector: names are fine labels, values are main labels
  TBref_celltype <- SummarizedExperiment::colData(ref) %>%
    as_tibble() %>%
    dplyr::distinct(.data[["label.fine"]], .keep_all = TRUE)
  # columns are given as strings: the `.data` pronoun is meant for
  # data-masking verbs, not for column selection
  fine2main <- TBref_celltype %>%
    dplyr::pull("label.main", name = "label.fine")

  # call SingleR
  TBannot <- SingleR::SingleR( # nolint
    test = Seurat::as.SingleCellExperiment(x),
    ref = ref, labels = ref$label.fine
  ) %>%
    parse_SingleR()
  # add main celltype
  # `.after` is a tidyselect argument, where `.data` is deprecated
  # (tidyselect >= 1.2.0), hence the plain column name; unname() keeps the
  # lookup names out of the new column
  TBannot <- TBannot %>% dplyr::mutate( # nolint
    celltype_main = unname(fine2main[.data[["celltype"]]]),
    .after = "celltype"
  )
  # add meta.data, with the cell column moved to the row names
  Seurat::AddMetaData(x, TBannot %>% baizer::c2r("cell"))
}

In [None]:
# Annotate the cells of SE against the loaded reference
SE <- SingleR_SE(x = SE, ref = SEref)

In [None]:
# reduction

In [None]:
#' Normalize, scale and reduce a Seurat object (PCA followed by UMAP)
#'
#' @param x A Seurat object.
#' @param use_dim Number of leading principal components passed to UMAP.
#'   Must be a single number >= 1.
#' @return `x` with the PCA and UMAP reductions computed and the UMAP
#'   coordinates added to its meta data.
reduction_SE <- function(x, use_dim = 30) {
  # Fail before the expensive steps: `1:use_dim` would give c(1, 0) for 0
  # and an error for NULL
  if (!is.numeric(use_dim) || length(use_dim) != 1 ||
    is.na(use_dim) || use_dim < 1) {
    stop("use_dim should be a single number >= 1")
  }
  n_dim <- as.integer(use_dim)

  # Normalizing the data
  x <- Seurat::NormalizeData(x,
    normalization.method = "LogNormalize", scale.factor = 10000
  )
  # Identification of highly variable features (feature selection)
  x <- Seurat::FindVariableFeatures(x,
    selection.method = "vst", nfeatures = 2000
  )
  # Scaling the data
  x <- Seurat::ScaleData(x)
  # Dimension reduction
  # npcs stays at the RunPCA default of 50 unless more dimensions are
  # requested for UMAP, in which case enough PCs are computed
  x <- Seurat::RunPCA(x,
    features = Seurat::VariableFeatures(x), npcs = max(50L, n_dim),
    verbose = FALSE
  )
  x <- Seurat::RunUMAP(x, dims = seq_len(n_dim))

  # Store the UMAP coordinates per cell in the meta data, read through the
  # public accessor rather than the object's slots
  Seurat::AddMetaData(
    x,
    as.data.frame(Seurat::Embeddings(x, reduction = "umap"))
  )
}

In [None]:
# Run the reduction with the number of dimensions set in the config
SE <- reduction_SE(x = SE, use_dim = config[['Ndim_umap_use']])

In [None]:
# stat

# Read the metrics table, keep the rows of the 'Gene Expression' library
# type and give the metric name/value columns short names
TBmtx <- str_c(
    snakemake@input[['count_dir']],
    str_glue(config[['count_mRNA_mtx']]),
    sep='/'
) %>%
    read_csv() %>%
    filter(`Library Type` == 'Gene Expression') %>%
    dplyr::rename(item=`Metric Name`, value=`Metric Value`)

In [None]:
# One column per metric, then keep and rename (new = old) the metrics of
# interest; any_of() skips the ones that are absent from the table
Lstat <- TBmtx %>%
    distinct(item, value) %>%
    pivot_wider(names_from='item', values_from='value') %>%
    select(any_of(c(
        'cells'='Cells',
        'median_genes'='Median genes per cell',
        'mean_reads'='Mean reads per cell',
        'median_umis'='Median UMI counts per cell',
        'align_rate'='Mapped to genome',
        'total_reads'='Number of reads',
        'Q30_barcode'='Q30 barcodes',
        'Q30_umi'='Q30 UMI',
        'Q30_RNA'='Q30 RNA read'
    ))) %>%
    as.list()

In [None]:
# write
# The output directory may already exist; showWarnings = FALSE avoids the
# "already exists" warning in that case
dir.create(dirname(snakemake@output[['mRNA_csv']]), recursive = TRUE, showWarnings = FALSE)
# per-cell meta data, with the row names moved into a 'cell' column
write_excel_csv(SE@meta.data %>% r2c('cell'), snakemake@output[['mRNA_csv']])
# the full Seurat object
saveRDS(SE, snakemake@output[['mRNA_rds']])
# the selected summary metrics
write_yaml(Lstat, file=snakemake@output[['mRNA_stat']])

In [None]:
# Copy the meta data csv and the stat yaml into the stat directory, keeping
# their file names
dir.create(snakemake@output[['stat_dir']], recursive = TRUE, showWarnings = FALSE)
Lcopied <- file.copy(
    c(snakemake@output[['mRNA_csv']], snakemake@output[['mRNA_stat']]),
    snakemake@output[['stat_dir']],
    overwrite=TRUE
)
# file.copy() reports failure only through its return value, so check it
# instead of letting a failed copy pass unnoticed
if (!all(Lcopied)) {
    stop("failed to copy the mRNA outputs to ", snakemake@output[['stat_dir']])
}