In [None]:
library(Seurat)
library(Matrix)
library(tidyverse)

In [None]:
source("../../util/sc_preprocess.R")

In [None]:
# Load the raw DGE objects. These RDS files are written by import_DGE.R.
# File names suggest: the original SPLiT data, its conversions "to 10x" and
# "to drop", and a "to 10x" variant that keeps the UMI (TODO confirm against
# import_DGE.R).
dge_base <- readRDS(file = "vFeb2021_SPLiT_original_rawdge.rds")
dge_to10x <- readRDS(file = "vFeb2021_SPLiT_to_10x_rawdge.rds")
dge_todrop <- readRDS(file = "vFeb2021_SPLiT_to_drop_rawdge.rds")
dge_to10x_keepUMI <- readRDS(file = "vFeb2021_SPLiT_to_10x_keepUMI_rawdge.rds")

In [None]:
# Per-column totals of each raw DGE (one value per column; columns are used
# as cells further down in this notebook).
cnt_base <- colSums(x = dge_base)
cnt_10x  <- colSums(x = dge_to10x)
cnt_drop <- colSums(x = dge_todrop)

In [None]:
# Number of columns in the base DGE whose total is below 1000.
length(cnt_base[cnt_base < 1000])

In [None]:
# Number of columns in the to-10x DGE whose total is below 1000.
length(cnt_10x[cnt_10x < 1000])

In [None]:
# Number of columns in the to-drop DGE whose total is below 1000.
length(cnt_drop[cnt_drop < 1000])

In [None]:
# Pass each converted DGE through refresh_dge() together with its barcode
# correspondence table (headerless TSV; src = column 2, dest = column 1).
# NOTE(review): refresh_dge() is defined outside this notebook (presumably in
# ../../util/sc_preprocess.R); it presumably relabels the converted barcodes so
# that they match the original ones -- confirm there. The two 10x variants
# additionally pass add = "-1"; the drop variant does not.
#
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
dge_to10x <- refresh_dge(
  dge_to10x,
  direct = TRUE,
  header = FALSE,
  src = 2,
  dest = 1,
  path = "barcode_correspondence_data//Fig4/split_to_10x.merged.tsv",
  add = "-1"
)
dge_todrop <- refresh_dge(
  dge_todrop,
  direct = TRUE,
  header = FALSE,
  src = 2,
  dest = 1,
  path = "barcode_correspondence_data//Fig4/split_to_drop.merged.tsv"
)
dge_to10x_keepUMI <- refresh_dge(
  dge_to10x_keepUMI,
  direct = TRUE,
  header = FALSE,
  src = 2,
  dest = 1,
  path = "barcode_correspondence_data//Fig4/split_to_10x_keepUMI.merged.tsv",
  add = "-1"
)

In [None]:
# Keep only the columns whose total count exceeds 20 in the two 10x-converted
# DGEs. drop = FALSE keeps the result two-dimensional even when exactly one
# column passes the filter; without it the subset collapses to a plain vector
# and the later colnames()/column-subsetting steps fail.
dge_to10x <- dge_to10x[, colSums(dge_to10x) > 20, drop = FALSE]
dge_to10x_keepUMI <- dge_to10x_keepUMI[
  ,
  colSums(dge_to10x_keepUMI) > 20,
  drop = FALSE
]

In [None]:
# Column names (cells) present in all four DGEs.
cell.inter <- intersect(
  intersect(colnames(dge_base), colnames(dge_to10x)),
  intersect(colnames(dge_todrop), colnames(dge_to10x_keepUMI))
)

# Restrict every DGE to the shared cells, in the same column order.
# drop = FALSE keeps each result two-dimensional even if only one cell is
# shared; otherwise the subset would collapse to a vector.
dge_base <- dge_base[, cell.inter, drop = FALSE]
dge_to10x <- dge_to10x[, cell.inter, drop = FALSE]
dge_todrop <- dge_todrop[, cell.inter, drop = FALSE]
dge_to10x_keepUMI <- dge_to10x_keepUMI[, cell.inter, drop = FALSE]

In [None]:
# Unnamed list of the three converted DGEs, in the order:
# to-10x, to-drop, to-10x (keep UMI). Passed to get_cells_used() below.
dge_target_list <- list(
  dge_to10x,
  dge_todrop,
  dge_to10x_keepUMI
)

In [None]:
# Quick look at the top-left 5 x 5 corner of the base DGE.
dge_base[seq_len(5), seq_len(5)]

In [None]:
# Select the cells to analyse from the base DGE and the converted DGEs.
# NOTE(review): get_cells_used() is defined outside this notebook; percent_mito
# is presumably a mitochondrial-read percentage cutoff -- confirm in
# ../../util/sc_preprocess.R.
use_cells <- get_cells_used(
  dge_base,
  dge_target_list,
  percent_mito = 5
)

In [None]:
# Restrict all four DGEs to the cells chosen by get_cells_used().
# drop = FALSE keeps each result two-dimensional even if use_cells selects a
# single column; otherwise the subset would collapse to a vector and
# CreateSeuratObject() below would not receive a matrix.
dge_base <- dge_base[, use_cells, drop = FALSE]
dge_to10x <- dge_to10x[, use_cells, drop = FALSE]
dge_todrop <- dge_todrop[, use_cells, drop = FALSE]
dge_to10x_keepUMI <- dge_to10x_keepUMI[, use_cells, drop = FALSE]


In [None]:
# Wrap each filtered DGE in a Seurat object (all other arguments at defaults).
orig.intersect.seu <- CreateSeuratObject(counts = dge_base)
to10x.intersect.seu <- CreateSeuratObject(counts = dge_to10x)
todrop.intersect.seu <- CreateSeuratObject(counts = dge_todrop)
to10x.keepUMI.seu <- CreateSeuratObject(counts = dge_to10x_keepUMI)


In [None]:
# Call getVarGenes_untilPCA() on the original data without var_genes.
# NOTE(review): in this mode the helper presumably returns the selected
# variable genes (up to 5000) rather than a Seurat object -- confirm in
# ../../util/sc_preprocess.R.
high_var_genes <- getVarGenes_untilPCA(
  orig.intersect.seu,
  num_varFeatures = 5000
)

In [None]:
# Re-run getVarGenes_untilPCA() on every object, this time supplying the same
# gene set (high_var_genes, derived from the original data) to all four.
# NOTE(review): with var_genes given, the helper presumably returns the
# processed Seurat object (up to and including PCA, going by its name).
orig.intersect.seu <- getVarGenes_untilPCA(
  orig.intersect.seu,
  var_genes = high_var_genes,
  num_varFeatures = 5000
)
to10x.intersect.seu <- getVarGenes_untilPCA(
  to10x.intersect.seu,
  var_genes = high_var_genes,
  num_varFeatures = 5000
)
todrop.intersect.seu <- getVarGenes_untilPCA(
  todrop.intersect.seu,
  var_genes = high_var_genes,
  num_varFeatures = 5000
)
to10x.keepUMI.seu <- getVarGenes_untilPCA(
  to10x.keepUMI.seu,
  var_genes = high_var_genes,
  num_varFeatures = 5000
)


In [None]:
# Run seurat_processing_UMAP() on each object with identical settings
# (dimlimit = 20, res = 0.6).
# NOTE(review): the helper is defined outside this notebook; dimlimit is
# presumably the number of PCs used and res the clustering resolution.
orig.intersect.seu <- seurat_processing_UMAP(
  orig.intersect.seu,
  dimlimit = 20,
  res = 0.6
)
to10x.intersect.seu <- seurat_processing_UMAP(
  to10x.intersect.seu,
  dimlimit = 20,
  res = 0.6
)
todrop.intersect.seu <- seurat_processing_UMAP(
  todrop.intersect.seu,
  dimlimit = 20,
  res = 0.6
)
to10x.keepUMI.seu <- seurat_processing_UMAP(
  to10x.keepUMI.seu,
  dimlimit = 20,
  res = 0.6
)

In [None]:
# Persist the four processed Seurat objects under store_dir.
store_dir <- "seurat_rds/"

# saveRDS() does not create missing directories; make sure the target exists
# so the processed objects are not lost to a "cannot open file" error.
# showWarnings = FALSE keeps this silent when the directory is already there.
dir.create(store_dir, showWarnings = FALSE, recursive = TRUE)

saveRDS(
  orig.intersect.seu,
  paste0(store_dir, "vFeb2021_SPLiT_original_Seurat.rds")
)
saveRDS(
  to10x.intersect.seu,
  paste0(store_dir, "vFeb2021_SPLiT_to_10x_Seurat.rds")
)
saveRDS(
  todrop.intersect.seu,
  paste0(store_dir, "vFeb2021_SPLiT_to_drop_Seurat.rds")
)
saveRDS(
  to10x.keepUMI.seu,
  paste0(store_dir, "vFeb2021_SPLiT_to_10x_keepUMI_Seurat.rds")
)

In [None]:
# First 40 cell names of the original Seurat object.
v <- head(colnames(orig.intersect.seu), n = 40)

In [None]:
# Figure drawing: hand the original, to-10x and to-drop objects to draw_umap().
# NOTE(review): draw_umap() is defined outside this notebook; w/h are
# presumably the figure width/height and col_seed a colour seed -- confirm in
# ../../util/sc_preprocess.R.
draw_umap(
  seu_base   = orig.intersect.seu,
  seu_to10x  = to10x.intersect.seu,
  seu_todrop = todrop.intersect.seu,
  col_seed   = 6,
  w          = 6,
  h          = 6,
  outdir     = "~/work//barista//paper_data/scRNA/Figure_Feb2021/",
  outname    = "SPLiT_randUMI_samegene"
)

In [None]:
# Export cell-cell distances: compare the original object against each
# converted object via get_dist_pcaspace_scatter(), writing to "figure_out/".
# NOTE(review): the helper is defined outside this notebook; going by its name
# it presumably measures distances in PCA space -- confirm in
# ../../util/sc_preprocess.R.
v.vln.to10x <- get_dist_pcaspace_scatter(
  orig.intersect.seu,
  to10x.intersect.seu,
  outdir = "figure_out/",
  outname = "SPLiT_to_10x_randUMI"
)
v.vln.todrop <- get_dist_pcaspace_scatter(
  orig.intersect.seu,
  todrop.intersect.seu,
  outdir = "figure_out/",
  outname = "SPLiT_to_drop_randUMI"
)
v.vln.to10x_keep <- get_dist_pcaspace_scatter(
  orig.intersect.seu,
  to10x.keepUMI.seu,
  outdir = "figure_out/",
  outname = "SPLiT_to_10x_keepUMI"
)

# The original is compared against itself with scramble enabled.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
v.vln.scramble <- get_dist_pcaspace_scatter(
  orig.intersect.seu,
  orig.intersect.seu,
  scramble = TRUE,
  outdir = "figure_out/",
  outname = "SPLiT_orig_scramble_samegene"
)