In [None]:
library(Seurat)
library(Matrix)
library(tidyverse)

In [None]:
source("../../util/sc_preprocess.R")

In [None]:
# Load the raw DGE objects written by import_DGE.R: the original Drop data
# plus the four converted variants named in the files below (to-10x and
# to-drop, each with and without "keepUMI").
dge_base <- readRDS(file = "vFeb2021_Drop_original_rawdge.rds")
dge_to10x <- readRDS(file = "vFeb2021_Drop_to_10x_rawdge.rds")
dge_todrop <- readRDS(file = "vFeb2021_Drop_to_drop_rawdge.rds")
dge_to10x_keepUMI <- readRDS(file = "vFeb2021_Drop_to_10x_keepUMI_rawdge.rds")
dge_todrop_keepUMI <- readRDS(file = "vFeb2021_Drop_to_drop_keepUMI_rawdge.rds")


In [None]:
# Column count of the original matrix (presumably one column per cell).
dge_base %>% ncol()

In [None]:
# Barcode correspondence tables for the four conversions (Fig4 data).
# Every file is read with its first line as the header; the columns
# `segment1.cellID` and `exCellID` are the ones passed to refresh_dge() later.
bclist_to10x <- read.table(
  "barcode_correspondence_data//Fig4/bclist_drop_to_10X_random.txt.gz",
  header = TRUE
)
bclist_toDrop <- read.table(
  "barcode_correspondence_data//Fig4/bclist_drop_to_drop_random.txt.gz",
  header = TRUE
)
bclist_to10x_keepUMI <- read.table(
  "barcode_correspondence_data//Fig4/bclist_drop_to_10X_keepUMI.txt.gz",
  header = TRUE
)
# Only this table has its column names overwritten, so that it exposes the
# same two names as the others (presumably its file header differs -- confirm).
colnames(bclist_to10x_keepUMI) <- c("segment1.cellID", "exCellID")
bclist_toDrop_keepUMI <- read.table(
  "barcode_correspondence_data//Fig4/bclist_drop_to_drop_keepUMI.txt.gz",
  header = TRUE
)

In [None]:
# Remove the "-1" suffix from the column names of the two to-10x matrices
# (presumably the suffix added to 10x barcodes -- confirm against import_DGE.R).
# The pattern is anchored to the end of the name and sub() is used, so only
# the trailing suffix is removed; an unanchored gsub() would delete every
# "-1" occurring anywhere in a name.
colnames(dge_to10x) <- sub("-1$", "", colnames(dge_to10x))
colnames(dge_to10x_keepUMI) <- sub("-1$", "", colnames(dge_to10x_keepUMI))

# Pass each converted matrix through refresh_dge() together with the two
# columns of its barcode correspondence table.
# NOTE(review): refresh_dge() is defined in sc_preprocess.R; presumably it
# renames columns from converted barcodes back to original cell IDs -- confirm.
dge_to10x_new <- refresh_dge(
  dge_to10x,
  bclist_to10x$segment1.cellID,
  bclist_to10x$exCellID
)
dge_todrop_new <- refresh_dge(
  dge_todrop,
  bclist_toDrop$segment1.cellID,
  bclist_toDrop$exCellID
)
dge_to10x_keepUMI_new <- refresh_dge(
  dge_to10x_keepUMI,
  bclist_to10x_keepUMI$segment1.cellID,
  bclist_to10x_keepUMI$exCellID
)
dge_todrop_keepUMI_new <- refresh_dge(
  dge_todrop_keepUMI,
  bclist_toDrop_keepUMI$segment1.cellID,
  bclist_toDrop_keepUMI$exCellID
)

In [None]:
# Quick look at the first few column names after refresh_dge().
head(colnames(dge_to10x_keepUMI_new))

In [None]:
# Bundle the four refreshed matrices so they can be passed to
# get_cells_used() as a single argument.
dge_target_list <- list(
  dge_to10x_new,
  dge_todrop_new,
  dge_to10x_keepUMI_new,
  dge_todrop_keepUMI_new
)

In [None]:
# Select the cells to keep, given the original matrix and the converted ones.
# NOTE(review): get_cells_used() lives in sc_preprocess.R; presumably it
# returns cells passing the mitochondrial-percentage and minimum-feature
# thresholds in every dataset -- confirm.
use_cells <- get_cells_used(
  dge_base,
  dge_target_list,
  percent_mito = 20,
  min_features = 200
)

In [None]:
# How many cells were selected.
use_cells %>% length()

In [None]:
# Restrict every matrix to the same set of columns (`use_cells`), in the same
# order, so the downstream objects are directly comparable.
# `drop = FALSE` keeps the result two-dimensional even if `use_cells` ends up
# holding a single cell; without it the subset would collapse to a vector.
dge_base_intersect <- dge_base[, use_cells, drop = FALSE]
dge_to10x_intersect <- dge_to10x_new[, use_cells, drop = FALSE]
dge_todrop_intersect <- dge_todrop_new[, use_cells, drop = FALSE]
dge_to10x_keepUMI_intersect <- dge_to10x_keepUMI_new[, use_cells, drop = FALSE]
dge_todrop_keepUMI_intersect <- dge_todrop_keepUMI_new[, use_cells, drop = FALSE]

In [None]:
# Wrap each cell-matched matrix in a Seurat object (default settings).
orig.intersect.seu <- CreateSeuratObject(counts = dge_base_intersect)
to10x.intersect.seu <- CreateSeuratObject(counts = dge_to10x_intersect)
todrop.intersect.seu <- CreateSeuratObject(counts = dge_todrop_intersect)
to10x.keepUMI.seu <- CreateSeuratObject(counts = dge_to10x_keepUMI_intersect)
todrop.keepUMI.seu <- CreateSeuratObject(counts = dge_todrop_keepUMI_intersect)

In [None]:
# First call, without `var_genes`: the result is stored as the gene set and
# passed as `var_genes` to the later getVarGenes_untilPCA() calls.
# NOTE(review): presumably this returns the top 5000 variable genes of the
# original dataset -- confirm in sc_preprocess.R.
high_var_genes <- getVarGenes_untilPCA(
  orig.intersect.seu,
  num_varFeatures = 5000
)

In [None]:
# Second pass: run getVarGenes_untilPCA() on all five objects, supplying the
# gene set computed from the original so every object uses the same genes.
# Each result overwrites the corresponding Seurat object.
orig.intersect.seu <- getVarGenes_untilPCA(
  orig.intersect.seu,
  var_genes = high_var_genes,
  num_varFeatures = 5000
)
to10x.intersect.seu <- getVarGenes_untilPCA(
  to10x.intersect.seu,
  var_genes = high_var_genes,
  num_varFeatures = 5000
)
todrop.intersect.seu <- getVarGenes_untilPCA(
  todrop.intersect.seu,
  var_genes = high_var_genes,
  num_varFeatures = 5000
)
to10x.keepUMI.seu <- getVarGenes_untilPCA(
  to10x.keepUMI.seu,
  var_genes = high_var_genes,
  num_varFeatures = 5000
)
todrop.keepUMI.seu <- getVarGenes_untilPCA(
  todrop.keepUMI.seu,
  var_genes = high_var_genes,
  num_varFeatures = 5000
)

In [None]:
# Apply seurat_processing_UMAP() with identical settings to all five objects.
# NOTE(review): `dimlimit` and `res` are presumably the number of PCs used
# and the clustering resolution -- confirm in sc_preprocess.R.
orig.intersect.seu <- seurat_processing_UMAP(
  orig.intersect.seu,
  dimlimit = 20,
  res = 0.6
)
to10x.intersect.seu <- seurat_processing_UMAP(
  to10x.intersect.seu,
  dimlimit = 20,
  res = 0.6
)
todrop.intersect.seu <- seurat_processing_UMAP(
  todrop.intersect.seu,
  dimlimit = 20,
  res = 0.6
)
to10x.keepUMI.seu <- seurat_processing_UMAP(
  to10x.keepUMI.seu,
  dimlimit = 20,
  res = 0.6
)
todrop.keepUMI.seu <- seurat_processing_UMAP(
  todrop.keepUMI.seu,
  dimlimit = 20,
  res = 0.6
)

In [None]:
# Persist the five processed Seurat objects under `store_dir`.
store_dir <- "seurat_rds/"

# saveRDS() does not create missing directories, so make sure the target
# exists first; otherwise the save would fail only after all the processing
# above has already run. No warning is raised if it is already there.
dir.create(store_dir, showWarnings = FALSE, recursive = TRUE)

saveRDS(orig.intersect.seu, paste0(store_dir, "Feb2021_Drop_original_Seurat.rds"))
saveRDS(to10x.intersect.seu, paste0(store_dir, "Feb2021_Drop_to_10x_Seurat.rds"))
saveRDS(todrop.intersect.seu, paste0(store_dir, "Feb2021_Drop_to_drop_Seurat.rds"))
saveRDS(to10x.keepUMI.seu, paste0(store_dir, "Feb2021_Drop_to_10x_keepUMI_Seurat.rds"))
saveRDS(todrop.keepUMI.seu, paste0(store_dir, "Feb2021_Drop_to_drop_keepUMI_Seurat.rds"))

In [None]:
# Figure: pass the original object and the two random-UMI conversions to
# draw_umap(), with output directory "figure_out/".
# NOTE(review): `w`/`h` are presumably the figure width/height and `col_seed`
# a seed for colour assignment -- confirm in sc_preprocess.R.
draw_umap(
  seu_base = orig.intersect.seu,
  seu_to10x = to10x.intersect.seu,
  seu_todrop = todrop.intersect.seu,
  col_seed = 18,
  w = 6, h = 6,
  outdir = "figure_out/",
  outname = "drop_randUMI_samegene"
)

In [None]:
# Export cell-cell distance comparisons: the original object is compared
# against each converted object, plus one run of the original against itself
# with `scramble = TRUE`. Outputs are directed to "figure_out/".
# NOTE(review): get_dist_pcaspace_scatter() is defined in sc_preprocess.R;
# presumably it measures per-cell distances in PCA space and the scrambled
# run acts as a baseline -- confirm.
v.vln.to10x <- get_dist_pcaspace_scatter(
  orig.intersect.seu,
  to10x.intersect.seu,
  outdir = "figure_out/",
  outname = "drop_to_10x_randUMI"
)
v.vln.todrop <- get_dist_pcaspace_scatter(
  orig.intersect.seu,
  todrop.intersect.seu,
  outdir = "figure_out/",
  outname = "drop_to_drop_randUMI"
)
v.vln.to10x_keep <- get_dist_pcaspace_scatter(
  orig.intersect.seu,
  to10x.keepUMI.seu,
  outdir = "figure_out/",
  outname = "drop_to_10x_keepUMI"
)
v.vln.todrop_keep <- get_dist_pcaspace_scatter(
  orig.intersect.seu,
  todrop.keepUMI.seu,
  outdir = "figure_out/",
  outname = "drop_to_drop_keepUMI"
)
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change this argument.
v.vln.scramble <- get_dist_pcaspace_scatter(
  orig.intersect.seu,
  orig.intersect.seu,
  scramble = TRUE,
  outdir = "figure_out/",
  outname = "drop_orig_scramble"
)