In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(Seurat)
    library(Signac)
    library(EnsDb.Hsapiens.v86)
    library(dplyr)
    library(ggplot2)
    library(bedr)
    library(SeuratDisk)
    library(scales)
    library(reshape2)
    library(Hmisc)
    library(tidyr)
    library(tidyverse)
    library(crayon)
    library(SeuratData)
    #library(CelliD)
    library(readr)
}))

# Definitions etc

In [2]:
# Session-wide setup.
# Fix the RNG seed so steps that draw random numbers are repeatable.
set.seed(1234)

# Show numbers with two significant digits in printed output.
options(digits = 2)

# Empty placeholder (c() with no arguments is NULL).
# NOTE(review): `stats` is not read anywhere in the visible cells -- confirm it
# is still needed.
stats <- NULL

In [3]:
# Default run parameters. They are only assigned when `papermill_run` is not
# defined, i.e. when nothing has supplied the parameters beforehand.
if (!exists("papermill_run")) {
    # Screen 1 configuration
    prj_name <- "Screen1_66guides"
    secondary_a_path <- "/home/vsevim/prj/1012-ckd/S1/analysis/secondary/"
    save_seurat_h5 <- "YES"
    n_libs <- "4"    # kept as a string; converted with as.integer() when used

    # Screen 2 configuration (alternative; uncomment to use)
    # prj_name <- "Screen2_101vecs"
    # n_libs <- "9"
    # secondary_a_path <- "/home/vsevim/prj/1012-ckd/S2/analysis/secondary/"
    # save_seurat_h5 <- "YES"
}

In [4]:
# Reusable ggplot2 theme adjustments: centred size-16 titles, 0.7 cm legend
# keys and size-14 legend text.
custom_theme <- theme(
    plot.title      = element_text(size = 16, hjust = 0.5),
    legend.key.size = unit(0.7, "cm"),
    legend.text     = element_text(size = 14)
)

# Load Seurat files

In [5]:
# Load the per-library Seurat objects that are integrated further below.
#libs = c("Lib_1", "Lib_2", "Lib_3", "Lib_4", "Lib_5", "Lib_6", "Lib_7", "Lib_8", "Lib_9")

# `n_libs` is set as a string in the parameter cell. Convert and validate it
# up front: `1:as.integer(n_libs)` would silently iterate over c(1, 0) for a
# zero count and fail obscurely for a non-numeric value.
n_libs_int <- suppressWarnings(as.integer(n_libs))
if (length(n_libs_int) != 1L || is.na(n_libs_int) || n_libs_int < 1L) {
    stop("n_libs must be a positive integer, got: ", n_libs, call. = FALSE)
}

# Library names: Lib_1 ... Lib_<n_libs>
libs <- paste0("Lib_", seq_len(n_libs_int))

# Destination (without file extension) used later when the integrated object
# is saved.
integrated_h5_path <- paste0(secondary_a_path, "integrated/seurat_objects/integrated_wo_guide_calls")

# Named list of loaded objects, keyed by library name.
seurat_list <- list()

for (lib in libs) {
    # <secondary_a_path>/<lib>/seurat_objects/<prj_name>_<lib>.h5seurat
    h5_path <- paste0(secondary_a_path, lib, "/seurat_objects/", prj_name, "_", lib, ".h5seurat")
    print(h5_path)
    seurat_list[[lib]] <- LoadH5Seurat(h5_path, verbose = FALSE)
    #seurat_list[[lib]]$library <- lib
    # Use the SCT assay as the default assay of every loaded object.
    DefaultAssay(seurat_list[[lib]]) <- "SCT"
}

[1] "/home/vsevim/prj/1012-ckd/S1/analysis/secondary/Lib_1/seurat_objects/Screen1_66guides_Lib_1.h5seurat"


Validating h5Seurat file



[1] "/home/vsevim/prj/1012-ckd/S1/analysis/secondary/Lib_2/seurat_objects/Screen1_66guides_Lib_2.h5seurat"


Validating h5Seurat file



[1] "/home/vsevim/prj/1012-ckd/S1/analysis/secondary/Lib_3/seurat_objects/Screen1_66guides_Lib_3.h5seurat"


Validating h5Seurat file



[1] "/home/vsevim/prj/1012-ckd/S1/analysis/secondary/Lib_4/seurat_objects/Screen1_66guides_Lib_4.h5seurat"


Validating h5Seurat file



In [6]:
# Choose the features used to anchor the per-library objects to one another
# (all SelectIntegrationFeatures() arguments other than the object list are
# left at their defaults).
# NOTE(review): each object's default assay was set to "SCT" when it was
# loaded, but the SCT-specific integration steps (PrepSCTIntegration(),
# normalization.method = "SCT") are not used in the following cells --
# confirm that this is intentional.
integration_features <- SelectIntegrationFeatures(object.list = seurat_list)

In [7]:
# Find integration anchors between the libraries, restricted to the features
# selected above. Progress output is silenced.
anchors <- FindIntegrationAnchors(
    object.list     = seurat_list,
    anchor.features = integration_features,
    verbose         = FALSE
)

"Some cell names are duplicated across objects provided. Renaming to enforce unique cell names."


In [8]:
# Merge the libraries into a single object using the anchors found above.
# This command creates an 'integrated' data assay on the returned object.
seurat_combined <- IntegrateData(anchorset = anchors, verbose = FALSE)

# Run SCTransform on the combined object

In [9]:
# Make "integrated" the default assay of the combined object.
# NOTE(review): the SCTransform() call in the next cell does not pass `assay`;
# per the Seurat documentation that argument defaults to "RNA", so this
# setting may not influence it -- confirm which assay is meant to be
# re-normalised.
DefaultAssay(seurat_combined) <- "integrated"

In [10]:
# Normalise the combined object with SCTransform (vst.flavor "v2"), then run
# PCA, then compute a UMAP from dims 1:40. The UMAP is stored under the
# reduction name 'umap.rna' with key 'rnaUMAP_'.
# RunPCA() is called without verbose = FALSE, so it prints the top features of
# each principal component.
seurat_combined <- SCTransform(seurat_combined, vst.flavor = "v2", verbose = FALSE) %>% 
          RunPCA() %>% 
          RunUMAP(dims = 1:40, reduction.name = 'umap.rna', reduction.key = 'rnaUMAP_', verbose=FALSE)

PC_ 1 
Positive:  CRYAB, FTL, APOE, MT1X, FN1, SOD2, SERPINA1, G0S2, S100A1, PLIN2 
	   CDH6, MT1E, FGB, SERPINE1, FTH1, CTSB, VCAM1, MGST1, TFPI2, RARRES2 
	   VGF, GPX3, RHEX, LINC01705, NUPR1, RRAD, AKR1C3, IL32, PDZK1IP1, SLC2A9 
Negative:  KCNIP4, ERBB4, MECOM, WFDC2, AC019197.1, FAM155A, MAL, SFRP1, S100A2, BST2 
	   KITLG, PDE1A, KRT19, CSGALNACT1, MUC1, KRT7, ACSL4, SNED1, ITGA2, PKHD1 
	   TFCP2L1, SKAP1, LRP1B, SCN2A, GPRC5A, ARHGEF38, PCSK5, TFAP2B, DENND2A, HOXB3 
PC_ 2 
Positive:  KCNIP4, SERPINA1, ERBB4, NEAT1, MALAT1, SPP1, MECOM, WFDC2, FTL, SOD2 
	   FTH1, MT1X, AC079352.1, SLPI, FXYD2, APOE, CRYAB, LINC01320, PDE1A, ZIM3 
	   FKBP5, MDM2, ADAMTS9-AS2, CLU, GDF15, LRP1B, SERPINA3, CPD, FGB, PKP4 
Negative:  CENPF, TOP2A, MKI67, ASPM, HMGB2, TUBA1B, CCNB1, TPX2, HMMR, NUSAP1 
	   DLGAP5, UBE2C, ANLN, PRC1, CDK1, CEP55, CKS2, PCLAF, RRM2, CDKN3 
	   UBE2S, GTSE1, PBK, CENPE, PTTG1, TUBB4B, HJURP, H2AFZ, BIRC5, CDC20 
PC_ 3 
Positive:  CDH6, PLCB4, AC079352.1, NEAT1, MALA

### Perform clustering

In [11]:
# Build the neighbour graph on PCA dims 1:40 (the same dims passed to
# RunUMAP above), then cluster the cells at resolution 0.5.
seurat_combined <- FindNeighbors(seurat_combined, reduction = "pca", dims = 1:40)
seurat_combined <- FindClusters(seurat_combined, resolution = 0.5)

Computing nearest neighbor graph

Computing SNN



Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 27141
Number of edges: 944607

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8694
Number of communities: 13
Elapsed time: 6 seconds


## Add stats slot

In [12]:
# Collect the per-library summaries stored in each object's `misc$stats` entry
# into one data frame, in the order of `seurat_list`.
# Binding once with do.call() avoids growing the data frame inside a loop and
# avoids `1:length(x)`, which iterates over c(1, 0) when the list is empty
# (in that case the result here is NULL).
# unname() stops rbind() from deriving row names from the list names, so the
# rows keep automatic 1..n row names.
df_stats <- do.call(
    rbind,
    lapply(unname(seurat_list), function(obj) as.data.frame(obj@misc$stats))
)

# Attach the combined table to the integrated object under misc 'stats'.
Misc(seurat_combined, slot = 'stats') <- df_stats

### Save H5

In [13]:
# Write the integrated object to `integrated_h5_path`, replacing any file
# left by a previous run. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
# NOTE(review): the `save_seurat_h5` run parameter is not consulted here, so
# the file is written on every run -- confirm whether the save should be
# gated on it.
SaveH5Seurat(seurat_combined, integrated_h5_path, verbose = FALSE, overwrite = TRUE)

"Overwriting previous file /home/vsevim/prj/1012-ckd/S1/analysis/secondary/integrated/seurat_objects/integrated_wo_guide_calls.h5seurat"
Creating h5Seurat file for version 3.1.5.9900



In [14]:
# Display the combined per-library stats table for inspection.
df_stats

Screen,Library,Cells_recovered,Genes_recovered,Protospacers_in_assay,Max_mito_perc_threshold,Min_mito_perc_threshold,Max_n_RNA_threshold,Min_n_RNA_threshold,Max_n_genes_threshold,Min_n_genes_threshold,Cells_after_QC_filtering,Cells_with_sgRNA_count_less_than_100,Doublets_in_filtered_cells,Total_doublets_before_filtering,Singlets_after_filtering,Singlets_perc_after_filtering
<chr>,<chr>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<int>,<dbl>
Screen1_66guides,Lib_1,9169,31847,132,17,3,98264,-22058,12173,2000,8012,1785,718,918,7293,0.91
Screen1_66guides,Lib_2,9443,30856,132,18,3,52856,-10516,9736,2000,8565,2220,1033,1168,7527,0.88
Screen1_66guides,Lib_3,7113,29531,132,17,3,34198,-5058,8024,2000,6298,1128,512,617,5785,0.92
Screen1_66guides,Lib_4,8015,31390,132,17,3,79578,-14582,11188,2000,7201,1572,665,793,6536,0.91


In [15]:
# Sanity check: number of metadata rows in the first loaded library
# (presumably one row per cell -- compare with the counts in the next cell).
nrow(seurat_list[[1]]@meta.data)

In [16]:
# Sanity check on the integrated object: number of cells whose `library`
# metadata is 'Lib_1', then how many of those also have `status` 'singlet'.
# NOTE(review): assumes the `library` and `status` metadata columns were set
# in the saved per-library objects -- they are not assigned in this notebook.
seurat_combined %>%
    subset(subset = library == "Lib_1") %>%
    Cells() %>%
    length()

seurat_combined %>%
    subset(subset = (status == "singlet") & (library == "Lib_1")) %>%
    Cells() %>%
    length()

In [17]:
# List the distinct values of the `status` metadata column in the integrated
# object.
unique(seurat_combined$status)

In [18]:
# List the distinct `status` values in the fourth loaded library.
# NOTE(review): the index 4 is hard-coded; this errors when fewer than four
# libraries are loaded (the list length follows `n_libs`).
unique(seurat_list[[4]]$status)