In [17]:
suppressWarnings(suppressPackageStartupMessages({
    library(Seurat)
    library(Signac)
    library(EnsDb.Hsapiens.v86)
    library(dplyr)
    library(ggplot2)
    library(bedr)
    library(SeuratDisk)
    library(scales)
    library(reshape2)
    library(Hmisc)
    library(tidyr)
    library(tidyverse)
    library(crayon)
    library(SeuratData)
    #library(CelliD)
    library(readr)
}))

# Definitions and global settings

In [18]:
# Fix the RNG seed so that stochastic steps are repeatable between runs.
set.seed(1234)

# Limit printed numbers to about two significant digits.
options(digits = 2)

# Empty accumulator; `c()` with no arguments evaluates to NULL.
stats <- NULL

In [35]:
# Fallback parameters: applied only when no object named `papermill_run`
# exists in the session. All values are kept as strings.
if (!exists("papermill_run")) {
    # Active configuration
    prj_name         <- "Screen1_66guides"
    n_libs           <- "4"
    secondary_a_path <- "/home/vsevim/prj/1012-ckd/S1/analysis/secondary/"
    save_seurat_h5   <- "YES"

    # Alternative configuration (commented out)
    # prj_name = "Screen2_101vecs"
    # n_libs = "9"
    # secondary_a_path = "/home/vsevim/prj/1012-ckd/S2/analysis/secondary/"
    # save_seurat_h5 = "YES"
}

In [20]:
# Shared ggplot2 theme overrides: centred, size-16 plot title; 0.7 cm legend
# keys; size-14 legend text.
custom_theme <- theme(
    plot.title      = element_text(hjust = 0.5, size = 16),
    legend.key.size = unit(0.7, "cm"),
    legend.text     = element_text(size = 14)
)

# Load Seurat files

In [22]:
# Load the per-library Seurat objects into a list named by library.
#libs = c("Lib_1", "Lib_2", "Lib_3", "Lib_4", "Lib_5", "Lib_6", "Lib_7", "Lib_8", "Lib_9")

# `n_libs` is supplied as a string (e.g. "4"). Convert and validate it up
# front: `1:n` would count down (1, 0) for n = 0, and an unparsable value
# would otherwise surface later as an obscure error.
n_libs_int <- suppressWarnings(as.integer(n_libs))
if (length(n_libs_int) != 1L || is.na(n_libs_int) || n_libs_int < 1L) {
    stop(
        "n_libs must be a single positive integer, got: ", toString(n_libs),
        call. = FALSE
    )
}

libs <- paste0("Lib_", seq_len(n_libs_int))

# Destination (without file extension) used when the integrated object is
# saved later in this notebook.
integrated_h5_path <- paste0(secondary_a_path, "integrated/seurat_objects/integrated")
seurat_list <- list()

for (lib in libs) {
    h5_path <- paste0(secondary_a_path, lib, "/seurat_objects/", prj_name, "_", lib, ".h5seurat")
    print(h5_path)

    # Fail early with the offending path instead of a loader-internal error.
    if (!file.exists(h5_path)) {
        stop("Seurat object not found for ", lib, ": ", h5_path, call. = FALSE)
    }

    seurat_list[[lib]] <- LoadH5Seurat(h5_path, verbose = FALSE)
    #seurat_list[[lib]]$library <- lib

    # Use the SCT assay of each object as its default for the steps below.
    DefaultAssay(seurat_list[[lib]]) <- "SCT"
}

[1] "/home/vsevim/prj/1012-ckd/S1/analysis/secondary/Lib_1/seurat_objects/Screen1_66guides_Lib_1.h5seurat"


Validating h5Seurat file



[1] "/home/vsevim/prj/1012-ckd/S1/analysis/secondary/Lib_2/seurat_objects/Screen1_66guides_Lib_2.h5seurat"


Validating h5Seurat file



[1] "/home/vsevim/prj/1012-ckd/S1/analysis/secondary/Lib_3/seurat_objects/Screen1_66guides_Lib_3.h5seurat"


Validating h5Seurat file



[1] "/home/vsevim/prj/1012-ckd/S1/analysis/secondary/Lib_4/seurat_objects/Screen1_66guides_Lib_4.h5seurat"


Validating h5Seurat file



In [23]:
# Choose the features used to integrate the per-library objects
# (all other arguments are left at their defaults).
integration_features <- SelectIntegrationFeatures(
    object.list = seurat_list
)

In [24]:
# Find integration anchors across the libraries, restricted to the selected
# features.
# NOTE(review): the input objects have "SCT" as their default assay, but
# `normalization.method` is left at its default here — confirm whether the
# SCT integration path (PrepSCTIntegration + normalization.method = "SCT")
# was intended.
anchors <- FindIntegrationAnchors(
    object.list     = seurat_list,
    anchor.features = integration_features,
    verbose         = FALSE
)

"Some cell names are duplicated across objects provided. Renaming to enforce unique cell names."


In [25]:
# Combine the libraries using the anchors; this creates an 'integrated'
# data assay on the resulting object.
seurat_combined <- IntegrateData(
    anchorset = anchors,
    verbose   = FALSE
)

# Run SCTransform on the combined object

In [26]:
# Make 'integrated' the default assay of the combined object.
# NOTE(review): the following step calls SCTransform() without an `assay`
# argument. If SCTransform defaults to the "RNA" assay (as in Seurat's
# documented signature) rather than the default assay, this setting would not
# influence it — confirm the intended input assay.
DefaultAssay(seurat_combined) <- "integrated"

In [27]:
# Normalise the combined object with SCTransform (vst flavour "v2"), run PCA
# with default settings, then embed the first 40 dimensions with UMAP. The
# embedding is stored under the reduction name 'umap.rna'.
seurat_combined <- SCTransform(
    seurat_combined,
    vst.flavor = "v2",
    verbose = FALSE
) %>%
    RunPCA() %>%
    RunUMAP(
        dims = 1:40,
        reduction.name = 'umap.rna',
        reduction.key = 'rnaUMAP_',
        verbose = FALSE
    )

PC_ 1 
Positive:  FTL, CRYAB, FTH1, MT1X, MT2A, MT1E, APOE, SERPINA1, G0S2, S100A1 
	   PLIN2, SOD2, SAA1, FN1, SAA2, RPL13, MGST1, TMSB10, PDZK1IP1, CTSB 
	   GAPDH, RPS18, RARRES2, IFI27, NUPR1, TPT1, RRAD, SERPINE1, RPLP1, TFPI2 
Negative:  KCNIP4, ERBB4, MECOM, FAM155A, AC019197.1, WFDC2, PDE1A, MALAT1, KITLG, PKHD1 
	   BICC1, NEAT1, SFRP1, MAL, BST2, ACSL4, LRP1B, AC079352.1, SNED1, EFNA5 
	   SKAP1, SCN2A, CSGALNACT1, PLCB4, KAZN, TFCP2L1, S100A2, SPON1, PDE4D, ITGA2 
PC_ 2 
Positive:  MALAT1, SOD2, FTL, FTH1, NEAT1, APOE, SERPINA1, NRXN3, FKBP5, AL138828.1 
	   MT1X, AL357507.1, MDM2, AC079352.1, GBE1, PLIN2, GPC6, ZIM3, FNIP2, CPD 
	   MSC-AS1, CRYAB, LINC01320, ABLIM3, ZNF385B, RRAD, FGB, PTCHD4, ZFPM2-AS1, FGF14 
Negative:  CENPF, TOP2A, MKI67, TUBA1B, ASPM, HMGB2, CCNB1, TPX2, HMMR, DLGAP5 
	   NUSAP1, UBE2C, TUBB4B, KRT19, ANLN, CKS2, S100A2, CDK1, CEP55, PCLAF 
	   STMN1, PRC1, UBE2S, PTTG1, CDKN3, TUBB, RRM2, GTSE1, H2AFZ, PBK 
PC_ 3 
Positive:  CDH6, CENPF, MKI67, TOP2A

### Perform clustering

In [28]:
# Build the neighbour graph from the first 40 dimensions of the "pca"
# reduction.
seurat_combined <- FindNeighbors(
    seurat_combined,
    reduction = "pca",
    dims = 1:40
)

# Cluster the cells on that graph at resolution 0.5.
seurat_combined <- FindClusters(
    seurat_combined,
    resolution = 0.5
)

Computing nearest neighbor graph

Computing SNN



Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 24563
Number of edges: 847306

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8467
Number of communities: 12
Elapsed time: 6 seconds


### Save H5

In [29]:
# Save the integrated object to `integrated_h5_path`, overwriting any
# existing file.
#
# The `save_seurat_h5` parameter is honoured here: the object is written
# unless the parameter is defined and set to something other than "YES".
# When the parameter is absent the object is still saved, matching the
# previous unconditional behaviour.
if (!exists("save_seurat_h5") || identical(save_seurat_h5, "YES")) {
    # Ensure the destination directory exists; a no-op when it already does.
    dir.create(dirname(integrated_h5_path), recursive = TRUE, showWarnings = FALSE)

    # Spell out TRUE: `T` is an ordinary variable and can be reassigned.
    SaveH5Seurat(seurat_combined, integrated_h5_path, verbose = FALSE, overwrite = TRUE)
} else {
    message("save_seurat_h5 is not \"YES\"; skipping save of the integrated object.")
}

"Overwriting previous file /home/vsevim/prj/1012-ckd/S1/analysis/secondary/integrated/seurat_objects/integrated.h5seurat"
Creating h5Seurat file for version 3.1.5.9900



In [30]:
# Report the number of cells in the combined object.
seurat_combined %>%
    Cells() %>%
    length()