# 3D UMAP of the drug-subset data - part 1

Loading the R packages.

In [1]:
suppressWarnings({suppressMessages({
    library(ggplot2)
    library(Seurat)
    library(readxl)
    library(AnnotationHub)
    library(ensembldb)
    library(tidyr)
    library(dplyr)
    library(viridis)
    library(harmony)
    library(parallel)
})})

Loading the data.

In [2]:
# Read the serialized object from the working directory; later cells access it
# as a Seurat object (meta.data slot, subset(), SCTransform()).
sc_data <- readRDS(file = "HGSOC_CellHashing_CLUSTERED.RDS")

Retrieving the list of drugs (treatment groups) to iterate over.

In [3]:
# Distinct treatment-group labels found in the cell metadata; one subset is
# built per label further down.
drugs <- unique(sc_data@meta.data[["Treatment_group"]])

For each drug, we obtain the subset including treated and untreated cells (DMSO) for the three models.

In [4]:
set.seed(1)

# Build, re-normalise and integrate the subset of cells of one treatment group.
#
# Arguments:
#   d              Treatment-group label; cells whose `Treatment_group` metadata
#                  value equals `d` are kept.
#   seurat_object  Seurat object containing all cells, with the metadata columns
#                  used below (Treatment_group, percent.rb, percent.mt,
#                  nFeature_RNA, nCount_RNA, S.Score, G2M.Score, model).
#   seed           Seed set at the start of every call (default 1, the same
#                  value as the top-level set.seed() above).
#
# Side effect: writes the processed subset to "<d>_subset.RDS" in the current
# working directory.
# Returns: the result of saveRDS().
drug_sub <- function(d, seurat_object, seed = 1){

    # The function is run in forked workers via mclapply(); a seed set in the
    # parent session does not give reproducible random streams in the children,
    # so the seed is set here, once per call.
    set.seed(seed)

    # Subsetting cells
    # NOTE(review): this filter keeps only cells whose Treatment_group equals
    # `d`. If the untreated (DMSO) cells carry their own label they are NOT part
    # of the subset, despite the variable name - confirm the intended selection.
    sub_drug_DMSO <- subset(x = seurat_object, subset = Treatment_group == d)

    # Re-running SCTransform on the subset. No `assay` is given, so the
    # function's default input assay is used.
    sub_drug_DMSO <- SCTransform(sub_drug_DMSO,
                                 vars.to.regress = c("percent.rb",
                                                     "percent.mt",
                                                     "nFeature_RNA",
                                                     "nCount_RNA",
                                                     "S.Score",
                                                     "G2M.Score"),
                                 method = "glmGamPoi",
                                 return.only.var.genes = FALSE,
                                 variable.features.n = 2000,
                                 vst.flavor = "v2", verbose = FALSE)

    # PCA and integration. UMAP is run in the following script to get the 3D UMAPs.
    # RunPCA() names its assay parameter `assay`; `assay.use` (the spelling
    # RunHarmony() uses) would be swallowed by `...` and silently ignored.
    sub_drug_DMSO <- RunPCA(sub_drug_DMSO, assay = "SCT", verbose = FALSE)
    # Since the three models are present, batch correction is needed
    sub_drug_DMSO <- RunHarmony(sub_drug_DMSO, assay.use = "SCT", group.by.vars = "model")

    # Saving
    saveRDS(object = sub_drug_DMSO, file = paste0(d, "_subset.RDS"))
}

In [5]:
# Process every treatment group in parallel (5 forked workers); each call
# writes its own "<drug>_subset.RDS" file.
subset_results <- mclapply(X = drugs, FUN = function(x) drug_sub(x, sc_data), mc.cores = 5)

# mclapply() does not stop when a worker fails: the failed elements come back
# as "try-error" objects. Check them explicitly so a missing subset is not
# overlooked.
subset_failed <- vapply(subset_results, inherits, logical(1), what = "try-error")
if (any(subset_failed)) {
    stop("drug_sub() failed for: ",
         paste(drugs[subset_failed], collapse = ", "),
         call. = FALSE)
}

In [6]:
# Print the R version, platform and attached package versions used for this run.
sessionInfo()

R version 4.2.2 (2022-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Rocky Linux 8.8 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /homedir01/adini22/.conda/envs/cellhashing_preprocessing/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] harmony_0.1.1           Rcpp_1.0.10             viridis_0.6.2          
 [4] viridisLite_0.4.2       dplyr_1.1.2             tidyr_1.3.0            
 [7] ensembldb_2.22.0        AnnotationFilter_1.22.0 GenomicFeatures_1.50.3 
[10] Annotatio