# Differential expression analysis by subsetting, re-normalizing and variance-stabilizing the dataset, then performing a Wilcoxon test

In [1]:
library(Seurat)
library(parallel)

Attaching SeuratObject

Attaching SeuratObject



In [2]:
# Load the clustered cell-hashing dataset from disk.
# NOTE(review): presumably a Seurat object (it is passed to subset() and its
# @meta.data slot is read further down) -- confirm against the upstream script.
hgsoc <- readRDS(file = "HGSOC_CellHashing_CLUSTERED.RDS")

In [3]:
# Split the full dataset into one object per value of the `model` metadata
# column, so that each model is analysed separately downstream.
PDC2 <- subset(hgsoc, subset = model == "PDC2")
PDC3 <- subset(hgsoc, subset = model == "PDC3")
JHOS2 <- subset(hgsoc, subset = model == "JHOS2")

In [4]:
#' Differential expression of one drug versus DMSO
#'
#' Keeps only the cells whose `Treatment_group` is `d` or "DMSO", re-runs
#' SCTransform on that subset, and compares `d` against "DMSO" with a Wilcoxon
#' test on the "data" slot of the resulting SCT assay.
#'
#' @param d Treatment label to compare against "DMSO"; must match a value of
#'   the `Treatment_group` metadata column of `seurat_object`.
#' @param seurat_object Seurat object whose metadata holds `Treatment_group`
#'   and the covariates regressed out by SCTransform (`percent.rb`,
#'   `percent.mt`, `nFeature_RNA`, `nCount_RNA`, `S.Score`, `G2M.Score`).
#' @return The data frame returned by `FindMarkers()` with two extra columns:
#'   `drug` (the value of `d`) and `gene` (a copy of the row names).
drug_sub_dge <- function(d, seurat_object) {
  # Fail fast, before the expensive SCTransform step, when one side of the
  # comparison has no cells in this object (FindMarkers would fail anyway,
  # but only after normalization and with a less specific message).
  treatment <- as.character(seurat_object@meta.data$Treatment_group)
  n_treated <- sum(treatment == as.character(d), na.rm = TRUE)
  n_control <- sum(treatment == "DMSO", na.rm = TRUE)
  if (n_treated == 0L || n_control == 0L) {
    stop(
      "Cannot compare '", as.character(d), "' vs 'DMSO': found ",
      n_treated, " treated and ", n_control, " DMSO cells.",
      call. = FALSE
    )
  }

  # Subsetting cells: keep only the drug of interest and the DMSO controls
  sub_drug_DMSO <- subset(
    x = seurat_object,
    subset = Treatment_group == d | Treatment_group == "DMSO"
  )

  # Re-normalize the subset. `assay = "RNA"` is SCTransform's default and is
  # spelled out so it is clear the model is refitted on the RNA assay.
  sub_drug_DMSO <- SCTransform(
    sub_drug_DMSO,
    assay = "RNA",
    vars.to.regress = c(
      "percent.rb",
      "percent.mt",
      "nFeature_RNA",
      "nCount_RNA",
      "S.Score",
      "G2M.Score"
    ),
    method = "glmGamPoi",
    return.only.var.genes = FALSE,
    variable.features.n = 2000,
    vst.flavor = "v2",
    verbose = FALSE
  )

  # They will come from the same batch so no integration needed
  DefaultAssay(sub_drug_DMSO) <- "SCT"
  Idents(sub_drug_DMSO) <- "Treatment_group"

  # The statistical test is selected with `test.use`; FindMarkers() has no
  # `method` argument, so the previous `method = "wilcox"` was not what chose
  # the test (the Wilcoxon default was applied regardless).
  dge_df <- FindMarkers(
    object = sub_drug_DMSO,
    verbose = TRUE,
    ident.1 = d,
    ident.2 = "DMSO",
    assay = "SCT",
    slot = "data",
    test.use = "wilcox"
  )

  # Keep the drug and gene as regular columns so they survive row-binding of
  # several result tables.
  dge_df$drug <- d
  dge_df$gene <- rownames(dge_df)
  dge_df
}

In [5]:
# All treatment labels except the DMSO control.
# setdiff() replaces `x[-which(x == "DMSO")]`: when "DMSO" is absent, which()
# returns integer(0) and negative indexing with it selects nothing, so the
# old expression would have produced an empty drug list instead of all drugs.
# setdiff() also returns a plain character vector even if the column is a factor.
drug_list <- setdiff(unique(hgsoc@meta.data$Treatment_group), "DMSO")

In [6]:
# Run the drug-vs-DMSO comparison for every drug in the PDC2 model, in parallel.
PDC2_dge <- mclapply(drug_list, function(x) drug_sub_dge(x, PDC2), mc.cores = 10)
names(PDC2_dge) <- drug_list

# mclapply() does not re-raise errors from its workers: a failed job is
# returned as a "try-error" object (and a worker that delivered nothing as
# NULL). Stop here so such entries are not saved and row-bound as if they
# were results; PDC2_dge stays available for inspection.
PDC2_failed <- vapply(
  PDC2_dge,
  function(res) is.null(res) || inherits(res, "try-error"),
  logical(1)
)
if (any(PDC2_failed)) {
  stop(
    "drug_sub_dge() failed for PDC2 on: ",
    paste(names(PDC2_dge)[PDC2_failed], collapse = ", "),
    call. = FALSE
  )
}

In [7]:
# Run the drug-vs-DMSO comparison for every drug in the JHOS2 model, in parallel.
JHOS2_dge <- mclapply(drug_list, function(x) drug_sub_dge(x, JHOS2), mc.cores = 10)
names(JHOS2_dge) <- drug_list

# mclapply() does not re-raise errors from its workers: a failed job is
# returned as a "try-error" object (and a worker that delivered nothing as
# NULL). Stop here so such entries are not saved and row-bound as if they
# were results; JHOS2_dge stays available for inspection.
JHOS2_failed <- vapply(
  JHOS2_dge,
  function(res) is.null(res) || inherits(res, "try-error"),
  logical(1)
)
if (any(JHOS2_failed)) {
  stop(
    "drug_sub_dge() failed for JHOS2 on: ",
    paste(names(JHOS2_dge)[JHOS2_failed], collapse = ", "),
    call. = FALSE
  )
}

In [8]:
# Run the drug-vs-DMSO comparison for every drug in the PDC3 model, in parallel.
PDC3_dge <- mclapply(drug_list, function(x) drug_sub_dge(x, PDC3), mc.cores = 10)
names(PDC3_dge) <- drug_list

# mclapply() does not re-raise errors from its workers: a failed job is
# returned as a "try-error" object (and a worker that delivered nothing as
# NULL). Stop here so such entries are not saved and row-bound as if they
# were results; PDC3_dge stays available for inspection.
PDC3_failed <- vapply(
  PDC3_dge,
  function(res) is.null(res) || inherits(res, "try-error"),
  logical(1)
)
if (any(PDC3_failed)) {
  stop(
    "drug_sub_dge() failed for PDC3 on: ",
    paste(names(PDC3_dge)[PDC3_failed], collapse = ", "),
    call. = FALSE
  )
}

In [9]:
# Persist the per-drug result lists, one RDS file per model.
saveRDS(
  PDC2_dge,
  file = "PDC2_drugs_treated_vs_untreated_REVISION_SUBSET_WILCOX.RDS"
)
saveRDS(
  PDC3_dge,
  file = "PDC3_drugs_treated_vs_untreated_REVISION_SUBSET_WILCOX.RDS"
)
saveRDS(
  JHOS2_dge,
  file = "JHOS2_drugs_treated_vs_untreated_REVISION_SUBSET_WILCOX.RDS"
)

In [10]:
# Stack the per-drug tables of each model into a single data frame
# (each list is replaced by its row-bound result).
PDC2_dge <- do.call(rbind, PDC2_dge)
PDC3_dge <- do.call(rbind, PDC3_dge)
JHOS2_dge <- do.call(rbind, JHOS2_dge)

In [11]:
# Tag every row with the model it was computed on.
PDC2_dge[["model"]] <- "PDC2"
PDC3_dge[["model"]] <- "PDC3"
JHOS2_dge[["model"]] <- "JHOS2"

In [12]:
# Export the combined tables as tab-separated text, one file per model.
# `quote = FALSE` is spelled out: `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved constant.
write.table(
  x = PDC2_dge,
  file = "PDC2_drugs_treated_vs_untreated_REVISION_SUBSET_WILCOX.txt",
  sep = "\t",
  quote = FALSE
)
write.table(
  x = PDC3_dge,
  file = "PDC3_drugs_treated_vs_untreated_REVISION_SUBSET_WILCOX.txt",
  sep = "\t",
  quote = FALSE
)
write.table(
  x = JHOS2_dge,
  file = "JHOS2_drugs_treated_vs_untreated_REVISION_SUBSET_WILCOX.txt",
  sep = "\t",
  quote = FALSE
)

In [13]:
# Record the R version, platform and package versions used for this run.
sessionInfo()

R version 4.2.2 (2022-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Rocky Linux 8.8 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /homedir01/adini22/.conda/envs/cellhashing_preprocessing/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
[1] SeuratObject_4.1.3 Seurat_4.3.0.9001 

loaded via a namespace (and not attached):
  [1] nlme_3.1-162           spatstat.sparse_3.0-0  matrixStats_0.62.0    
  [4] RcppAnnoy_0.0.20       RColorBrewer_1.1-3     httr_1.4.4            
  [7] repr_1.1.4   

R version 4.2.2 (2022-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Rocky Linux 8.8 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /homedir01/adini22/.conda/envs/cellhashing_preprocessing/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
[1] SeuratObject_4.1.3 Seurat_4.3.0.9001 

loaded via a namespace (and not attached):
  [1] nlme_3.1-162           spatstat.sparse_3.0-0  matrixStats_0.62.0    
  [4] RcppAnnoy_0.0.20       RColorBrewer_1.1-3     httr_1.4.4            
  [7] repr_1.1.4   