# Basic 3WNN Clustering w/ Gated Labels

In [None]:
#' Attach a package while muting its start-up messages.
#'
#' @param ... Arguments forwarded unchanged to `library()` (typically a bare
#'   package name).
#' @return The value of the wrapped `library()` call.
quiet_library <- function(...) suppressPackageStartupMessages(library(...))
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
quiet_library(viridis)
quiet_library(harmony)
quiet_library(Nebulosa)
quiet_library(ArchR)
quiet_library(cowplot)

In [None]:
# Configure ArchR for this session: request 32 threads and select the hg38
# genome.
addArchRThreads(threads = 32)
addArchRGenome(genome = "hg38")

# Load mnp Subset

In [None]:
# Read the serialized `mnp` object from the working directory.
mnp <- readRDS(file = "mnp_merge_updated.rds")

# Add Metadata

In [None]:
# CMV label for each PBMC sample ID (old identity = new identity).
cmv_by_sample <- c(
    "PB00173-02" = "Positive", # validated
    "PB00192-02" = "Negative", # validated
    "PB00197-02" = "Negative", # validated
    "PB00593-04" = "Positive", # validated
    "PB00172-02" = "Positive",
    "PB00182-02" = "Negative",
    "PB00323-02" = "Positive",
    "PB00807-02" = "Positive",
    "PB00124-02" = "Positive",
    "PB00127-02" = "Negative",
    "PB00334-03" = "Negative",
    "PB00353-03" = "Negative",
    "PB00368-04" = "Positive",
    "PB00377-03" = "Negative",
    "PB00545-02" = "Negative",
    "PB00599-02" = "Positive"
)

# Use the sample ID as the active identity, rename each identity through the
# lookup (pairs are passed in the order listed above), then store the renamed
# identities in the `CMV` metadata column.
mnp <- SetIdent(mnp, value = "pbmc_sample_id")
# `quote(mnp)` makes do.call build the call RenameIdents(mnp, old = new, ...)
# rather than embedding the whole object in the call.
mnp <- do.call(RenameIdents, c(list(quote(mnp)), as.list(cmv_by_sample)))
mnp$CMV <- Idents(mnp)

In [None]:
# Subject ID for each PBMC sample ID (old identity = new identity).
subject_by_sample <- c(
    "PB00173-02" = "UP1006", # validated
    "PB00192-02" = "UP1007", # validated
    "PB00197-02" = "UP1010", # validated
    "PB00593-04" = "BR2002", # validated
    "PB00172-02" = "UP1002",
    "PB00182-02" = "UP1003",
    "PB00323-02" = "BR2004",
    "PB00807-02" = "UP1005",
    "PB00124-02" = "UP1001",
    "PB00127-02" = "UP1004",
    "PB00334-03" = "BR2009",
    "PB00353-03" = "BR2008",
    "PB00368-04" = "BR2005",
    "PB00377-03" = "BR2015",
    "PB00545-02" = "BR2042",
    "PB00599-02" = "BR2052"
)

# Use the sample ID as the active identity, rename each identity through the
# lookup (pairs are passed in the order listed above), then store the renamed
# identities in the `subject_id` metadata column.
mnp <- SetIdent(mnp, value = "pbmc_sample_id")
# `quote(mnp)` makes do.call build the call RenameIdents(mnp, old = new, ...)
# rather than embedding the whole object in the call.
mnp <- do.call(RenameIdents, c(list(quote(mnp)), as.list(subject_by_sample)))
mnp$subject_id <- Idents(mnp)

In [None]:
# Per-cell label of the form "<pediatric_senior>_CMV-<CMV>".
mnp[["age_cmv"]] <- paste0(mnp$pediatric_senior, "_CMV-", mnp$CMV)

# RNA Clustering

In [None]:
# Genes that are removed from the SCT variable-feature list further down.
outlier_genes <- c(
    "AC105402.3",
    "MTRNR2L8",
    "EDA",
    "IFNG-AS1"
)

In [None]:
# First pass: SCTransform with default settings (its warnings are muted),
# followed by PCA.
mnp <- suppressWarnings(SCTransform(mnp))
mnp <- RunPCA(mnp)

In [None]:
# Variable features stored on the SCT assay, and how many there are.
var_feat_sct <- mnp@assays[["SCT"]]@var.features
length(var_feat_sct)

In [None]:
# For each outlier gene, report whether it is among the SCT variable features.
is.element(outlier_genes, mnp@assays[["SCT"]]@var.features)

In [None]:
# SCT variable features with the outlier genes filtered out, and the number
# that remain.
var_feats <- mnp@assays[["SCT"]]@var.features
is_outlier <- var_feats %in% outlier_genes
var_feats_trim <- var_feats[!is_outlier]
length(var_feats_trim)

In [None]:
# Second pass: SCTransform restricted to the trimmed feature list (warnings
# muted), followed by PCA.
mnp <- suppressWarnings(SCTransform(mnp, residual.features = var_feats_trim))
mnp <- RunPCA(mnp)

In [None]:
# Figure size for the Harmony convergence plot.
options(repr.plot.width = 8, repr.plot.height = 4)

# Run Harmony on the SCT "pca" reduction, grouping cells by `batch_id`, and
# save the result as the "rna_harmony" reduction (warnings are muted).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
mnp <- suppressWarnings(RunHarmony(
    object = mnp,
    reduction.save = "rna_harmony",
    group.by.vars = "batch_id",
    reduction = "pca",
    plot_convergence = TRUE,
    assay.use = "SCT"
))

In [None]:
# Elbow plot over the first 50 dimensions.
ElbowPlot(object = mnp, ndims = 50)

In [None]:
# UMAP on dimensions 1-20 of the "rna_harmony" reduction.
mnp <- RunUMAP(object = mnp, reduction = "rna_harmony", dims = 1:20)

## RNA UMAPs

In [None]:
options(repr.plot.width = 20, repr.plot.height = 10)

# One shuffled DimPlot of the "umap" reduction, colored by a metadata column.
rna_umap_by <- function(column) {
    DimPlot(mnp, reduction = "umap", group.by = column, shuffle = TRUE)
}

p1 <- rna_umap_by("predicted.t_celltype.l2")
p2 <- rna_umap_by("pediatric_senior")
p3 <- rna_umap_by("age_cmv")
p4 <- rna_umap_by("pbmc_sample_id")
p1 + p2 + p3 + p4

In [None]:
options(repr.plot.width = 20, repr.plot.height = 12)

# Feature plots on the "umap" reduction with a viridis color scale; messages
# raised while building the plot are muted.
sct_markers <- c("sct_IFNG-AS1", "sct_EDA", "sct_SOX4", "sct_ZNF683")
suppressMessages(
    FeaturePlot(
        mnp,
        features = sct_markers,
        reduction = "umap",
        ncol = 2,
        order = TRUE
    ) & scale_color_viridis()
)

# ADT Clustering

In [None]:
# ADT features to exclude from the cleaned ADT assay.
adts_to_remove <- c(
    "CD134", "CD172a", "CD183", "CD366", "CX3CR1",
    "TCRab", "TCRgd", "CD137", "CD24", "CD294",
    "CD304", "CD40", "CD80", "CD86", "IgG1-K-Isotype-Control"
)

# Count matrix of the ADT assay and its number of features.
adt_mtx <- mnp@assays[["ADT"]]@counts
length(rownames(adt_mtx))

In [None]:
# Row positions of the ADT features listed in `adts_to_remove`.
idx <- which(rownames(adt_mtx) %in% adts_to_remove)

# Drop those rows with a logical mask rather than `adt_mtx[-idx, ]`: when
# nothing matches, `idx` is empty and a negative empty index selects zero
# rows, which would silently discard every feature. `drop = FALSE` keeps the
# result a matrix even if only one row remains.
remove_row <- rownames(adt_mtx) %in% adts_to_remove
clean_adt_mtx <- adt_mtx[!remove_row, , drop = FALSE]
length(rownames(clean_adt_mtx))

In [None]:
# Store the filtered ADT counts as the "clean_adt" assay and make it the
# default assay.
mnp[["clean_adt"]] <- CreateAssayObject(counts = clean_adt_mtx)
DefaultAssay(mnp) <- "clean_adt"

In [None]:
# Cluster by ADTs; results are saved under their own reduction name and key.
# Every feature of the "clean_adt" assay is used as a variable feature.
VariableFeatures(mnp) <- rownames(mnp[["clean_adt"]])
mnp <- NormalizeData(mnp, normalization.method = "CLR", margin = 2)
mnp <- ScaleData(mnp)
mnp <- RunPCA(
    mnp,
    reduction.name = "apca",
    reduction.key = "APC_",
    approx = FALSE
)

In [None]:
# Figure size for the Harmony convergence plot.
options(repr.plot.width = 8, repr.plot.height = 4)

# Run Harmony on the ADT "apca" reduction, grouping cells by `batch_id`, and
# save the result as the "adt_harmony" reduction (warnings are muted).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
mnp <- suppressWarnings(RunHarmony(
    object = mnp,
    reduction.save = "adt_harmony",
    group.by.vars = "batch_id",
    reduction = "apca",
    plot_convergence = TRUE,
    assay.use = "clean_adt"
))

In [None]:
# UMAP on dimensions 1-20 of "adt_harmony", stored as "adtumap".
mnp <- RunUMAP(
    object = mnp,
    reduction = "adt_harmony",
    dims = 1:20,
    reduction.name = "adtumap"
)

## ADT UMAPs

In [None]:
options(repr.plot.width = 20, repr.plot.height = 10)

# One DimPlot of the "adtumap" reduction, colored by a metadata column.
adt_umap_by <- function(column) {
    DimPlot(mnp, reduction = "adtumap", group.by = column)
}

p1 <- adt_umap_by("predicted.t_celltype.l2")
p2 <- adt_umap_by("pediatric_senior")
p3 <- adt_umap_by("age_cmv")
p4 <- adt_umap_by("pbmc_sample_id")
p1 + p2 + p3 + p4

# ATAC Clustering

## Load ArchR Project

In [None]:
# Load the saved ArchR project and display its summary.
proj <- loadArchRProject(
    path = "../../04_Figures/Figure5/"
)
proj

## Subset ArchR Project

In [None]:
# Cell barcodes of the RNA assay; preview the first few.
bcs <- colnames(x = mnp[["RNA"]])
head(x = bcs)

In [None]:
# Positions of ArchR cells whose `cellNames_clean` value appears in `bcs`.
in_seurat <- proj$cellNames_clean %in% bcs
idx_new <- which(in_seurat)
head(idx_new)

In [None]:
# Restrict the ArchR project to the matched cells and display the result.
cells_subset <- proj$cellNames[idx_new]
proj_subset <- proj[cells_subset, ]
proj_subset

## ATAC Clustering

In [None]:
# Recompute iterative LSI on the subset, overwriting any existing
# "IterativeLSI" result.
proj_subset <- addIterativeLSI(
    proj_subset,
    name = "IterativeLSI",
    varFeatures = 75000,
    force = TRUE
)

In [None]:
# Harmony on the "IterativeLSI" dimensions, grouped by `batch_id`.
proj_subset <- addHarmony(
    proj_subset,
    reducedDims = "IterativeLSI",
    groupBy = "batch_id",
    force = TRUE
)

In [None]:
# Cluster on the "Harmony" dimensions at resolution 0.2.
# Uncorrected alternative, kept for reference:
#proj_subset <- addClusters(proj_subset, reducedDims = 'IterativeLSI', name = 'Clusters', force = TRUE, resolution = 0.2)
proj_subset <- addClusters(
    proj_subset,
    reducedDims = "Harmony",
    name = "Clusters",
    resolution = 0.2,
    force = TRUE
)

In [None]:
# UMAP embedding computed from the "Harmony" dimensions.
# Uncorrected alternative, kept for reference:
#proj_subset <- addUMAP(proj_subset, reducedDims = 'IterativeLSI', name = 'UMAP', force = TRUE)
proj_subset <- addUMAP(
    proj_subset,
    reducedDims = "Harmony",
    name = "UMAP",
    force = TRUE
)

In [None]:
# Figure size for the embedding plot. The option name was previously
# misspelled as `repr.plomnpidth`, so the width was never applied.
options(repr.plot.width = 8, repr.plot.height = 8)

# ArchR UMAP colored by the "Clusters" column of cellColData.
plotEmbedding(ArchRProj = proj_subset, colorBy = "cellColData", name = "Clusters", embedding = "UMAP")
# plotEmbedding(ArchRProj = proj_subset, colorBy = "cellColData", name = "pediatric_senior", embedding = "UMAP")
# plotEmbedding(ArchRProj = proj_subset, colorBy = "cellColData", name = "predicted.t_celltype.l2", embedding = "UMAP")

## Export LSI

In [None]:
# Extract the "Harmony" reduced dimensions from the ArchR subset.
# Uncorrected alternative, kept for reference:
#lsi <- getReducedDims(proj_subset, reducedDims = 'IterativeLSI')
lsi <- getReducedDims(
    proj_subset,
    reducedDims = "Harmony"
)

## Import LSI to Seurat Object

In [None]:
# Build one barcode string per cell in the form
# "<batch_id>-P1_<pbmc_sample_id>#<metadata row name>".
rna_metadata <- mnp@meta.data
archr_style_barcodes <- paste0(
    rna_metadata$batch_id,
    "-P1_",
    rna_metadata$pbmc_sample_id,
    "#",
    rownames(rna_metadata)
)
head(archr_style_barcodes)

In [None]:
# Two-column lookup pairing each Seurat barcode with its ArchR-style barcode.
bc_df <- data.frame(
    seurat_bcs = colnames(mnp@assays$RNA),
    archr_style = archr_style_barcodes
)

In [None]:
# Barcodes present both as LSI row names and as ArchR-style barcodes.
bc_idx <- intersect(
    x = rownames(lsi),
    y = archr_style_barcodes
)

In [None]:
# Keep only the shared barcodes in both the LSI matrix and the lookup table,
# then report how many rows each has.
lsi_keep <- rownames(lsi) %in% bc_idx
lsi_subset <- lsi[lsi_keep, , drop = FALSE]

bc_keep <- bc_df$archr_style %in% bc_idx
bc_df_subset <- bc_df[bc_keep, , drop = FALSE]

length(rownames(lsi_subset))
length(rownames(bc_df_subset))

In [None]:
# Tabulate, position by position, whether the ArchR-style barcodes in
# `bc_df_subset` equal the row names of `lsi_subset` (TRUE = same barcode at
# the same position).
table(bc_df_subset$archr_style == rownames(lsi_subset))

In [None]:
# Reorder the LSI rows to follow the barcode order of `bc_df_subset`.
row_order <- match(bc_df_subset$archr_style, rownames(lsi_subset))
lsi_subset <- lsi_subset[row_order, ]

In [None]:
# Tabulate, position by position, whether the ArchR-style barcodes in
# `bc_df_subset` equal the row names of `lsi_subset` (TRUE = same barcode at
# the same position).
table(bc_df_subset$archr_style == rownames(lsi_subset))

In [None]:
# Swap the ArchR-style row names of the LSI matrix for the Seurat barcodes.
seurat_names <- bc_df_subset$seurat_bcs
rownames(lsi_subset) <- seurat_names

In [None]:
# Keep only the cells listed in `bc_df_subset$seurat_bcs`.
mnp <- subset(x = mnp, cells = bc_df_subset$seurat_bcs)

In [None]:
# Tabulate, position by position, whether the RNA assay's cell names equal
# the row names of `lsi_subset` (TRUE = same barcode at the same position).
table(colnames(mnp[['RNA']]) == rownames(lsi_subset))

In [None]:
# Attach the LSI matrix to the Seurat object as the "lsit" reduction.
mnp[["lsit"]] <- CreateDimReducObject(
    embeddings = lsi_subset,
    key = "lsit_",
    assay = "Tiles"
)

## ATAC clustering in Seurat Object

In [None]:
# Make "Tiles" the default assay, then compute a UMAP from dimensions 1-15 of
# the "lsit" reduction and store it as "atac_umap".
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
DefaultAssay(mnp) <- "Tiles"
mnp <- RunUMAP(
    mnp,
    reduction = "lsit",
    reduction.name = "atac_umap",
    dims = 1:15,
    verbose = TRUE,
    reduction.key = "atacumap_"
)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 10)

# One DimPlot of the "atac_umap" reduction, colored by a metadata column.
atac_umap_by <- function(column) {
    DimPlot(mnp, reduction = "atac_umap", group.by = column)
}

p1 <- atac_umap_by("predicted.t_celltype.l2")
p2 <- atac_umap_by("pediatric_senior")
p3 <- atac_umap_by("age_cmv")
p4 <- atac_umap_by("pbmc_sample_id")
p1 + p2 + p3 + p4

In [None]:
#options(repr.plot.width = 20, repr.plot.height = 20)
#DimPlot(mnp, reduction = 'atac_umap', group.by = 'gating_celltype', split.by = 'gating_celltype', ncol = 4)

# 3WNN Clustering

In [None]:
# Weighted nearest-neighbor graph across three reductions: "rna_harmony" and
# "adt_harmony" (dimensions 1-20 each) and "lsit" (dimensions 1-29).
# NOTE(review): the ATAC UMAP uses lsit dimensions 1:15 while 1:29 is used
# here -- confirm the difference is intended.
wnn_reductions <- list("rna_harmony", "adt_harmony", "lsit")
wnn_dims <- list(1:20, 1:20, 1:29)
mnp <- FindMultiModalNeighbors(
    mnp,
    reduction.list = wnn_reductions,
    dims.list = wnn_dims,
    weighted.nn.name = "X3way.weighted.nn",
    k.nn = 30,
    knn.range = 500,
    prune.SNN = 1/20
)

In [None]:
# UMAP computed from the "X3way.weighted.nn" neighbor object, stored as
# "wnn.3.umap".
# NOTE(review): the reduction key contains dots; Seurat may rewrite it with a
# warning -- confirm.
mnp <- RunUMAP(
    mnp,
    nn.name = "X3way.weighted.nn",
    reduction.name = "wnn.3.umap",
    reduction.key = "wnn.3.umap_"
)

In [None]:
# Cluster on the "wsnn" graph (algorithm 3, resolution 0.5).
mnp <- FindClusters(
    mnp,
    graph.name = "wsnn",
    algorithm = 3,
    resolution = 0.5,
    verbose = TRUE
)

## 3WNN UMAPs

In [None]:
options(repr.plot.width = 18, repr.plot.height = 8)

# Left: labeled clusters at resolution 0.5. Right: the `mnp_analysis` labels.
p1 <- DimPlot(
    mnp,
    reduction = "wnn.3.umap",
    group.by = "wsnn_res.0.5",
    label = TRUE
)
p2 <- DimPlot(
    mnp,
    reduction = "wnn.3.umap",
    group.by = "mnp_analysis",
    shuffle = TRUE
)
p1 + p2

## Consolidate cell type labels

In [None]:
# Mapping from `mnp_analysis` labels to consolidated cell types (old = new).
# Both MAIT labels collapse to "MAIT"; "gdT" and "MNP-2" map to themselves.
cell_type_map <- c(
    "CD8 MAIT" = "MAIT",
    "dnT MAIT" = "MAIT",
    "gdT" = "gdT",
    "MNP-2" = "MNP-2"
)

# Use `mnp_analysis` as the active identity, rename through the lookup (pairs
# are passed in the order listed above), then store the result as `cell.types`.
mnp <- SetIdent(mnp, value = "mnp_analysis")
# `quote(mnp)` makes do.call build the call RenameIdents(mnp, old = new, ...)
# rather than embedding the whole object in the call.
mnp <- do.call(RenameIdents, c(list(quote(mnp)), as.list(cell_type_map)))
mnp$cell.types <- Idents(mnp)

In [None]:
options(repr.plot.width = 18, repr.plot.height = 8)

# Left: labeled clusters at resolution 0.5. Right: consolidated cell types
# drawn with a fixed three-color palette.
cell_type_cols <- c("#fdbf6f", "#33a02c", "#1f78b4")
p1 <- DimPlot(
    mnp,
    reduction = "wnn.3.umap",
    group.by = "wsnn_res.0.5",
    label = TRUE
)
p2 <- DimPlot(
    mnp,
    reduction = "wnn.3.umap",
    group.by = "cell.types",
    shuffle = TRUE,
    cols = cell_type_cols
)
p1 + p2

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 8)

# Consolidated cell types on the 3-way WNN UMAP.
DimPlot(
    mnp,
    reduction = "wnn.3.umap",
    group.by = "cell.types",
    shuffle = TRUE,
    cols = c("#fdbf6f", "#33a02c", "#1f78b4")
)

In [None]:
# Open a 9.5 x 8 inch PDF device.
pdf("plots/mnp2_clustering.pdf", width = 9.5, height = 8)
# Draw the cell-type UMAP. The plot is printed explicitly so it is written to
# the PDF even when this code runs via source() or inside a function, where
# top-level auto-printing does not happen.
print(DimPlot(mnp, reduction = 'wnn.3.umap', group.by = 'cell.types', shuffle = TRUE, cols = c('#fdbf6f','#33a02c','#1f78b4')))
# Close the PDF device.
dev.off()

## Expression UMAPs

In [None]:
# Normalize the RNA assay.
mnp <- NormalizeData(object = mnp, assay = "RNA")

In [None]:
options(repr.plot.width = 7, repr.plot.height = 7)

# NOTE(review): every one of p1-p6 assigned in this cell is assigned again by
# the following cell.

# Density plots of RNA features.
DefaultAssay(mnp) <- "RNA"
p1 <- plot_density(object = mnp, features = c("TRDC", "TRGC2"), joint = TRUE)
p2 <- plot_density(object = mnp, features = c("TRAC", "TRBC2"), joint = TRUE)
p3 <- plot_density(object = mnp, features = "NCAM1")
p4 <- plot_density(object = mnp, features = "NKG7")
p6 <- plot_density(object = mnp, features = "KLRC2")

# Density plot of ADT features, drawn with the "inferno" palette.
DefaultAssay(mnp) <- "clean_adt"
p5 <- plot_density(
    object = mnp,
    features = c("TCR-Valpha7.2", "CD161"),
    joint = TRUE,
    pal = "inferno"
)
# p4[[3]]
# p4

In [None]:
options(repr.plot.width = 7, repr.plot.height = 7)

# Density plots of RNA features.
DefaultAssay(mnp) <- "RNA"
p1 <- plot_density(object = mnp, features = c("TRDC", "TRGC2"), joint = TRUE)
p2 <- plot_density(object = mnp, features = c("TRAC", "TRBC2"), joint = TRUE)
p3 <- plot_density(object = mnp, features = "NCAM1")
p4 <- plot_density(object = mnp, features = "KLRC2")

# Density plots of ADT features, drawn with the "inferno" palette.
DefaultAssay(mnp) <- "clean_adt"
p6 <- plot_density(object = mnp, features = "CD56", pal = "inferno")
p5 <- plot_density(
    object = mnp,
    features = c("TCR-Valpha7.2", "CD161"),
    joint = TRUE,
    pal = "inferno"
)
# p4[[3]]
# p4

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)

# Arrange six density panels in three columns: the third element of p1 and p2,
# the first element of p5, and p3, p4 and p6 as they are.
plot_grid(
    p1[[3]], p2[[3]], p3,
    p4, p5[[1]], p6,
    ncol = 3
)

In [None]:
# Open a 16 x 8 inch PDF device.
pdf("plots/mnp2_nebulosa_markers_updated.pdf", width = 16, height = 8)
# Draw the marker grid. The plot is printed explicitly so it is written to the
# PDF even when this code runs via source() or inside a function, where
# top-level auto-printing does not happen.
print(plot_grid(p1[[3]], p2[[3]], p3, p4, p5[[1]], p6, ncol = 3))
# Close the PDF device.
dev.off()

# CD8 Plots

In [None]:
# Read the serialized `trim_cd8_na` object from the Figure4 directory.
trim_cd8_na <- readRDS(
    file = "../Figure4/trim_cd8_na.rds"
)

In [None]:
# Use `pure_celltype` as the active identity and keep the "CD8 Naive" cells.
trim_cd8_na <- SetIdent(object = trim_cd8_na, value = "pure_celltype")
cd8na <- subset(x = trim_cd8_na, idents = "CD8 Naive")

In [None]:
# Copy `pure_celltype` into a `cell.types` column, the same column name used
# on `mnp`.
cd8na[["cell.types"]] <- cd8na$pure_celltype

In [None]:
# Remove any cells that are also present in `mnp`, then display the objects
# after and before the removal.
mnp_cells <- colnames(mnp[["RNA"]])
cd8na_clean <- subset(cd8na, cells = mnp_cells, invert = TRUE)
cd8na_clean
cd8na

In [None]:
# Merge `mnp` with the non-overlapping CD8 naive cells.
mnp_na_merge <- merge(x = mnp, y = cd8na_clean)

In [None]:
# Normalize the RNA assay of the merged object.
mnp_na_merge <- NormalizeData(object = mnp_na_merge, assay = "RNA")
# mnp_na_merge <- NormalizeData(mnp_na_merge, assay = 'clean_adt')

## VlnPlots

In [None]:
options(repr.plot.width = 32, repr.plot.height = 4)
DefaultAssay(mnp_na_merge) <- "RNA"

# Shared fill colors and theme tweaks: horizontal centered x labels and no
# axis titles.
vln_fill <- c("#807dba", "#33a02c", "#fdbf6f", "#1f78b4")
vln_theme <- theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
)

# Point-free violin plot of one gene, without a legend.
marker_vln <- function(gene) {
    VlnPlot(mnp_na_merge, features = gene, pt.size = 0, cols = vln_fill) &
        NoLegend() &
        vln_theme
}

p1 <- marker_vln("CD8A")
p2 <- marker_vln("CD8B")
p3 <- marker_vln("CD3D")
p4 <- marker_vln("NKG7")
plot_grid(p1, p2, p3, p4, ncol = 4, scale = 0.90)

In [None]:
# Open a 32 x 4 inch PDF device.
pdf("plots/mnp2_vln_plots.pdf", width = 32, height = 4)
# Draw the violin-plot grid. The plot is printed explicitly so it is written
# to the PDF even when this code runs via source() or inside a function, where
# top-level auto-printing does not happen.
print(plot_grid(p1, p2, p3, p4, ncol = 4, scale = 0.90))
# Close the PDF device.
dev.off()

# Save

In [None]:
# Serialize the `mnp` object to the working directory.
saveRDS(object = mnp, file = "mnp_analysis_3wnn_updated.rds")

In [None]:
# Reload the saved object.
mnp <- readRDS(file = "mnp_analysis_3wnn_updated.rds")

In [None]:
# Count the cells in each `cell.types` category.
table(mnp$cell.types)

In [None]:
# Record the R version and attached packages.
utils::sessionInfo()