# Basic 3WNN Clustering w/ Gated Labels

In [None]:
# Attach a package with its startup messages suppressed.
#
# All arguments are forwarded unchanged to `library()`, so the package can be
# given as a bare name exactly as it would be in a direct `library()` call.
quiet_library <- function(...) {
    # Only package startup messages are silenced by this wrapper.
    suppressPackageStartupMessages(library(...))
}
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
quiet_library(viridis)
quiet_library(harmony)
quiet_library(Nebulosa)
quiet_library(ArchR)

In [None]:
# ArchR session defaults: worker thread count and reference genome.
addArchRThreads(threads = 32)
addArchRGenome(genome = "hg38")

# Load dnT Subset

In [None]:
# Read the serialized object from the working directory.
dnT <- readRDS(file = "nonMAIT_merge.rds")

# Add Metadata

In [None]:
# Derive a per-cell CMV status: set identities to `pbmc_sample_id`, rename
# each sample id to its CMV label, then store the result in metadata.
dnT <- dnT %>%
  SetIdent(value = "pbmc_sample_id") %>%
  RenameIdents(
    "PB00173-02" = "Positive", # validated
    "PB00192-02" = "Negative", # validated
    "PB00197-02" = "Negative", # validated
    "PB00593-04" = "Positive", # validated
    "PB00172-02" = "Positive",
    "PB00182-02" = "Negative",
    "PB00323-02" = "Positive",
    "PB00807-02" = "Positive",
    "PB00124-02" = "Positive",
    "PB00127-02" = "Negative",
    "PB00334-03" = "Negative",
    "PB00353-03" = "Negative",
    "PB00368-04" = "Positive",
    "PB00377-03" = "Negative",
    "PB00545-02" = "Negative",
    "PB00599-02" = "Positive"
  )
dnT$CMV <- Idents(dnT)

In [None]:
# Derive a per-cell subject id: set identities to `pbmc_sample_id`, rename
# each sample id to its subject label, then store the result in metadata.
dnT <- dnT %>%
  SetIdent(value = "pbmc_sample_id") %>%
  RenameIdents(
    "PB00173-02" = "UP1006", # validated
    "PB00192-02" = "UP1007", # validated
    "PB00197-02" = "UP1010", # validated
    "PB00593-04" = "BR2002", # validated
    "PB00172-02" = "UP1002",
    "PB00182-02" = "UP1003",
    "PB00323-02" = "BR2004",
    "PB00807-02" = "UP1005",
    "PB00124-02" = "UP1001",
    "PB00127-02" = "UP1004",
    "PB00334-03" = "BR2009",
    "PB00353-03" = "BR2008",
    "PB00368-04" = "BR2005",
    "PB00377-03" = "BR2015",
    "PB00545-02" = "BR2042",
    "PB00599-02" = "BR2052"
  )
dnT$subject_id <- Idents(dnT)

In [None]:
# Combined label of the form "<pediatric_senior>_CMV-<CMV>".
dnT$age_cmv <- paste0(
  dnT$pediatric_senior,
  "_CMV-",
  dnT$CMV
)

# RNA Clustering

In [None]:
# Genes to exclude from the variable-feature set before re-running SCTransform.
outlier_genes <- c(
  "AC105402.3",
  "MTRNR2L8",
  "EDA",
  "IFNG-AS1"
)

In [None]:
# First pass: SCTransform (warnings silenced), then PCA.
dnT <- suppressWarnings(SCTransform(dnT))
dnT <- RunPCA(dnT)

In [None]:
# Variable features chosen by the first SCTransform pass, and how many.
var_feat_sct <- dnT@assays[["SCT"]]@var.features
length(var_feat_sct)

In [None]:
# Which of the outlier genes are currently among the SCT variable features?
outlier_genes %in% dnT@assays[["SCT"]]@var.features

In [None]:
# Remove the outlier genes from the SCT variable features and report how
# many features remain.
var_feats <- dnT@assays[["SCT"]]@var.features
var_feats_trim <- var_feats[!is.element(var_feats, outlier_genes)]
length(var_feats_trim)

In [None]:
# Second pass: SCTransform restricted to the trimmed feature set, then PCA.
dnT <- suppressWarnings(
  SCTransform(dnT, residual.features = var_feats_trim)
)
dnT <- RunPCA(dnT)

In [None]:
# Plot size for the convergence plot requested below.
options(repr.plot.width = 8, repr.plot.height = 4)
# Run Harmony on the "pca" reduction grouped by `batch_id`, saving the result
# as "rna_harmony". `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
dnT <- suppressWarnings(
  RunHarmony(
    object = dnT,
    reduction.save = "rna_harmony",
    group.by.vars = "batch_id",
    reduction = "pca",
    plot_convergence = TRUE,
    assay.use = "SCT"
  )
)

In [None]:
# Elbow plot over the first 50 dimensions.
ElbowPlot(object = dnT, ndims = 50)

In [None]:
# UMAP on the first 20 dimensions of the "rna_harmony" reduction.
dnT <- RunUMAP(
  object = dnT,
  reduction = "rna_harmony",
  dims = 1:20
)

## RNA UMAPs

In [None]:
# Four views of the RNA UMAP, each coloured by a different metadata column.
options(repr.plot.width = 20, repr.plot.height = 10)
p1 <- DimPlot(
  dnT, group.by = "predicted.t_celltype.l2",
  reduction = "umap", shuffle = TRUE
)
p2 <- DimPlot(
  dnT, group.by = "pediatric_senior",
  reduction = "umap", shuffle = TRUE
)
p3 <- DimPlot(
  dnT, group.by = "age_cmv",
  reduction = "umap", shuffle = TRUE
)
p4 <- DimPlot(
  dnT, group.by = "pbmc_sample_id",
  reduction = "umap", shuffle = TRUE
)
p1 + p2 + p3 + p4

In [None]:
# Feature plots on the RNA UMAP for four SCT features, viridis colour scale.
options(repr.plot.width = 20, repr.plot.height = 12)
suppressMessages(
  FeaturePlot(
    dnT,
    features = c("sct_IFNG-AS1", "sct_EDA", "sct_SOX4", "sct_ZNF683"),
    reduction = "umap",
    ncol = 2,
    order = TRUE
  ) & scale_color_viridis()
)

# ADT Clustering

In [None]:
# ADT features to exclude, the raw ADT count matrix, and its feature count.
adts_to_remove <- c(
  "CD134", "CD172a", "CD183", "CD366", "CX3CR1",
  "TCRab", "TCRgd", "CD137", "CD24", "CD294",
  "CD304", "CD40", "CD80", "CD86",
  "IgG1-K-Isotype-Control"
)
adt_mtx <- dnT@assays[["ADT"]]@counts
length(rownames(adt_mtx))

In [None]:
# Drop the unwanted ADT rows with a logical mask instead of negative indices.
# `adt_mtx[-idx, ]` silently returns ZERO rows when `idx` is empty (no name
# matched), and without `drop = FALSE` a single surviving row would collapse
# to a plain vector.
is_removed <- rownames(adt_mtx) %in% adts_to_remove
idx <- which(is_removed) # kept so `idx` is still defined for later cells
clean_adt_mtx <- adt_mtx[!is_removed, , drop = FALSE]
length(rownames(clean_adt_mtx))

In [None]:
# Register the filtered ADT counts as a new assay and make it the default.
dnT[["clean_adt"]] <- CreateAssayObject(counts = clean_adt_mtx)
DefaultAssay(dnT) <- "clean_adt"

In [None]:
# ADT dimensionality reduction. Every "clean_adt" feature is used as a
# variable feature, and the PCA is stored under its own name ("apca") and key
# ("APC_") so it does not overwrite the RNA PCA.
VariableFeatures(dnT) <- rownames(dnT[["clean_adt"]])
dnT <- NormalizeData(dnT, normalization.method = "CLR", margin = 2)
dnT <- ScaleData(dnT)
dnT <- RunPCA(
  dnT,
  reduction.name = "apca",
  approx = FALSE,
  reduction.key = "APC_"
)

In [None]:
# Plot size for the convergence plot requested below.
options(repr.plot.width = 8, repr.plot.height = 4)
# Run Harmony on the "apca" reduction grouped by `batch_id`, saving the result
# as "adt_harmony". `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
dnT <- suppressWarnings(
  RunHarmony(
    object = dnT,
    reduction.save = "adt_harmony",
    group.by.vars = "batch_id",
    reduction = "apca",
    plot_convergence = TRUE,
    assay.use = "clean_adt"
  )
)

In [None]:
# UMAP on the first 20 dimensions of "adt_harmony", stored as "adtumap".
# The key is set explicitly (as the ATAC UMAP does) instead of relying on
# Seurat's duplicate-key fallback, which only renames the default key after
# emitting a warning because the RNA UMAP already owns it.
dnT <- RunUMAP(
  dnT,
  reduction = "adt_harmony",
  reduction.name = "adtumap",
  reduction.key = "adtumap_",
  dims = 1:20
)

## ADT UMAPs

In [None]:
# Four views of the ADT UMAP, each coloured by a different metadata column.
options(repr.plot.width = 20, repr.plot.height = 10)
p1 <- DimPlot(dnT, group.by = "predicted.t_celltype.l2", reduction = "adtumap")
p2 <- DimPlot(dnT, group.by = "pediatric_senior", reduction = "adtumap")
p3 <- DimPlot(dnT, group.by = "age_cmv", reduction = "adtumap")
p4 <- DimPlot(dnT, group.by = "pbmc_sample_id", reduction = "adtumap")
p1 + p2 + p3 + p4

# ATAC Clustering

## Load ArchR Project

In [None]:
# Load the saved ArchR project from the "gdT/" directory and display it.
proj <- loadArchRProject(path = "gdT/")
proj

## Subset ArchR Project

In [None]:
# Cell barcodes of the RNA assay; preview the first few.
bcs <- colnames(dnT[["RNA"]])
head(bcs)

In [None]:
# Positions of the ArchR cells whose `cellNames_clean` value is one of the
# barcodes in `bcs`; preview the first few.
idx_new <- which(proj$cellNames_clean %in% bcs)
head(idx_new)

In [None]:
# Look up the ArchR cell names at the matched positions, index the project
# by those names, and display the resulting subset.
cells_subset <- proj$cellNames[idx_new]
proj_subset <- proj[cells_subset,]
proj_subset

## ATAC Clustering

In [None]:
# Recompute iterative LSI on the subset, overwriting any existing result.
proj_subset <- addIterativeLSI(
  ArchRProj = proj_subset,
  name = "IterativeLSI",
  force = TRUE
)

In [None]:
# Harmony on the "IterativeLSI" dimensions, grouped by `batch_id`.
proj_subset <- addHarmony(
  ArchRProj = proj_subset,
  reducedDims = "IterativeLSI",
  groupBy = "batch_id",
  force = TRUE
)

In [None]:
# Cluster on the "Harmony" dimensions. The "IterativeLSI" alternative is kept
# commented out for reference:
#proj_subset <- addClusters(proj_subset, reducedDims = 'IterativeLSI', name = 'Clusters', force = TRUE, resolution = 0.2)
proj_subset <- addClusters(
  input = proj_subset,
  reducedDims = "Harmony",
  name = "Clusters",
  force = TRUE,
  resolution = 0.2
)

In [None]:
# UMAP on the "Harmony" dimensions. The "IterativeLSI" alternative is kept
# commented out for reference:
#proj_subset <- addUMAP(proj_subset, reducedDims = 'IterativeLSI', name = 'UMAP', force = TRUE)
proj_subset <- addUMAP(
  ArchRProj = proj_subset,
  reducedDims = "Harmony",
  name = "UMAP",
  force = TRUE
)

In [None]:
# The width option was misspelled as `repr.plodnTidth`; `options()` accepts
# any name, so the typo was stored silently and the plot width never changed.
options(repr.plot.width = 8, repr.plot.height = 8)
# Plot the ArchR UMAP coloured by the "Clusters" column of cellColData.
plotEmbedding(ArchRProj = proj_subset, colorBy = "cellColData", name = "Clusters", embedding = "UMAP")
# plotEmbedding(ArchRProj = proj_subset, colorBy = "cellColData", name = "pediatric_senior", embedding = "UMAP")
# plotEmbedding(ArchRProj = proj_subset, colorBy = "cellColData", name = "predicted.t_celltype.l2", embedding = "UMAP")

## Export LSI

In [None]:
# Pull the "Harmony" reduced dimensions out of the ArchR project. The
# "IterativeLSI" alternative is kept commented out for reference:
#lsi <- getReducedDims(proj_subset, reducedDims = 'IterativeLSI')
lsi <- getReducedDims(
  ArchRProj = proj_subset,
  reducedDims = "Harmony"
)

## Import LSI to Seurat Object

In [None]:
# Build barcodes of the form "<batch_id>-P1_<pbmc_sample_id>#<cell barcode>"
# from the Seurat metadata, for matching against the row names of `lsi`.
# `paste0()` replaces the `paste(..., sep = "")` spelling; the output is the same.
rna_metadata <- dnT@meta.data
archr_style_barcodes <- paste0(
  rna_metadata$batch_id, "-P1_",
  rna_metadata$pbmc_sample_id, "#",
  rownames(rna_metadata)
)
head(archr_style_barcodes)

In [None]:
# Lookup table pairing each Seurat barcode with its ArchR-style barcode.
bc_df <- data.frame(
  seurat_bcs = colnames(dnT@assays[["RNA"]]),
  archr_style = archr_style_barcodes
)

In [None]:
# Barcodes present both as row names of `lsi` and among the ArchR-style
# barcodes built from the Seurat metadata; used below to subset both sides.
bc_idx <- intersect(rownames(lsi), archr_style_barcodes)

In [None]:
# Keep only the shared barcodes on both sides, then print both row counts so
# they can be compared.
lsi_subset <- lsi[rownames(lsi) %in% bc_idx, , drop = FALSE]
bc_df_subset <- bc_df[bc_df$archr_style %in% bc_idx, , drop = FALSE]
nrow(lsi_subset)
nrow(bc_df_subset)

In [None]:
# Tabulate element-wise agreement between the barcode table and the row names
# of `lsi_subset`; any FALSE count means the two are not in the same order.
table(bc_df_subset$archr_style == rownames(lsi_subset))

In [None]:
# Reorder the rows of `lsi_subset` to follow the order of
# `bc_df_subset$archr_style`.
lsi_subset <- lsi_subset[match(bc_df_subset$archr_style, rownames(lsi_subset)),]

In [None]:
# Tabulate the element-wise comparison again after the reorder.
table(bc_df_subset$archr_style == rownames(lsi_subset))

In [None]:
# Replace the ArchR-style row names with the corresponding Seurat barcodes.
# This relies on `lsi_subset` and `bc_df_subset` being in the same row order.
rownames(lsi_subset) <- bc_df_subset$seurat_bcs

In [None]:
# Restrict the Seurat object to the cells listed in the barcode table.
dnT <- subset(
  x = dnT,
  cells = bc_df_subset$seurat_bcs
)

In [None]:
# Tabulate element-wise agreement between the RNA assay's cell names and the
# row names of `lsi_subset`.
table(
  colnames(dnT[["RNA"]]) == rownames(lsi_subset)
)

In [None]:
# Attach the imported embeddings as the "lsit" reduction (key "lsit_"),
# associated with the "Tiles" assay.
dnT[["lsit"]] <- CreateDimReducObject(
  embeddings = lsi_subset,
  key = "lsit_",
  assay = "Tiles"
)

## ATAC clustering in Seurat Object

In [None]:
# Switch the default assay to "Tiles" and run UMAP on dimensions 1-15 of the
# "lsit" reduction, stored as "atac_umap" with key "atacumap_".
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
DefaultAssay(dnT) <- "Tiles"
dnT <- RunUMAP(
  dnT,
  reduction = "lsit",
  reduction.name = "atac_umap",
  dims = 1:15,
  verbose = TRUE,
  reduction.key = "atacumap_"
)

In [None]:
# Four views of the ATAC UMAP, each coloured by a different metadata column.
options(repr.plot.width = 20, repr.plot.height = 10)
p1 <- DimPlot(dnT, group.by = "predicted.t_celltype.l2", reduction = "atac_umap")
p2 <- DimPlot(dnT, group.by = "pediatric_senior", reduction = "atac_umap")
p3 <- DimPlot(dnT, group.by = "age_cmv", reduction = "atac_umap")
p4 <- DimPlot(dnT, group.by = "pbmc_sample_id", reduction = "atac_umap")
p1 + p2 + p3 + p4

In [None]:
#options(repr.plot.width = 20, repr.plot.height = 20)
#DimPlot(dnT, reduction = 'atac_umap', group.by = 'gating_celltype', split.by = 'gating_celltype', ncol = 4)

# 3WNN Clustering

In [None]:
# Weighted nearest-neighbour graph over the three reductions: RNA
# (dims 1-20), ADT (dims 1-20) and ATAC (dims 1-29).
dnT <- FindMultiModalNeighbors(
  object = dnT,
  reduction.list = list("rna_harmony", "adt_harmony", "lsit"),
  dims.list = list(1:20, 1:20, 1:29),
  weighted.nn.name = "X3way.weighted.nn",
  k.nn = 30,
  knn.range = 500,
  prune.SNN = 1 / 20
)

In [None]:
# UMAP on the 3-way weighted neighbour object, stored as "wnn.3.umap".
# Seurat keys must be alphanumeric with a trailing underscore; the previous
# key "wnn.3.umap_" contained dots and was rewritten to "wnn3umap_" with a
# warning, so the sanitized key is now given directly.
dnT <- RunUMAP(
  dnT,
  nn.name = "X3way.weighted.nn",
  reduction.name = "wnn.3.umap",
  reduction.key = "wnn3umap_"
)

In [None]:
# Cluster on the "wsnn" graph at resolution 1.
dnT <- FindClusters(
  object = dnT,
  graph.name = "wsnn",
  algorithm = 3,
  resolution = 1,
  verbose = TRUE
)

## 3WNN UMAPs

In [None]:
# WNN UMAP coloured by cluster (labelled) and by predicted cell type.
options(repr.plot.width = 18, repr.plot.height = 8)
p1 <- DimPlot(
  dnT, group.by = "wsnn_res.1",
  reduction = "wnn.3.umap", label = TRUE
)
# NOTE(review): the other UMAP panels in this file group by
# "predicted.t_celltype.l2"; confirm "predicted.celltype.l2" is the intended
# metadata column here.
p2 <- DimPlot(
  dnT, group.by = "predicted.celltype.l2",
  reduction = "wnn.3.umap", shuffle = TRUE
)
p1 + p2

# Constant Region Gene Expression

In [None]:
# Feature plots on the WNN UMAP: six TCR constant-region genes from the SCT
# assay, followed by ADT features addressed through their assay-key prefixes.
options(repr.plot.width = 15, repr.plot.height = 18)
DefaultAssay(dnT) <- "SCT"
FeaturePlot(
  dnT,
  features = c(
    "TRGC1", "TRGC2", "TRDC",
    "TRAC", "TRBC1", "TRBC2",
    "adt_TCRgd", "cleanadt_CD16", "cleanadt_CD56",
    "cleanadt_TCR-Valpha7.2", "cleanadt_CD161"
  ),
  ncol = 3,
  order = TRUE,
  reduction = "wnn.3.umap"
) & scale_color_viridis()

In [None]:
# Joint density plots on the WNN UMAP. Gene pairs are drawn with "SCT" as the
# default assay, protein pairs with "clean_adt" as the default assay.
DefaultAssay(dnT) <- "SCT"
p1 <- plot_density(dnT, c("TRDC", "TRGC1"), reduction = "wnn.3.umap", joint = TRUE)
p2 <- plot_density(dnT, c("TRDC", "TRGC2"), reduction = "wnn.3.umap", joint = TRUE)
p3 <- plot_density(dnT, c("TRAC", "TRBC1"), reduction = "wnn.3.umap", joint = TRUE)
p4 <- plot_density(dnT, c("TRAC", "TRBC2"), reduction = "wnn.3.umap", joint = TRUE)

DefaultAssay(dnT) <- "clean_adt"
p5 <- plot_density(dnT, c("CD56", "CD16"), reduction = "wnn.3.umap", joint = TRUE)
p6 <- plot_density(dnT, c("CD161", "TCR-Valpha7.2"), reduction = "wnn.3.umap", joint = TRUE)

In [None]:
library(cowplot)

In [None]:
# Arrange the third element of each density result in a two-column grid
# (presumably the joint-density panel; verify against Nebulosa's output).
options(repr.plot.width = 12, repr.plot.height = 18)
plot_grid(
  plotlist = list(
    p1[[3]], p2[[3]], p3[[3]],
    p4[[3]], p5[[3]], p6[[3]]
  ),
  ncol = 2
)

# Subset gdT

In [None]:
# Keep the selected `wsnn_res.1` clusters and tag the subset as "gdT".
dnT <- SetIdent(dnT, value = "wsnn_res.1")
gdT <- subset(
  dnT,
  idents = c(0:4, 6:8, 16, 18)
)
gdT$mnp_analysis <- "gdT"

In [None]:
# Write the subset to disk in the working directory.
saveRDS(object = gdT, file = "gdT_subset.rds")

In [None]:
# Record the R version and attached package versions used for this run.
sessionInfo()