In [None]:
# Attach a package while hiding the messages it prints on startup.
# Arguments are forwarded unchanged to library().
quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}
quiet_library(ArchR)
quiet_library(hise)
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
#quiet_library(viridis)
#quiet_library(harmony)
#quiet_library(Nebulosa)
quiet_library(stringr)

In [None]:
# Global ArchR settings for this session: worker thread count and
# reference genome.
addArchRThreads(threads = 32)
addArchRGenome(genome = "hg38")

In [None]:
# Load the saved ArchR project from the ATAC/ directory and show its summary.
proj_subset <- loadArchRProject(path = "ATAC/")
proj_subset

# TF Volcano Plot

In [None]:
# Differential peak test on the PeakMatrix: Pediatric CD4 Naive (foreground)
# against Senior CD4 Naive (background), using at most 10000 cells.
markerPeaks_na <- getMarkerFeatures(
  ArchRProj = proj_subset,
  useMatrix = "PeakMatrix",
  groupBy = "age_celltype",
  useGroups = "Pediatric_CD4 Naive",
  bgdGroups = "Senior_CD4 Naive",
  maxCells = 10000
)

In [None]:
# Motif enrichment among peaks with negative Log2FC (FDR <= 0.1), i.e. peaks
# lower in the foreground group of markerPeaks_na than in its background.
cisbpEnrich1 <- peakAnnoEnrichment(
  seMarker       = markerPeaks_na,
  ArchRProj      = proj_subset,
  peakAnnotation = "Motif",
  cutOff         = "FDR <= 0.1 & Log2FC < 0"
)

In [None]:
# Motif enrichment among peaks with non-negative Log2FC (FDR <= 0.1), i.e.
# peaks at least as accessible in the foreground group of markerPeaks_na.
cisbprEnrich2 <- peakAnnoEnrichment(
  seMarker       = markerPeaks_na,
  ArchRProj      = proj_subset,
  peakAnnotation = "Motif",
  cutOff         = "FDR <= 0.1 & Log2FC >= 0"
)

In [None]:
# Reshape the Log2FC < 0 enrichment result into one row per TF.
# Enrichment is negated so these motifs fall on the negative side of the
# volcano plot's x axis.
JDF1 <- local({
  enrich_long <- as.data.frame(assays(cisbpEnrich1))
  # Motif row names look like "<TF>_<suffix>"; keep only the TF part.
  enrich_long$TF <- gsub("_.*", "", rownames(enrich_long))
  enrich_wide <- tidyr::pivot_wider(
    enrich_long,
    id_cols = TF,
    names_from = group_name,
    values_from = value
  )
  mutate(
    enrich_wide,
    Enrichment = -as.numeric(Enrichment),
    mlog10Padj = as.numeric(mlog10Padj),
    Group = "Adult CD4Na"
  )
})

In [None]:
# Reshape the Log2FC >= 0 enrichment result into one row per TF.
# Enrichment keeps its sign, so these motifs fall on the positive side of the
# volcano plot's x axis.
JDF2 <- local({
  enrich_long <- as.data.frame(assays(cisbprEnrich2))
  # Motif row names look like "<TF>_<suffix>"; keep only the TF part.
  enrich_long$TF <- gsub("_.*", "", rownames(enrich_long))
  enrich_wide <- tidyr::pivot_wider(
    enrich_long,
    id_cols = TF,
    names_from = group_name,
    values_from = value
  )
  mutate(
    enrich_wide,
    Enrichment = as.numeric(Enrichment),
    mlog10Padj = as.numeric(mlog10Padj),
    Group = "Pediatric CD4Na"
  )
})

In [None]:
# Curated TFs that are eligible for a text label / highlight in the volcano plot.
volcano_motif_list <- c(
  "BATF3", "BATF",
  "NFATC2", "NFATC3", "NFATC4",
  "HOXB4", "HOXC4", "HOXD4",
  "IRF8", "IRF4", "IRF7",
  "SP1", "SP4", "SP9",
  "KLF14", "KLF3", "KLF2",
  "ETV1", "ETV2",
  "RELA", "CREB1", "ATF1", "IKZF1",
  "SPIB", "SPI1",
  "NFKB1", "NFKB2",
  "SOX10", "SOX4",
  "ATF3", "ATF7", "CREM", "TCFL5"
)

In [None]:
# JDF <- rbind(JDF1, JDF2) %>% mutate(delabel = ifelse(abs(Enrichment) > 1 & mlog10Padj > 15, TF, NA))
# Stack both enrichment directions. `delabel` carries the TF name only for
# curated TFs with |Enrichment| > 1 and mlog10Padj > 3; every other row is NA.
JDF <- mutate(
  rbind(JDF1, JDF2),
  delabel = ifelse(
    TF %in% volcano_motif_list & abs(Enrichment) > 1 & mlog10Padj > 3,
    TF,
    NA
  )
)

In [None]:
# Colour key: rows with |Enrichment| > 1 and mlog10Padj > 5 keep their Group
# label; all others get the Group label with an "_NA" suffix.
JDF <- JDF %>%
  mutate(
    test = ifelse(
      abs(Enrichment) > 1 & mlog10Padj > 5,
      Group,
      paste0(Group, "_NA")
    )
  )

In [None]:
# Size key: curated TFs with |Enrichment| > 1 and mlog10Padj > 5 are flagged
# 'Special'; everything else is 'Not Special'.
JDF <- JDF %>%
  mutate(
    test2 = ifelse(
      TF %in% volcano_motif_list & abs(Enrichment) > 1 & mlog10Padj > 5,
      "Special",
      "Not Special"
    )
  )

In [None]:
# Preview the first six rows of the combined enrichment table.
head(JDF, n = 6L)

In [None]:
library(ggrepel)
library(scales)
#' Axis transformation that compresses the open interval (from, to)
#'
#' Values strictly between `from` and `to` are shrunk towards `from` by
#' `factor`; values at or above `to` are shifted so the axis stays continuous;
#' values at or below `from` are left unchanged. `NA` elements pass through as
#' `NA` while the remaining elements are still transformed.
#'
#' @param from Lower bound of the interval to compress.
#' @param to Upper bound of the interval to compress.
#' @param factor Compression factor applied inside the interval.
#' @return The object returned by `trans_new("squished", ...)`, wrapping the
#'   forward and inverse transformations.
squish_trans <- function(from, to, factor) {
  # Position of `to` after compression; also the boundary used by the inverse.
  squished_to <- from + (to - from) / factor

  trans <- function(x) {
    # which() drops NA comparisons, so NA elements are skipped instead of
    # disabling the transformation for the whole vector.
    inside <- which(x > from & x < to)
    above <- which(x >= to)

    x[inside] <- from + (x[inside] - from) / factor
    x[above] <- squished_to + (x[above] - to)
    x
  }

  inv <- function(x) {
    inside <- which(x > from & x < squished_to)
    above <- which(x >= squished_to)

    x[inside] <- from + (x[inside] - from) * factor
    x[above] <- to + (x[above] - squished_to)
    x
  }

  trans_new("squished", trans, inv)
}

In [None]:
# Volcano plot of motif enrichment with the significance thresholds drawn in
# red (|Enrichment| = 1 and mlog10Padj = 5).
options(repr.plot.width = 14, repr.plot.height = 8)
ggplot(JDF, aes(x = Enrichment, y = mlog10Padj, label = delabel)) +
  geom_point(aes(color = test, size = test2)) +
  theme_minimal() +
  geom_vline(xintercept = c(-1, 1), col = "red") +
  # Colours are keyed by name so each group keeps its colour even when one of
  # the four `test` levels is absent from the data.
  scale_color_manual(values = c(
    "Adult CD4Na" = "#d95f02",
    "Adult CD4Na_NA" = "gray",
    "Pediatric CD4Na" = "#1b9e77",
    "Pediatric CD4Na_NA" = "gray"
  )) +
  geom_hline(yintercept = 5, col = "red") +
  ggtitle("CISBP Motif Enrichment of CD4Na Age") +
  coord_cartesian(clip = "off") +
  # Compress the crowded (-1, 1) band of the x axis by a factor of 20.
  scale_x_continuous(
    trans = squish_trans(-1, 1, 20),
    breaks = c(-1.5, -1.25, -1, 0, 1, 1.25, 1.5),
    labels = c(-1.5, -1.25, -1, 0, 1, 1.25, 1.5)
  ) +
  geom_text_repel(
    box.padding = 0.5, max.overlaps = 20,
    xlim = c(-Inf, Inf), ylim = c(-Inf, Inf), size = 7
  ) +
  ylab("- Log of Adjusted P Value") +
  xlab("Enrichment in either Positive or Negative Different Peakset") +
  theme(
    plot.margin = unit(c(2, 5, 2, 15), "mm"),
    # NOTE(review): `size` in element_rect() is deprecated from ggplot2 3.4.0
    # in favour of `linewidth`; kept so older ggplot2 versions still work.
    panel.border = element_rect(colour = "black", fill = NA, size = 0.5)
  )

In [None]:
# Volcano plot of motif enrichment without the threshold guide lines.
# NOTE(review): this title says "JASPAR" while the enrichment objects are
# named cisbp* — confirm which motif set is actually in use.
options(repr.plot.width = 11.5, repr.plot.height = 10)
ggplot(JDF, aes(x = Enrichment, y = mlog10Padj, label = delabel)) +
  geom_point(aes(color = test, size = test2)) +
  theme_minimal() +
  # geom_vline(xintercept=c(-1, 1), col="red") +
  # Colours are keyed by name so each group keeps its colour even when one of
  # the four `test` levels is absent from the data.
  scale_color_manual(values = c(
    "Adult CD4Na" = "#d95f02",
    "Adult CD4Na_NA" = "gray",
    "Pediatric CD4Na" = "#1b9e77",
    "Pediatric CD4Na_NA" = "gray"
  )) +
  # geom_hline(yintercept=5, col="red") +
  ggtitle("JASPAR Motif Enrichment of CD4 Naive Cell States") +
  coord_cartesian(clip = "off") +
  # Compress the crowded (-1, 1) band of the x axis by a factor of 20.
  scale_x_continuous(
    trans = squish_trans(-1, 1, 20),
    breaks = c(-1.5, -1.25, -1, 0, 1, 1.25, 1.5),
    labels = c(-1.5, -1.25, -1, 0, 1, 1.25, 1.5)
  ) +
  geom_text_repel(
    max.overlaps = Inf,
    xlim = c(-Inf, Inf), ylim = c(-Inf, Inf), size = 6
  ) +
  ylab("- Log of Adjusted P Value") +
  xlab("Enrichment in either Positive or Negative Different Peakset") +
  theme(
    plot.margin = unit(c(2, 5, 2, 15), "mm"),
    # NOTE(review): `size` in element_rect() is deprecated from ggplot2 3.4.0
    # in favour of `linewidth`; kept so older ggplot2 versions still work.
    panel.border = element_rect(colour = "black", fill = NA, size = 0.5)
  )

In [None]:
# Write the volcano plot (no threshold lines) to plots/cd4na_age_volcano.pdf.
# Make sure the output directory exists before opening the device.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/cd4na_age_volcano.pdf", width = 11.5, height = 10)
# Explicit print(): a ggplot is only drawn when printed, and auto-printing
# does not happen when this code runs via source() or inside a function.
print(
  ggplot(JDF, aes(x = Enrichment, y = mlog10Padj, label = delabel)) +
    geom_point(aes(color = test, size = test2)) +
    theme_minimal() +
    # geom_vline(xintercept=c(-1, 1), col="red") +
    # Colours are keyed by name so each group keeps its colour even when one
    # of the four `test` levels is absent from the data.
    scale_color_manual(values = c(
      "Adult CD4Na" = "#d95f02",
      "Adult CD4Na_NA" = "gray",
      "Pediatric CD4Na" = "#1b9e77",
      "Pediatric CD4Na_NA" = "gray"
    )) +
    # geom_hline(yintercept=5, col="red") +
    ggtitle("JASPAR Motif Enrichment of CD4 Naive Cell States") +
    coord_cartesian(clip = "off") +
    # Compress the crowded (-1, 1) band of the x axis by a factor of 20.
    scale_x_continuous(
      trans = squish_trans(-1, 1, 20),
      breaks = c(-1.5, -1.25, -1, 0, 1, 1.25, 1.5),
      labels = c(-1.5, -1.25, -1, 0, 1, 1.25, 1.5)
    ) +
    geom_text_repel(
      max.overlaps = Inf,
      xlim = c(-Inf, Inf), ylim = c(-Inf, Inf), size = 6
    ) +
    ylab("- Log of Adjusted P Value") +
    xlab("Enrichment in either Positive or Negative Different Peakset") +
    theme(
      plot.margin = unit(c(2, 5, 2, 15), "mm"),
      panel.border = element_rect(colour = "black", fill = NA, size = 0.5)
    )
)
# Close the pdf device so the file is flushed to disk.
dev.off()

## Marker Peaks by Age

In [None]:
# Count how many entries fall under each `age_celltype` value in the project.
table(proj_subset$age_celltype)

In [None]:
# Pediatric vs Senior differential peaks for CD4 Naive, capped at 1000 cells.
# This overwrites the earlier `markerPeaks_na` that used maxCells = 10000.
markerPeaks_na <- getMarkerFeatures(
  ArchRProj = proj_subset,
  useMatrix = "PeakMatrix",
  groupBy = "age_celltype",
  useGroups = "Pediatric_CD4 Naive",
  bgdGroups = "Senior_CD4 Naive",
  maxCells = 1000
)

In [None]:
# Pediatric vs Senior differential peaks for CD4 SCM, capped at 1000 cells.
markerPeaks_scm <- getMarkerFeatures(
  ArchRProj = proj_subset,
  useMatrix = "PeakMatrix",
  groupBy = "age_celltype",
  useGroups = "Pediatric_CD4 SCM",
  bgdGroups = "Senior_CD4 SCM",
  maxCells = 1000
)

In [None]:
# Pediatric vs Senior differential peaks for CD25neg Treg, capped at 1000 cells.
markerPeaks_treg <- getMarkerFeatures(
  ArchRProj = proj_subset,
  useMatrix = "PeakMatrix",
  groupBy = "age_celltype",
  useGroups = "Pediatric_CD25neg Treg",
  bgdGroups = "Senior_CD25neg Treg",
  maxCells = 1000
)

In [None]:
# Keep peaks passing FDR <= 0.1 and Log2FC >= 0.5 in each comparison.
na_markerList <- getMarkers(
  seMarker = markerPeaks_na,
  cutOff = "FDR <= 0.1 & Log2FC >= 0.5"
)
scm_markerList <- getMarkers(
  seMarker = markerPeaks_scm,
  cutOff = "FDR <= 0.1 & Log2FC >= 0.5"
)
treg_markerList <- getMarkers(
  seMarker = markerPeaks_treg,
  cutOff = "FDR <= 0.1 & Log2FC >= 0.5"
)

In [None]:
# Preview the marker peaks selected for the Pediatric CD4 Naive comparison.
head(na_markerList[["Pediatric_CD4 Naive"]])

In [None]:
# Build a "<seqnames>_<start>" identifier for every marker peak so the three
# peak sets can be compared as plain character vectors.
na_peaks <- local({
  hits <- na_markerList[["Pediatric_CD4 Naive"]]
  paste0(hits$seqnames, "_", hits$start)
})
scm_peaks <- local({
  hits <- scm_markerList[["Pediatric_CD4 SCM"]]
  paste0(hits$seqnames, "_", hits$start)
})
treg_peaks <- local({
  hits <- treg_markerList[["Pediatric_CD25neg Treg"]]
  paste0(hits$seqnames, "_", hits$start)
})

In [None]:
library(UpSetR)

In [None]:
# Named list of peak-ID sets, one entry per cell type, for the UpSet plot.
listInput <- setNames(
  list(na_peaks, scm_peaks, treg_peaks),
  c("Naive", "SCM", "Treg")
)

In [None]:
# UpSet plot of the overlap between the three marker-peak sets, with
# intersections ordered by size.
options(repr.plot.width = 10, repr.plot.height = 5)
upset(
  data = fromList(listInput),
  order.by = "freq",
  text.scale = 2
)

In [None]:
# Write the UpSet plot to plots/dap_upset_plot.pdf.
# Make sure the output directory exists before opening the device.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/dap_upset_plot.pdf", width = 10, height = 5)
# Explicit print() so the plot is drawn on the pdf device even when this code
# is run via source() or inside a function, where auto-printing is off.
print(upset(fromList(listInput), order.by = "freq", text.scale = 2))
# Close the pdf device so the file is flushed to disk.
dev.off()

# chromVAR Plots

### Add WNN UMAP to ArchR Project

In [None]:
# Load the labeled CD4 naive object that holds the WNN UMAP reduction.
cd4_na <- readRDS("cd4_na_labeled.rds")

In [None]:
#' Attach a precomputed embedding to an ArchR project
#'
#' Stores `dfEmbedding` in `ArchRProj@embeddings[[name]]` after prefixing its
#' column names with `type` and "#". Optionally saves a fitted model next to
#' the project and records its path in the embedding parameters.
#'
#' @param ArchRProj ArchR project to modify.
#' @param name Name under which the embedding is stored.
#' @param type Prefix added to the embedding column names.
#' @param dfEmbedding Embedding coordinates whose row names are cell names;
#'   every row name must be a cell of `ArchRProj`.
#' @param model Optional model object; when given it is saved as
#'   `<outputDirectory>/Embeddings/<modelName>.rds`.
#' @param modelName File stem for the saved model; required with `model`.
#' @param embeddingParams Optional parameters stored alongside the embedding;
#'   the saved model path (or NA) is appended as `uwotModel`.
#' @return The modified ArchR project.
addEmbedding <- function(ArchRProj = NULL, name = NULL, type = "Generic",
                         dfEmbedding = NULL, model = NULL, modelName = NULL,
                         embeddingParams = NULL) {
  embedding_cells <- rownames(dfEmbedding)

  # Without row names the membership test below would pass vacuously.
  if (is.null(embedding_cells)) {
    stop("The embedding dataframe must have cell names as row names")
  }
  if (!all(embedding_cells %in% getCellNames(ArchRProj))) {
    stop("All cell names for the embedding dataframe must be present in the ArchR  Project")
  }

  if (!is.null(model)) {
    if (is.null(modelName)) {
      stop("modelName must be provided when a model is supplied")
    }
    route <- paste0(getOutputDirectory(ArchRProj), "/Embeddings/", modelName, ".rds")
    # saveRDS() does not create missing directories.
    dir.create(dirname(route), showWarnings = FALSE, recursive = TRUE)
    saveRDS(model, file = route)
  } else {
    route <- NA
  }

  colnames(dfEmbedding) <- paste(type, colnames(dfEmbedding), sep = "#")

  if (!is.null(embeddingParams)) {
    # Only values known inside this function are recorded. The previous body
    # referenced variables (dimsToUse, scaleDims, corCutOff, nr, nc,
    # estimateUMAP, projectDF) that are not defined here.
    ArchRProj@embeddings[[name]] <- SimpleList(
      df = dfEmbedding,
      params = c(embeddingParams, uwotModel = route)
    )
  } else {
    ArchRProj@embeddings[[name]] <- SimpleList(df = dfEmbedding)
  }

  ArchRProj
}

In [None]:
# Pull the WNN UMAP coordinates and rebuild the row names as
# "<batch_id>-P1_<pbmc_sample_id>#<barcode>".
# NOTE(review): presumably this mirrors the ArchR cell-name format — the next
# cell checks the names against proj_subset$cellNames.
wnn_umap <- cd4_na@reductions$wnn.3.umap@cell.embeddings
rownames(wnn_umap) <- paste0(
  cd4_na$batch_id, "-P1_", cd4_na$pbmc_sample_id, "#", rownames(wnn_umap)
)
head(wnn_umap)

In [None]:
# Sanity checks: are all embedding cells present in the ArchR project, and
# are they already in the same order?
table(is.element(rownames(wnn_umap), proj_subset$cellNames))
table(proj_subset$cellNames == rownames(wnn_umap))

In [None]:
# Reorder the embedding rows to follow the ArchR project's cell order.
# drop = FALSE keeps the result a matrix even if only one row is selected.
wnn_umap_ordered <- wnn_umap[
  match(proj_subset$cellNames, rownames(wnn_umap)), ,
  drop = FALSE
]

In [None]:
# Confirm the reordered embedding now lines up with the project's cell names.
table(proj_subset$cellNames == rownames(wnn_umap_ordered))

In [None]:
# Register the reordered WNN UMAP in the project under the name "WNN_UMAP".
proj_subset <- addEmbedding(
  ArchRProj = proj_subset,
  name = "WNN_UMAP",
  dfEmbedding = wnn_umap_ordered
)

In [None]:
# Look up MotifMatrix features whose names match any of the chosen TFs.
# The TF names are joined into a single "A|B|..." regular expression.
motifs <- c(
  "CREM", "ATF7", "TCFL5", "KLF15", "IRF8", "SP1", "SOX4", "SOX10", "FOXP1"
)
markerMotifs <- getFeatures(
  ArchRProj = proj_subset,
  useMatrix = "MotifMatrix",
  select = paste(motifs, collapse = "|")
)
markerMotifs

In [None]:
# Keep only the features carrying the "z:" prefix.
markerMotifs <- markerMotifs[grepl("z:", markerMotifs)]
# markerMotifs <- markerMotifs[markerMotifs %ni% "z:SREBF1_22"]
markerMotifs

In [None]:
# Compute imputation weights; they are passed to plotEmbedding() below.
proj_subset <- addImputeWeights(ArchRProj = proj_subset)

In [None]:
# One WNN UMAP panel per selected motif feature, coloured by MotifMatrix
# values. Panels follow the sorted order of `markerMotifs`.
options(repr.plot.width = 16, repr.plot.height = 8)
p <- plotEmbedding(
  ArchRProj = proj_subset,
  embedding = "WNN_UMAP",
  colorBy = "MotifMatrix",
  name = sort(markerMotifs),
  imputeWeights = getImputeWeights(proj_subset)
)

In [None]:
# Strip legends and axis decorations from every motif panel and arrange the
# panels in a three-column grid.
options(repr.plot.width = 18, repr.plot.height = 12)
p2 <- lapply(p, function(x) {
  x +
    # "none" replaces the deprecated `FALSE` for hiding a guide.
    guides(color = "none", fill = "none") +
    theme_ArchR(baseSize = 6.5) +
    theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    )
})
cowplot::plot_grid(plotlist = p2, ncol = 3)

In [None]:
# Show the first motif panel on its own at 6 x 6.
options(repr.plot.width = 6, repr.plot.height = 6)
p[[1L]]

In [None]:
# Write motif panel 1 to plots/ATF7_chromvar.pdf.
# NOTE(review): assumes panel 1 is ATF7 (panels follow sort(markerMotifs)) — confirm.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/ATF7_chromvar.pdf", width = 6, height = 6)
# Explicit print() so the plot is drawn on the pdf device even when this code
# is run via source() or inside a function, where auto-printing is off.
print(p[[1]])
# Close the pdf device so the file is flushed to disk.
dev.off()

In [None]:
# Write motif panel 2 to plots/CREM_chromvar.pdf.
# NOTE(review): assumes panel 2 is CREM (panels follow sort(markerMotifs)) — confirm.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/CREM_chromvar.pdf", width = 6, height = 6)
# Explicit print() so the plot is drawn on the pdf device even when this code
# is run via source() or inside a function, where auto-printing is off.
print(p[[2]])
# Close the pdf device so the file is flushed to disk.
dev.off()

In [None]:
# Write motif panel 3 to plots/FOXP1_chromvar.pdf.
# NOTE(review): assumes panel 3 is FOXP1 (panels follow sort(markerMotifs)) — confirm.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/FOXP1_chromvar.pdf", width = 6, height = 6)
# Explicit print() so the plot is drawn on the pdf device even when this code
# is run via source() or inside a function, where auto-printing is off.
print(p[[3]])
# Close the pdf device so the file is flushed to disk.
dev.off()

In [None]:
# Write motif panel 4 to plots/IRF8_chromvar.pdf.
# NOTE(review): assumes panel 4 is IRF8 (panels follow sort(markerMotifs)) — confirm.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/IRF8_chromvar.pdf", width = 6, height = 6)
# Explicit print() so the plot is drawn on the pdf device even when this code
# is run via source() or inside a function, where auto-printing is off.
print(p[[4]])
# Close the pdf device so the file is flushed to disk.
dev.off()

In [None]:
# Write motif panel 5 to plots/KLF15_chromvar.pdf.
# NOTE(review): assumes panel 5 is KLF15 (panels follow sort(markerMotifs)) — confirm.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/KLF15_chromvar.pdf", width = 6, height = 6)
# Explicit print() so the plot is drawn on the pdf device even when this code
# is run via source() or inside a function, where auto-printing is off.
print(p[[5]])
# Close the pdf device so the file is flushed to disk.
dev.off()

In [None]:
# Write motif panel 6 to plots/SOX10_chromvar.pdf.
# NOTE(review): assumes panel 6 is SOX10 (panels follow sort(markerMotifs)) — confirm.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/SOX10_chromvar.pdf", width = 6, height = 6)
# Explicit print() so the plot is drawn on the pdf device even when this code
# is run via source() or inside a function, where auto-printing is off.
print(p[[6]])
# Close the pdf device so the file is flushed to disk.
dev.off()

In [None]:
# Write motif panel 7 to plots/SOX4_chromvar.pdf.
# NOTE(review): assumes panel 7 is SOX4 (panels follow sort(markerMotifs)) — confirm.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/SOX4_chromvar.pdf", width = 6, height = 6)
# Explicit print() so the plot is drawn on the pdf device even when this code
# is run via source() or inside a function, where auto-printing is off.
print(p[[7]])
# Close the pdf device so the file is flushed to disk.
dev.off()

In [None]:
# Write motif panel 8 to plots/SP1_chromvar.pdf.
# NOTE(review): assumes panel 8 is SP1 (panels follow sort(markerMotifs)) — confirm.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/SP1_chromvar.pdf", width = 6, height = 6)
# Explicit print() so the plot is drawn on the pdf device even when this code
# is run via source() or inside a function, where auto-printing is off.
print(p[[8]])
# Close the pdf device so the file is flushed to disk.
dev.off()

In [None]:
# Write motif panel 9 to plots/TCFL5_chromvar.pdf.
# NOTE(review): assumes panel 9 is TCFL5 (panels follow sort(markerMotifs)) — confirm.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/TCFL5_chromvar.pdf", width = 6, height = 6)
# Explicit print() so the plot is drawn on the pdf device even when this code
# is run via source() or inside a function, where auto-printing is off.
print(p[[9]])
# Close the pdf device so the file is flushed to disk.
dev.off()

In [None]:
# Look up MotifMatrix features whose names match TCF7.
motifs <- c("TCF7")
markerMotifs <- getFeatures(
  ArchRProj = proj_subset,
  useMatrix = "MotifMatrix",
  select = paste(motifs, collapse = "|")
)
markerMotifs

In [None]:
# Keep only the features carrying the "z:" prefix.
markerMotifs <- markerMotifs[grepl("z:", markerMotifs)]
# markerMotifs <- markerMotifs[markerMotifs %ni% "z:SREBF1_22"]
markerMotifs

In [None]:
# WNN UMAP coloured by MotifMatrix values for the features selected above.
options(repr.plot.width = 8, repr.plot.height = 8)
p <- plotEmbedding(
  ArchRProj = proj_subset,
  embedding = "WNN_UMAP",
  colorBy = "MotifMatrix",
  name = sort(markerMotifs),
  imputeWeights = getImputeWeights(proj_subset)
)

In [None]:
# Display the plot object(s) currently stored in `p`.
p