In [None]:
# Install SnapATAC from GitHub; install_github() comes from devtools.
library(devtools)
install_github("r3fang/SnapATAC")

In [None]:
# clusterProfiler is distributed through Bioconductor and its name is
# case-sensitive (it is attached further down as library(clusterProfiler)),
# so install it with BiocManager instead of install.packages('ClusterProfiler').
BiocManager::install('clusterProfiler')

In [None]:
# force = TRUE is passed so DO.db is (re)installed even if already present.
BiocManager::install('DO.db',force = TRUE)

# library

In [None]:
library(SnapATAC)
library(Matrix)
library(Seurat)
library(MAESTRO)
library(Signac)
library(qs)
library(harmony)
library(VennDiagram)
library(tidyverse)
library(hrbrthemes)
library(tm)
library(proustr)
library(SingleCellExperiment)
library(clusterProfiler)

In [None]:
# Barcode translation table with (at least) an 'ATAC' and an 'RNA' column.
# header/sep are given explicitly: the columns are addressed by name below, and
# the RP-correlation section reads this same file with header = TRUE, sep = '\t'.
# Without header = TRUE the columns would be named V1, V2, ... unless
# read.table's header auto-detection happened to trigger.
barcode_key <- read.table('example//PBMC/barcode_key.txt', sep = '\t', header = TRUE)
# Index the table by ATAC barcode so rows can be looked up with barcode_key[bc, ].
rownames(barcode_key) <- barcode_key[, 'ATAC']

# ATAC

In [None]:
# Read the PBMC ATAC peak-count matrix from its HDF5 file.
atac_pbmc_count <- Read10X_h5(
  'example/PBMC/data/PBMC_ATAC_500bin/PBMC_500bin_peak_count.h5'
)

In [None]:
# Binarize the count matrix before it goes into clustering.
atac_pbmc_count <- BinarizeCounts(atac_pbmc_count)

In [None]:
# Run ATACRunSeurat on the binarized matrix. The returned list is used below
# through its $ATAC element. All arguments are passed by name.
atac_pbmc_res <- ATACRunSeurat(
  inputMat = atac_pbmc_count,
  project = "atac",
  outdir = 'example/PBMC/data/PBMC_ATAC_500bin/analysis/',
  # filtering thresholds
  min.c = 10,
  min.p = 200,
  # dimension reduction and clustering
  method = "LSI",
  dims.use = 1:30,
  cluster.res = 0.6,
  # cluster-specific peak test
  only.pos = TRUE,
  peaks.test.use = "presto",
  peaks.cutoff = 1e-05,
  peaks.pct = 0.1,
  peaks.logfc = 0.2
)

In [None]:
# Cell annotation table. It is read as a tab-separated file with a header and
# the barcodes in the first column, which is how the RP-correlation section
# reads this same file; the original call relied on read.table's header
# auto-detection and on whitespace splitting.
# NOTE(review): 'Celltype' is used here while the RP-correlation section uses
# a 'Cell.Type' column from the same path - confirm the file's column name.
metadata <- read.table('example/PBMC/analysis/metadata.txt',
                       sep = '\t', header = TRUE, row.names = 1)
# Re-key the rows: look each row name up in barcode_key (indexed by ATAC
# barcode), take its RNA barcode and append the '-1' suffix.
rownames(metadata) <- paste0(barcode_key[rownames(metadata), 'RNA'], '-1')

In [None]:
# Copy the annotation onto the cells of the Seurat object, matched by barcode.
atac_pbmc_res$ATAC@meta.data$Celltype <- metadata[rownames(atac_pbmc_res$ATAC@meta.data), 'Celltype']

In [None]:
DimPlot(atac_pbmc_res$ATAC, group.by = 'Celltype')

In [None]:
# Checkpoint: write the full ATACRunSeurat result to disk.
qsave(
  atac_pbmc_res,
  file = 'example/PBMC/data/PBMC_ATAC_500bin/analysis/atac_pbmc_res.qs'
)

In [None]:
# Restore the checkpoint (lets a fresh session skip the steps above).
atac_pbmc_res <- qread(
  file = 'example/PBMC/data/PBMC_ATAC_500bin/analysis/atac_pbmc_res.qs'
)

In [None]:
# Dump the per-cell metadata as a text table (write.table defaults).
write.table(
  x = atac_pbmc_res$ATAC@meta.data,
  file = 'example/PBMC/data/PBMC_ATAC_500bin/analysis/meta_bin.txt'
)

In [None]:
# Display the per-cell metadata.
atac_pbmc_res$ATAC@meta.data

In [None]:
# Barcodes of the cells whose Celltype is one of the five labels listed here.
useful_bc <- rownames(atac_pbmc_res$ATAC@meta.data)[
  atac_pbmc_res$ATAC@meta.data[, 'Celltype'] %in%
    c('CD14Mono', 'CD16Mono', 'CD4T', 'CD8T', 'B')
]

In [None]:
# Make 'ATAC' the active assay before converting.
DefaultAssay(atac_pbmc_res$ATAC) <- 'ATAC'

In [None]:
# Convert only the selected cells to a SingleCellExperiment.
atac_pbmc_se <- as.SingleCellExperiment(
  atac_pbmc_res$ATAC[, useful_bc]
)

In [None]:
saveRDS(
  atac_pbmc_se,
  file = 'example/PBMC/data/PBMC_ATAC_500bin/analysis/PBMC_TBMono_500bin.rds'
)

# H3K27ac

In [None]:
# Read the H3K27ac peak-count matrix from HDF5 and binarize it in one step.
h3k27ac_pbmc_count <- BinarizeCounts(
  Read10X_h5('example/histone/peak_base/cuttagpro/PBMC_H3K27ac_500bin/PBMC_H3K27ac_500bin_peak_count.h5')
)

In [None]:
# Same ATACRunSeurat settings as the ATAC run above, except project, min.c
# and outdir. All arguments are passed by name.
H3K27ac_pbmc_res <- ATACRunSeurat(
  inputMat = h3k27ac_pbmc_count,
  project = "h3k27ac",
  outdir = 'example/histone/peak_base/cuttagpro/PBMC_H3K27ac_500bin/analysis',
  # filtering thresholds
  min.c = 20,
  min.p = 200,
  # dimension reduction and clustering
  method = "LSI",
  dims.use = 1:30,
  cluster.res = 0.6,
  # cluster-specific peak test
  only.pos = TRUE,
  peaks.test.use = "presto",
  peaks.cutoff = 1e-05,
  peaks.pct = 0.1,
  peaks.logfc = 0.2
)

In [None]:
DimPlot(H3K27ac_pbmc_res$ATAC)

In [None]:
# Load the previously annotated H3K27ac object; only its @meta.data is used.
annotated_h3k27ac <- readRDS(
  file = 'example/histone/peak_base/cuttagpro/H3K27ac.rds'
)

In [None]:
# Export its per-cell metadata as an unquoted, tab-separated table.
write.table(
  x = annotated_h3k27ac@meta.data,
  file = 'example/histone/peak_base/cuttagpro/H3K27ac_metadata.txt',
  sep = '\t',
  quote = FALSE
)

In [None]:
# Note: this reuses the name `metadata`, replacing the PBMC ATAC table.
metadata <- annotated_h3k27ac@meta.data

In [None]:
# Label each cell with the 'predicted.celltype.l1' value of the same barcode.
H3K27ac_pbmc_res$ATAC@meta.data$Celltype <- metadata[
  rownames(H3K27ac_pbmc_res$ATAC@meta.data),
  'predicted.celltype.l1'
]

In [None]:
DimPlot(H3K27ac_pbmc_res$ATAC, group.by = 'Celltype')

In [None]:
# Checkpoint the annotated H3K27ac result.
qsave(
  H3K27ac_pbmc_res,
  file = 'example/histone/peak_base/cuttagpro/PBMC_H3K27ac_500bin/analysis/H3K27ac_pbmc_res.qs'
)

In [None]:
# Restore the checkpoint.
H3K27ac_pbmc_res <- qread(
  file = 'example/histone/peak_base/cuttagpro/PBMC_H3K27ac_500bin/analysis/H3K27ac_pbmc_res.qs'
)

In [None]:
# SingleCellExperiment view of the real H3K27ac data.
real_h3k27ac <- as.SingleCellExperiment(H3K27ac_pbmc_res$ATAC)

In [None]:
saveRDS(
  real_h3k27ac,
  file = 'example/histone/peak_base/cuttagpro/PBMC_H3K27ac_500bin/analysis/real_SE.rds'
)

In [None]:
# Writes the PBMC ATAC metadata to meta_bin.txt again (same call as in the
# ATAC section).
write.table(
  x = atac_pbmc_res$ATAC@meta.data,
  file = 'example/PBMC/data/PBMC_ATAC_500bin/analysis/meta_bin.txt'
)

In [None]:
# Read the imputed H3K27ac matrix from a 10x-style directory; gene.column = 1
# selects the first column of the features file for the row names.
atac_imputed_h3k27ac_count <- Read10X(
  'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/imputationPBMC1022/imputed_H3K27ac_mtx/',
  gene.column = 1
)

In [None]:
# Quick size check (features x cells).
dim(atac_imputed_h3k27ac_count)

In [None]:
# Same ATACRunSeurat settings as the real H3K27ac run; only the input matrix
# and outdir differ. The imputed matrix is not binarized first.
atac_impute_h3k27ac_res <- ATACRunSeurat(
  inputMat = atac_imputed_h3k27ac_count,
  project = "h3k27ac",
  outdir = 'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/imputationPBMC1022/analysis',
  # filtering thresholds
  min.c = 20,
  min.p = 200,
  # dimension reduction and clustering
  method = "LSI",
  dims.use = 1:30,
  cluster.res = 0.6,
  # cluster-specific peak test
  only.pos = TRUE,
  peaks.test.use = "presto",
  peaks.cutoff = 1e-05,
  peaks.pct = 0.1,
  peaks.logfc = 0.2
)

In [None]:
DimPlot(atac_impute_h3k27ac_res$ATAC)

In [None]:
imputed_h3k27ac_se <- as.SingleCellExperiment(
  atac_impute_h3k27ac_res$ATAC
)

In [None]:
# Persist both the full result list and the SingleCellExperiment.
qsave(
  atac_impute_h3k27ac_res,
  file = 'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/imputationPBMC1022/analysis/seurat.qs'
)

In [None]:
saveRDS(
  imputed_h3k27ac_se,
  file = 'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/imputationPBMC1022/analysis/SE.rds'
)

In [None]:
# From here on real_h3k27ac is the Seurat object itself; this replaces the
# SingleCellExperiment stored under the same name earlier.
real_h3k27ac <- H3K27ac_pbmc_res$ATAC

In [None]:
# Re-read and binarize the real H3K27ac counts (same file as in the H3K27ac
# section), so this part can run without the earlier cells' matrix.
h3k27ac_pbmc_count <- BinarizeCounts(
  Read10X_h5('example/histone/peak_base/cuttagpro/PBMC_H3K27ac_500bin/PBMC_H3K27ac_500bin_peak_count.h5')
)

In [None]:
impute_h3k27ac <- atac_impute_h3k27ac_res$ATAC

In [None]:
# Take the imputed counts from the Seurat object's ATAC assay.
atac_imputed_h3k27ac_count <- atac_impute_h3k27ac_res$ATAC@assays$ATAC@counts

In [None]:
# Bind reticulate to a specific Python interpreter; required = TRUE makes it
# an error if that interpreter cannot be used.
library(reticulate)
use_python(
  python = "/fs/home/dongxin/Applications/miniconda3/envs/MAESTRO/bin/python",
  required = TRUE
)

In [None]:
# Gene scores for the real H3K27ac data (10 kb decay distance, 'Simple' model).
real.pbmc.gene_10k <- ATACCalculateGenescore(h3k27ac_pbmc_count, organism = "GRCh38", decaydistance = 10000, model = 'Simple')
# Attach the scores to the Seurat object. This call was commented out, but the
# cells below plot a gene ('STAT1') on real_h3k27ac and read
# real_h3k27ac@assays$ACTIVITY, both of which need the attached assay.
# The score matrix is computed from the raw count file, so it is subset to the
# cells kept in the Seurat object before attaching.
# NOTE(review): assumes every cell of the object is a column of the score
# matrix - confirm ATACCalculateGenescore keeps all input barcodes.
real_h3k27ac <- ATACAttachGenescore(
  ATAC = real_h3k27ac,
  RPmatrix = real.pbmc.gene_10k[, colnames(real_h3k27ac), drop = FALSE]
)

In [None]:
# Same for the imputed data; its counts already come from the Seurat object,
# so the columns match the object's cells.
impute.pbmc.gene_10k <- ATACCalculateGenescore(atac_imputed_h3k27ac_count, organism = "GRCh38", decaydistance = 10000, model = 'Simple')
impute_h3k27ac <- ATACAttachGenescore(ATAC = impute_h3k27ac, RPmatrix = impute.pbmc.gene_10k)

In [None]:
# STAT1 on the real H3K27ac object.
FeaturePlot(real_h3k27ac, features = 'STAT1')

In [None]:
# STAT1 on the imputed object, for comparison.
FeaturePlot(impute_h3k27ac, features = 'STAT1')

In [None]:
# Checkpoints: both Seurat objects and the imputed gene-score matrix.
qsave(
  real_h3k27ac,
  file = 'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/real_RP_inte_10k.qs'
)

In [None]:
qsave(
  impute_h3k27ac,
  file = 'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/imputationPBMC1022/imputed_RP_inte_10k.qs'
)

In [None]:
qsave(
  impute.pbmc.gene_10k,
  file = 'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/impute_RP_inte_10k.qs'
)

In [None]:
# Restore the two Seurat objects from their checkpoints.
real_h3k27ac <- qread(
  file = 'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/real_RP_inte_10k.qs'
)

In [None]:
impute_h3k27ac <- qread(
  file = 'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/imputationPBMC1022/imputed_RP_inte_10k.qs'
)

In [None]:
# Per-cell metadata of the real object as an unquoted, tab-separated table.
write.table(
  x = real_h3k27ac@meta.data,
  file = 'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/real_meta_data.txt',
  sep = '\t',
  quote = FALSE
)

In [None]:
# Monocyte barcodes of the real data. They are derived here from the object's
# own metadata, using the same rule the "RP correlation" section applies, so
# this cell no longer depends on real_mono_bc being assigned by a later cell.
real_mono_bc <- rownames(real_h3k27ac@meta.data)[which(real_h3k27ac@meta.data[, 'Celltype'] == 'Mono')]
# Sum the ATAC counts per feature over those cells. drop = FALSE keeps a
# matrix even when a single barcode is selected, which rowSums() requires.
# NOTE(review): the output has two columns (row name, summed count); confirm
# the row names are in a form the track tool accepts for a bedGraph.
tmp_matrix <- as.matrix(rowSums(real_h3k27ac@assays$ATAC@counts[, real_mono_bc, drop = FALSE]))
write.table(tmp_matrix, '/fs/home/dongxin/Projects/SCRIPT/scATAC/example/histone/trackplot/real_mono.bedgraph', sep='\t', col.names = FALSE, quote=FALSE)

In [None]:
# Monocyte barcodes of the imputed data, also derived locally (they were only
# assigned in the later "RP correlation" section). Each ATAC barcode in the
# annotation table is translated to its RNA barcode with match(), which keeps
# the result in the annotation table's row order.
impute_mono_bc <- local({
  meta <- read.csv('example/PBMC/analysis/metadata.txt', sep = '\t', row.names = 1)
  bc_key <- read.table('example/PBMC/barcode_key.txt', sep = '\t', header = TRUE)
  rna_bc <- bc_key[match(rownames(meta), bc_key$ATAC), 'RNA']
  rna_bc[which(meta[, 'Cell.Type'] == "Mono")]
})
# Barcodes in the object carry a '-1' suffix; keep only those that are present.
tmp_matrix <- as.matrix(rowSums(impute_h3k27ac@assays$ATAC@counts[, intersect(paste0(impute_mono_bc,'-1'), colnames(impute_h3k27ac@assays$ATAC@counts)), drop = FALSE]))
write.table(tmp_matrix, '/fs/home/dongxin/Projects/SCRIPT/scATAC/example/histone/trackplot/impute_mono.bedgraph', sep='\t', col.names = FALSE, quote=FALSE)

In [None]:
# Pull the gene-score count matrices back out of the ACTIVITY assay of each
# Seurat object (overwrites the matrices computed earlier under these names).
impute.pbmc.gene_10k <- impute_h3k27ac@assays$ACTIVITY@counts

In [None]:
real.pbmc.gene_10k <- real_h3k27ac@assays$ACTIVITY@counts

In [None]:
# Wrap the real gene-score matrix in a SingleCellExperiment; the matrix is
# passed positionally, so the assay is stored without an explicit name.
real.pbmc.gene_10k_se <- SingleCellExperiment(
  real.pbmc.gene_10k
)

In [None]:
saveRDS(
  real.pbmc.gene_10k_se,
  file = 'example/histone/peak_base/cuttagpro/SCRIPT_PBMC/real_RP.rds'
)

## RP correlation

In [None]:
# Annotation table of the imputed cells; row names are ATAC barcodes.
imputed_metadata <- read.csv('example/PBMC/analysis/metadata.txt', sep = '\t', row.names = 1)
# Barcode translation table with 'ATAC' and 'RNA' columns.
key <- read.table('example/PBMC/barcode_key.txt', sep = '\t', header = TRUE)
# Translate every ATAC barcode to its RNA barcode, in the row order of
# imputed_metadata. match() returns, per metadata row, the position of that
# barcode in the key. The previous `key[which(key$ATAC %in% atac_bc), 'RNA']`
# returned the RNA barcodes in key-file order, which attaches the wrong
# barcode to a row whenever the two files are ordered differently.
atac_bc <- rownames(imputed_metadata)
atac_bc <- key[match(atac_bc, key$ATAC), 'RNA']
if (anyNA(atac_bc)) {
  # Row names cannot be NA; fail with an explicit reason instead.
  stop(sum(is.na(atac_bc)),
       " barcode(s) in metadata.txt have no entry in barcode_key.txt",
       call. = FALSE)
}
# As before, atac_bc ends up holding the RNA barcodes.
rownames(imputed_metadata) <- atac_bc

In [None]:
# Dense copies of both gene-score matrices, plus the real object's per-cell
# metadata, for the per-cell-type averages below.
real_metadata <- real_h3k27ac@meta.data
real.pbmc.gene_10k_matrix <- as.matrix(real.pbmc.gene_10k)
impute.pbmc.gene_10k_matrix <- as.matrix(impute.pbmc.gene_10k)

In [None]:
# Imputed data: barcodes annotated as "T".
impute_t_bc <- rownames(imputed_metadata)[which(imputed_metadata[, 'Cell.Type'] == "T")]
# Mean gene score per gene over those cells. Matrix columns carry a '-1'
# suffix, and only barcodes present in the matrix are used. drop = FALSE keeps
# a matrix when exactly one cell matches; rowMeans() replaces the slower
# apply(..., 1, mean).
impute_t_rp <- rowMeans(
  impute.pbmc.gene_10k_matrix[
    , intersect(paste0(impute_t_bc, '-1'), colnames(impute.pbmc.gene_10k_matrix)),
    drop = FALSE
  ]
)

In [None]:
# Real data: the three T labels are pooled. %in% treats a missing label as
# "not selected", the same rows the chained == / | inside which() yielded.
real_t_bc <- rownames(real_metadata)[real_metadata[, 'Celltype'] %in% c('CD4 T', 'CD8 T', 'other T')]
real_t_rp <- rowMeans(
  real.pbmc.gene_10k_matrix[
    , intersect(real_t_bc, colnames(real.pbmc.gene_10k_matrix)),
    drop = FALSE
  ]
)

In [None]:
# First 1000 distinct values of column V7 of the bulk T gene-score table.
# head() returns at most 1000 entries; the previous `[0:1000]` index selected
# the same elements but padded the result with NA when fewer were available.
bulk_target <- head(
  unique(read.table('/fs/home/dongxin/Projects/SCRIPT/scATAC/example/histone/62350_gene_score_5fold_T.txt', comment.char = '#')$V7),
  1000
)

In [None]:
# Three-way Venn diagram of the top-1000 T-cell genes: real data, imputed
# data, and the bulk target list. Written to 'venn_t.png'.
# head() replaces `[0:1000]` so that a vector with fewer than 1000 genes does
# not contribute NA names as a spurious shared set member.
venn.diagram(
  x = list(
    names(head(sort(real_t_rp, decreasing = TRUE), 1000)),
    names(head(sort(impute_t_rp, decreasing = TRUE), 1000)),
    bulk_target
    ),
  category.names = c("scCUT&Pro" , "SCRIPT Imputed" , "Bulk"),
  filename = 'venn_t.png',
  output = TRUE ,
  imagetype="png" ,
  height = 800 , 
  width = 800 , 
  resolution = 300,
  compression = "lzw",
  lwd = 1,
  col=c("#440154ff", '#21908dff', '#fc2727ff'),
  fill = c(alpha("#440154ff",0.3), alpha('#21908dff',0.3), alpha('#fc2727ff',0.3)),
  cex = 1,
  fontfamily = "Arial",
  cat.cex = 0.7,
  cat.default.pos = "outer",
  cat.pos = c(-27, 22, 135),
  cat.dist = c(0.055, 0.055, 0.085),
  cat.fontfamily = "Arial",
  cat.col = c("#440154ff", '#21908dff', '#fc2727ff'),
  rotation = 1
        )

In [None]:
# Spearman correlation between imputed and real mean T-cell gene scores.
# NOTE(review): the vectors are paired by position - confirm both matrices
# list the genes in the same order.
cor(x = impute_t_rp, y = real_t_rp, method = "spearman")

In [None]:
# Scatter of real (x) against imputed (y) scores with a linear fit and rugs.
ggplot(
  data = data.frame(impute_t_rp, real_t_rp),
  mapping = aes(x = real_t_rp, y = impute_t_rp)
) +
  geom_point() +
  geom_smooth(method = lm) +
  theme_classic() +
  geom_rug() +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

In [None]:
# Imputed data: barcodes annotated as "Mono".
impute_mono_bc <- rownames(imputed_metadata)[which(imputed_metadata[, 'Cell.Type'] == "Mono")]
# Mean gene score per gene over those cells. Matrix columns carry a '-1'
# suffix, and only barcodes present in the matrix are used. drop = FALSE keeps
# a matrix when exactly one cell matches; rowMeans() replaces the slower
# apply(..., 1, mean).
impute_mono_rp <- rowMeans(
  impute.pbmc.gene_10k_matrix[
    , intersect(paste0(impute_mono_bc, '-1'), colnames(impute.pbmc.gene_10k_matrix)),
    drop = FALSE
  ]
)

In [None]:
# Real data: barcodes labelled 'Mono', averaged the same way.
real_mono_bc <- rownames(real_metadata)[which(real_metadata[, 'Celltype'] == 'Mono')]
real_mono_rp <- rowMeans(
  real.pbmc.gene_10k_matrix[
    , intersect(real_mono_bc, colnames(real.pbmc.gene_10k_matrix)),
    drop = FALSE
  ]
)

In [None]:
# Spearman correlation between imputed and real mean monocyte gene scores
# (vectors are paired by position).
cor(x = impute_mono_rp, y = real_mono_rp, method = "spearman")

In [None]:
# Scatter of real (x) against imputed (y) scores with a linear fit and rugs.
ggplot(
  data = data.frame(impute_mono_rp, real_mono_rp),
  mapping = aes(x = real_mono_rp, y = impute_mono_rp)
) +
  geom_point() +
  geom_smooth(method = lm) +
  theme_classic() +
  geom_rug() +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

In [None]:
# Cross-cell-type comparison: imputed T scores against real monocyte scores.
cor(x = impute_t_rp, y = real_mono_rp, method = 'spearman')

In [None]:
# Scatter of real monocyte (x) against imputed T (y) scores.
ggplot(
  data = data.frame(impute_t_rp, real_mono_rp),
  mapping = aes(x = real_mono_rp, y = impute_t_rp)
) +
  geom_point() +
  geom_smooth(method = lm) +
  theme_classic() +
  geom_rug() +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

In [None]:
# First 50 entries of column V1 of the imputed T-cell RP gene list.
# head() returns at most 50 entries; the previous `[0:50]` index selected the
# same elements but padded the result with NA when the file had fewer rows.
T_target <- head(
  read.table('/fs/home/dongxin/Projects/SCRIPT/scATAC/example/histone/SCRIPT_1114_remove_others/imputation/H3K27ac_T_rp_gene.txt')$V1,
  50
)