In [None]:

# Packages used throughout the notebook. The attach order is kept as-is because
# it determines which package masks which function.
library(tidyverse)
suppressMessages(library(ArchR))  # silence ArchR's start-up messages
library(patchwork)
library(Seurat)

# ArchR session configuration: number of worker threads and reference genome.
addArchRThreads(threads = 20)
addArchRGenome(genome = "mm10")
library(valr)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [None]:


# Read the serialized scRNA-seq object from disk.
scrna.file <- "/zliu_ssd/CHARM/CHARM_brain/1_createobj_embedding/charm_subset.rds"
scrna.object <- readRDS(file = scrna.file)

# Column names of the object are its cell identifiers; show how many there are
# and what the first few look like.
rna_cells <- colnames(scrna.object)
length(rna_cells)
head(rna_cells)

In [None]:

# Make "RNA" the active assay before feature selection.
DefaultAssay(scrna.object) <- "RNA"

# Rank features with the "vst" method, requesting 5000 variable features.
scrna.object <- FindVariableFeatures(
  scrna.object,
  assay = "RNA",
  selection.method = "vst",
  nfeatures = 5000
)

# Take at most the first 5000 variable features ...
top_genes <- head(
  VariableFeatures(scrna.object, assay = "RNA"),
  5000
)

# ... and restrict the object to those features.
scrna.object_top5000 <- subset(scrna.object, features = top_genes)


Finding variable features for layer counts



Loading required package: Signac



In [None]:


# Replace the RNA assay of the top-5000 object with a fresh assay built from the
# raw counts only, re-run feature selection, prefix the cell names with "ATAC#"
# and save the result for SCARlink.

# Pull the counts out of the current RNA assay.
old_rna <- scrna.object_top5000[["RNA"]]
counts_mat <- old_rna$counts  # 5000 x 3620 dgCMatrix (as noted by the author)
dim(counts_mat)

# Wrap the counts in a new assay object and swap it into the Seurat object.
new_rna_assay <- CreateAssayObject(counts = counts_mat)
class(new_rna_assay)

scrna.object_top5000[["RNA"]] <- new_rna_assay
DefaultAssay(scrna.object_top5000) <- "RNA"

# Feature selection is repeated because the assay was replaced.
scrna.object_top5000 <- FindVariableFeatures(
  scrna.object_top5000,
  assay = "RNA",
  selection.method = "vst",
  nfeatures = 5000
)

# Cell names get the "ATAC#" prefix (the same prefix is used later when
# selecting cells from the ArchR project).
colnames(scrna.object_top5000) <- paste0(
  "ATAC#",
  colnames(scrna.object_top5000)
)

# Peek at the per-feature metadata, then write the object to disk.
scrna.object_top5000[["RNA"]]@meta.features[1:5, , drop = FALSE]
saveRDS(scrna.object_top5000, file = "./scrna_top5000.rds")

In [None]:



# Build the Arrow file and the ArchR project from the paired ATAC fragments.
#
# The sample name is set to "ATAC" (it was "CHARM"). ArchR prefixes every cell
# name with the sample name ("<sample>#<barcode>"), and the rest of this
# notebook prefixes the RNA cell names with "ATAC#" both when saving the Seurat
# object and when selecting cells from the ArchR project. With any other sample
# name the RNA and ATAC cell names cannot match.
inputFiles <- c(
  ATAC = "/zliu_ssd/CHARM/CHARM_brain/data/fragments/atac.paired.fragments.bgz"
)

# Quality filters: no TSS-enrichment cutoff, at least 500 fragments per cell,
# no upper bound. The tile matrix is not added here; only the gene score
# matrix is. chrM and chrY are excluded.
ArrowFiles <- createArrowFiles(
  inputFiles = inputFiles,
  sampleNames = names(inputFiles),
  minTSS = 0,
  minFrags = 500,
  maxFrags = Inf,
  addTileMat = FALSE,
  addGeneScoreMat = TRUE,
  force = TRUE,
  excludeChr = c("chrM", "chrY")
)

# Create the project; Arrow files are copied into the output directory.
proj <- suppressMessages(
  ArchRProject(
    ArrowFiles = ArrowFiles,
    outputDirectory = "CHARM_count",
    copyArrows = TRUE
  )
)

Using GeneAnnotation set by addArchRGenome(Mm10)!

Using GeneAnnotation set by addArchRGenome(Mm10)!

ArchR logging to : ArchRLogs/ArchR-createArrows-2f04a5487ca-Date-2025-12-07_Time-16-55-01.897882.log
If there is an issue, please report to github with logFile!



In [None]:

# Restrict the ArchR project to the cells present in the scRNA-seq object.
cellnames <- rna_cells
cellnames_prefixed <- paste0("ATAC#", cellnames)

# Only cells that actually exist in the ArchR project can be selected; report
# the ones that are absent instead of failing inside ArchR.
atac_cells <- getCellNames(ArchRProj = proj)
cells_to_keep <- intersect(cellnames_prefixed, atac_cells)
n_missing <- length(cellnames_prefixed) - length(cells_to_keep)

if (length(cells_to_keep) == 0) {
  stop(
    "None of the ", length(cellnames_prefixed),
    " RNA cells were found in the ArchR project; check that the cell-name ",
    "prefix matches the ArchR sample name.",
    call. = FALSE
  )
}
if (n_missing > 0) {
  warning(
    n_missing, " RNA cells are absent from the ArchR project and were skipped.",
    call. = FALSE
  )
}

# The subset is written to its own directory. The original call reused
# "CHARM_count", which is the output directory of `proj` itself, so the copy
# collided with the project it was being made from.
# NOTE(review): the later cells visible here keep operating on `proj`, not on
# `proj_sub` -- confirm which project the tile matrix / LSI should be built on.
proj_sub <- subsetArchRProject(
  ArchRProj = proj,
  cells = cells_to_keep,
  outputDirectory = "CHARM_count_subset",
  dropCells = FALSE
)

Copying ArchRProject to new outputDirectory : /shared/mwang/CHARM-seq/SCARlink/notebooks/CHARM_ATAC_reordered

Copying Arrow Files...

Copying Arrow Files (1 of 1)

Getting ImputeWeights

No imputeWeights found, returning NULL

Copying Other Files...

Saving ArchRProject...

Loading ArchRProject...

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \        

In [None]:

# Add a non-binarized 500-bp tile matrix to the project, replacing any existing one.
proj <- addTileMatrix(
  input = proj,
  tileSize = 500,
  binarize = FALSE,
  force = TRUE
)

ArchR logging to : ArchRLogs/ArchR-addTileMatrix-302a4225fe04fb-Date-2025-10-29_Time-11-13-49.561095.log
If there is an issue, please report to github with logFile!

2025-10-29 11:14:07.457691 : Batch Execution w/ safelapply!, 0 mins elapsed.

.createArrowGroup : Arrow Group already exists! Dropping Group from ArrowFile! This will take ~10-30 seconds!

.dropGroupsFromArrow : Initializing Temp ArrowFile

.dropGroupsFromArrow : Adding Metadata to Temp ArrowFile

.dropGroupsFromArrow : Adding SubGroups to Temp ArrowFile

.dropGroupsFromArrow : Move Temp ArrowFile to ArrowFile

2025-10-29 11:22:56.837302 : Adding TileMatrix to ATAC for Chr (1 of 20)!, 0.206 mins elapsed.

2025-10-29 11:23:29.282517 : Adding TileMatrix to ATAC for Chr (2 of 20)!, 0.746 mins elapsed.

2025-10-29 11:23:57.086658 : Adding TileMatrix to ATAC for Chr (3 of 20)!, 1.21 mins elapsed.

2025-10-29 11:24:20.742255 : Adding TileMatrix to ATAC for Chr (4 of 20)!, 1.604 mins elapsed.

2025-10-29 11:24:38.379451 : Adding 

In [None]:

# Iterative LSI on the tile matrix: 2 iterations, 10000 variable features,
# dimensions 1 to 30. The result is stored under the name "IterativeLSI" and
# overwrites any previous reduction of that name.
proj <- addIterativeLSI(
  ArchRProj = proj,
  useMatrix = "TileMatrix",
  name = "IterativeLSI",
  iterations = 2,
  varFeatures = 10000,
  dimsToUse = 1:30,
  # Parameters forwarded to the clustering step of each iteration.
  clusterParams = list(resolution = 0.2, sampleCells = NULL, n.start = 10),
  force = TRUE
)

Checking Inputs...

ArchR logging to : ArchRLogs/ArchR-addIterativeLSI-302a4275c64d25-Date-2025-10-29_Time-11-50-39.182945.log
If there is an issue, please report to github with logFile!

2025-10-29 11:51:23.81343 : Computing Total Across All Features, 0.401 mins elapsed.

2025-10-29 11:51:39.575349 : Computing Top Features, 0.664 mins elapsed.

###########
2025-10-29 11:51:40.921595 : Running LSI (1 of 2) on Top Features, 0.686 mins elapsed.
###########

2025-10-29 11:51:41.057361 : Creating Partial Matrix, 0.689 mins elapsed.

2025-10-29 11:52:56.716449 : Computing LSI, 1.95 mins elapsed.

2025-10-29 11:53:22.084226 : Identifying Clusters, 2.372 mins elapsed.

2025-10-29 11:53:31.795625 : Identified 4 Clusters, 2.534 mins elapsed.

2025-10-29 11:53:31.976689 : Saving LSI Iteration, 2.537 mins elapsed.

2025-10-29 11:53:42.599726 : Creating Cluster Matrix on the total Group Features, 2.714 mins elapsed.

2025-10-29 11:53:56.100803 : Computing Variable Features, 2.939 mins elapsed.

##

In [None]:

# Persist the project (including the LSI) to "CHARM_count_LSI" without
# reloading it into the session.
saveArchRProject(ArchRProj = proj, outputDirectory = "CHARM_count_LSI", load = FALSE)

Copying ArchRProject to new outputDirectory : /shared/mwang/CHARM-seq/SCARlink/notebooks/CHARM_count_LSI

Copying Arrow Files...

Copying Arrow Files (1 of 1)



Getting ImputeWeights

No imputeWeights found, returning NULL

Copying Other Files...

Copying Other Files (1 of 1): IterativeLSI

Saving ArchRProject...



In [None]:

# Reload the saved project from its absolute location on disk.
proj <- loadArchRProject(
  path = "/share/home/mwang/shared/mwang/CHARM-seq/SCARlink/notebooks/CHARM_count_LSI"
)

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _ 

In [None]:

# Extract the non-binarized tile matrix from the project for inspection.
mat <- getMatrixFromProject(
  ArchRProj = proj,
  useMatrix = "TileMatrix",
  binarize = FALSE
)

ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-302a4237b84b3b-Date-2025-10-29_Time-11-08-01.322289.log
If there is an issue, please report to github with logFile!

2025-10-29 11:11:38.41061 : Organizing colData, 3.624 mins elapsed.

2025-10-29 11:11:38.869839 : Organizing rowData, 3.631 mins elapsed.

2025-10-29 11:11:39.108952 : Organizing rowRanges, 3.635 mins elapsed.

2025-10-29 11:11:39.439218 : Organizing Assays (1 of 1), 3.641 mins elapsed.

2025-10-29 11:11:39.740028 : Constructing SummarizedExperiment, 3.646 mins elapsed.

2025-10-29 11:11:40.908614 : Finished Matrix Creation, 3.665 mins elapsed.



In [None]:


# Run the SCARlink preprocessing command-line tool on the saved Seurat object
# and ArchR project.
#
# The original line used the IPython shell escape (`!command`), which is not
# valid R syntax; system2() runs the same command with the same arguments from
# an R session and returns its exit status.
scarlink_processing_status <- system2(
  command = "scarlink_processing",
  args = c(
    "--scrna",
    shQuote("/share/home/mwang/shared/mwang/CHARM-seq/SCARlink/notebooks/scrna_top5000.rds"),
    "--scatac",
    shQuote("/share/home/mwang/shared/mwang/CHARM-seq/SCARlink/notebooks/CHARM_count_LSI"),
    "-g", "mm10",
    "-nc", "42",
    "-o",
    shQuote("/share/home/mwang/shared/mwang/CHARM-seq/SCARlink/notebooks/CHARM_out")
  )
)

# Fail loudly instead of continuing with a missing or partial output directory.
if (scarlink_processing_status != 0) {
  stop(
    "scarlink_processing failed with exit status ", scarlink_processing_status,
    call. = FALSE
  )
}

In [None]:


# Run the SCARlink model-fitting command-line tool on the preprocessed output
# directory.
#
# The original line used the IPython shell escape (`!command`), which is not
# valid R syntax; system2() runs the same command with the same arguments from
# an R session and returns its exit status.
# NOTE(review): `-np 42` is passed without any process-id option -- confirm
# against the SCARlink CLI documentation that this processes all genes.
scarlink_status <- system2(
  command = "scarlink",
  args = c(
    "-o",
    shQuote("/share/home/mwang/shared/mwang/CHARM-seq/SCARlink/notebooks/CHARM_out"),
    "-g", "mm10",
    "-np", "42"
  )
)

# Fail loudly so the log-parsing step does not run on an incomplete log.
if (scarlink_status != 0) {
  stop("scarlink failed with exit status ", scarlink_status, call. = FALSE)
}

In [None]:

# Parse a SCARlink log file into a per-gene table of test-set Spearman
# correlations, write the results to disk and print summary statistics.
library(ggplot2)
library(stringr)

logfile <- "./log/scarlink_log_None_03_41_08_30_10_2025.log"
if (!file.exists(logfile)) {
  stop("SCARlink log file not found: ", logfile, call. = FALSE)
}

loglines <- readLines(logfile)

# Genes reported as too sparse.
sparse_lines <- grep("expression too sparse", loglines, value = TRUE)
sparse_genes <- str_match(
  sparse_lines,
  "INFO\\s+([^ ]+)\\s+expression too sparse"
)[, 2]

# Genes for which a "Training regression model on" line was logged.
train_lines <- grep("Training regression model on", loglines, value = TRUE)
train_genes <- str_match(
  train_lines,
  "Training regression model on\\s+([^ ]+)"
)[, 2]

# Test-set Spearman correlations, in log order.
corr_lines <- grep("Spearman corr on test set", loglines, value = TRUE)
corr_values <- as.numeric(
  str_match(corr_lines, "Spearman corr on test set:\\s+([-0-9.eE]+)")[, 2]
)

length(train_genes)
length(corr_values)

# Genes expected to have a correlation: logged as trained and not flagged sparse.
test_genes <- train_genes[!train_genes %in% sparse_genes]
length(test_genes)

# Genes and correlations are paired purely by position, so the two vectors must
# have the same length. data.frame() would silently recycle when one length
# divides the other, mis-assigning correlations to genes.
if (length(test_genes) != length(corr_values)) {
  stop(
    sprintf(
      "Cannot pair genes with correlations: %d non-sparse trained genes but %d correlation values in %s",
      length(test_genes), length(corr_values), logfile
    ),
    call. = FALSE
  )
}

df <- data.frame(
  gene = test_genes,
  test_corr = corr_values
)

# The original code wrote the gene list and the full table to the same file,
# so the gene list was always overwritten. The table keeps the original file
# name (that is what ended up on disk before); the plain gene list now goes to
# its own file.
writeLines(df$gene, con = "scarlink_genes_test_names.txt")
write.table(
  df,
  file = "scarlink_genes_test.txt",
  sep = "\t", quote = FALSE, row.names = FALSE
)

# Summary statistics of the test correlations (NA values ignored).
mean_corr <- mean(df$test_corr, na.rm = TRUE)
median_corr <- median(df$test_corr, na.rm = TRUE)

mean_corr
median_corr

In [None]:

# Narrow, tall canvas for the single-box plot shown in the notebook.
options(repr.plot.width = 1, repr.plot.height = 4)

# One box summarising the test-set Spearman correlations of all genes in `df`.
p <- ggplot(data = df, mapping = aes(x = "scarlink", y = test_corr)) +
  geom_boxplot(fill = "grey80", color = "black") +
  labs(x = "", y = "Test Spearman corr") +
  ggtitle("") +
  theme_classic()

# Save at the same 1 x 4 inch size used for on-screen display.
ggsave(
  filename = "scarlink_test_corr_boxplot.png",
  plot = p,
  width = 1,
  height = 4,
  units = "in",
  dpi = 300
)