## Integration of late female samples (> 10 PCW)

In [None]:
suppressPackageStartupMessages(library(ArchR))
library(argparse)
library(ArchR)
library(parallel)

### 1. Create ArchR Project file from Arrow files 

In [None]:
# Register hg38 as the genome ArchR should use in this session.
addArchRGenome("hg38")

In [None]:
# Print the installed matrixStats version (recorded for reference only;
# the value is not used by any later cell).
packageVersion("matrixStats")

#### Load sample-level metadata produced by analysing each sample independently 


In [None]:
# Root directory with one sub-directory per sample. The trailing slash matters:
# the per-sample paths below are built with paste0(), not file.path().
data_dir <- "/nfs/team292/vl6/FetalReproductiveTract/ATAC_QC/ArchR/" 

In [None]:
# Read the per-cell metadata CSV of one sample and report its dimensions.
#
# The file is expected at <data_dir><sample_id>/<sample_id>_metadata.csv and
# its column `X` is used as the row names of the returned data.frame.
read_sample_meta <- function(sample_id) {
  meta_path <- paste0(data_dir, sample_id, "/", sample_id, "_metadata.csv")
  meta <- read.csv(meta_path, row.names = "X")
  print(dim(meta))
  meta
}

# One data.frame per sample, loaded in the same order as before.
HD_F_GON14609874_meta <- read_sample_meta("HD_F_GON14609874")
HD_F_GON14666992_meta <- read_sample_meta("HD_F_GON14666992")
HD_F_GON11282675_meta <- read_sample_meta("HD_F_GON11282675")
HD_F_GON11282676_meta <- read_sample_meta("HD_F_GON11282676")
HD_F_GON11389960_meta <- read_sample_meta("HD_F_GON11389960")
HD_F_GON11389961_meta <- read_sample_meta("HD_F_GON11389961")
HD_F_GON12449010_meta <- read_sample_meta("HD_F_GON12449010")
HD_F_GON12449011_meta <- read_sample_meta("HD_F_GON12449011")
HD_F_GON12877982_meta <- read_sample_meta("HD_F_GON12877982")
HD_F_GON12877983_meta <- read_sample_meta("HD_F_GON12877983")
HD_F_GON12877984_meta <- read_sample_meta("HD_F_GON12877984")
HD_F_GON13941946_meta <- read_sample_meta("HD_F_GON13941946")
HD_F_GON13941947_meta <- read_sample_meta("HD_F_GON13941947")
HD_F_GON13077785_and_HD_F_GON13094224_meta <-
  read_sample_meta("HD_F_GON13077785_and_HD_F_GON13094224")
HCA_F_GON11173192_and_HCA_F_GON11212447_meta <-
  read_sample_meta("HCA_F_GON11173192_and_HCA_F_GON11212447")
HD_F_GON15261136_meta <- read_sample_meta("HD_F_GON15261136")
HD_F_GON15261137_meta <- read_sample_meta("HD_F_GON15261137")
HD_F_GON15265171_meta <- read_sample_meta("HD_F_GON15265171")

#### For multiome samples, import annotations from snRNA-seq to cross-compare with label transfer 

In [None]:
# Stack the per-sample metadata tables row-wise into a single table.
# rbind() requires every table to expose the same columns.
per_sample_meta <- list(
  HD_F_GON14609874_meta,
  HD_F_GON14666992_meta,
  HD_F_GON11282675_meta,
  HD_F_GON11282676_meta,
  HD_F_GON11389960_meta,
  HD_F_GON11389961_meta,
  HD_F_GON12449010_meta,
  HD_F_GON12449011_meta,
  HD_F_GON12877982_meta,
  HD_F_GON12877983_meta,
  HD_F_GON12877984_meta,
  HD_F_GON13941946_meta,
  HD_F_GON13941947_meta,
  HD_F_GON13077785_and_HD_F_GON13094224_meta,
  HCA_F_GON11173192_and_HCA_F_GON11212447_meta,
  HD_F_GON15261136_meta,
  HD_F_GON15261137_meta,
  HD_F_GON15265171_meta
)
metadata <- do.call(rbind, per_sample_meta)
dim(metadata)

In [None]:
# Females late
# Every Arrow file lives at <arrow_root><sample_id>/<sample_id>.arrow, so the
# paths are derived from the sample IDs (same files, same order as before).
arrow_root <- "/nfs/team292/vl6/FetalReproductiveTract/ATAC_QC/ArchR/"
arrow_sample_ids <- c(
  "HD_F_GON14609874",
  "HD_F_GON14666992",
  "HD_F_GON12877982",
  "HD_F_GON13941947",
  "HD_F_GON13941946",
  "HD_F_GON12449010",
  "HD_F_GON12449011",
  "HD_F_GON11282675",
  "HD_F_GON11282676",
  "HD_F_GON12877983",
  "HD_F_GON12877984",
  "HD_F_GON11389960",
  "HD_F_GON11389961",
  "HCA_F_GON11173192_and_HCA_F_GON11212447", # 12 PCW (Hrv103)
  "HD_F_GON13077785_and_HD_F_GON13094224", # 20 PCW (Hrv174)
  "HD_F_GON15261136",
  "HD_F_GON15261137",
  "HD_F_GON15265171"
)
arrow_files <- paste0(arrow_root, arrow_sample_ids, "/", arrow_sample_ids, ".arrow")

females_late <- ArchRProject(
  ArrowFiles = arrow_files,
  outputDirectory = "/nfs/team292/vl6/FetalReproductiveTract/ATAC_QC/ArchR/females_late/",
  # Recommended so that an original copy of the Arrow files survives any
  # later modification.
  copyArrows = TRUE
)

In [None]:
# List the matrices stored in the project's Arrow files.
getAvailableMatrices(females_late)

In [None]:
# Convert the combined metadata to a DataFrame; it is assigned to the
# project's cellColData slot further down.
metadata <- DataFrame(metadata)

In [None]:
# Peek at the row names, which are used below to subset the project.
head(row.names(metadata))

In [None]:
# Print the project summary before subsetting.
females_late

In [None]:
# Keep only the cells present in the per-sample metadata, then replace the
# project's per-cell annotation table with that metadata.
cells_with_meta <- row.names(metadata)
females_late <- females_late[cells_with_meta]
females_late@cellColData <- metadata
females_late

In [None]:
# Number of retained cells per sample.
table(females_late$Sample)

In [None]:
# Add donor and stage information --> integrate by donor for consistency with scRNA-seq
library(dplyr)

# Single lookup table: one row per sample with its donor, stage and assay
# technology. Samples that are not listed here receive NA in all three
# annotations.
sample_info <- data.frame(
  sample = c(
    "HCA_F_GON11173192_and_HCA_F_GON11212447",
    "HD_F_GON11282676",
    "HD_F_GON11389960",
    "HD_F_GON12449010",
    "HD_F_GON12449011",
    "HD_F_GON12877983",
    "HD_F_GON11282675",
    "HD_F_GON11389961",
    "HD_F_GON12877982",
    "HD_F_GON12877984",
    "HD_F_GON14609874",
    "HD_F_GON13941946",
    "HD_F_GON13941947",
    "HD_F_GON14666992",
    "HD_F_GON13077785_and_HD_F_GON13094224",
    "HD_F_GON15261136",
    "HD_F_GON15261137",
    "HD_F_GON15265171"
  ),
  donor = c(
    "Hrv103", "Hrv118", "Hrv125", "Hrv69", "Hrv69", "Hrv139",
    "Hrv118", "Hrv125", "C128", "Hrv139", "Hrv253", "Hrv245",
    "Hrv245", "Hrv261", "Hrv174", "Hrv275", "Hrv270", "Hrv276"
  ),
  stage = c(
    12, 20, 21, 19, 19, 20,
    20, 21, 12, 20, 16, 15,
    15, 12, 20, 14, 17, 12
  ),
  technology = c(
    "MULTIOME", "ATAC", "ATAC", "ATAC", "ATAC", "ATAC",
    "ATAC", "ATAC", "ATAC", "ATAC", "ATAC", "ATAC",
    "ATAC", "ATAC", "MULTIOME", "ATAC", "ATAC", "ATAC"
  ),
  stringsAsFactors = FALSE
)

# Row of the lookup table for every cell (NA when the sample is not listed).
info_row <- match(as.character(females_late$Sample), sample_info$sample)

donor <- sample_info$donor[info_row]
stage <- sample_info$stage[info_row]
technology <- sample_info$technology[info_row]

females_late$donor <- donor
females_late$stage <- stage
females_late$technology <- technology

In [None]:
# Cell counts per donor.
table(females_late$donor)

In [None]:
# Cell counts per stage.
table(females_late$stage)

In [None]:
# Cell counts per assay technology.
table(females_late$technology)

In [None]:
# Per-cell QC values used for the scatter plot below:
# column 1 = log10(nFrags), column 2 = TSSEnrichment.
df <- getCellColData(females_late, select = c("log10(nFrags)", "TSSEnrichment"))
df

In [None]:
# Density-coloured scatter of fragment count vs TSS enrichment per cell.
# Axes are clipped at the 99th percentile; the dashed lines mark
# TSS enrichment = 8 and log10(fragments) = 3.25.
log10_frags <- df[, 1]
tss_enrichment <- df[, 2]

frag_limits <- c(log10(500), quantile(log10_frags, probs = 0.99))
tss_limits <- c(0, quantile(tss_enrichment, probs = 0.99))

p <- ggPoint(
  x = log10_frags,
  y = tss_enrichment,
  colorDensity = TRUE,
  continuousSet = "sambaNight",
  xlabel = "Log10 Unique Fragments",
  ylabel = "TSS Enrichment",
  xlim = frag_limits,
  ylim = tss_limits
) +
  geom_hline(yintercept = 8, lty = "dashed") +
  geom_vline(xintercept = 3.25, lty = "dashed")

p

In [None]:
# TSS enrichment per sample, drawn as ridge plots.
p1 <- plotGroups(
    ArchRProj = females_late, 
    groupBy = "Sample", 
    colorBy = "cellColData", 
    name = "TSSEnrichment",
    plotAs = "ridges"
   )
p1

In [None]:
# TSS enrichment per sample, drawn as violins with an overlaid box plot.
# Note: this overwrites nothing yet, but `p2` is reused for a UMAP plot later.
p2 <- plotGroups(
    ArchRProj = females_late, 
    groupBy = "Sample", 
    colorBy = "cellColData", 
    name = "TSSEnrichment",
    plotAs = "violin",
    alpha = 0.4,
    addBoxPlot = TRUE
   )
p2

In [None]:
# log10 fragment count per sample, drawn as ridge plots.
p3 <- plotGroups(
    ArchRProj = females_late, 
    groupBy = "Sample", 
    colorBy = "cellColData", 
    name = "log10(nFrags)",
    plotAs = "ridges"
   )
p3

In [None]:
# log10 fragment count per sample, drawn as violins with an overlaid box plot.
p4 <- plotGroups(
    ArchRProj = females_late, 
    groupBy = "Sample", 
    colorBy = "cellColData", 
    name = "log10(nFrags)",
    plotAs = "violin",
    alpha = 0.4,
    addBoxPlot = TRUE
   )
p4

In [None]:
# Disabled: export of the four per-sample QC plots to PDF.
# plotPDF(p1,p2,p3,p4, name = "QC-Sample-Statistics.pdf", ArchRProj = females_late, 
#         addDOC = FALSE, width = 4, height = 4)

In [None]:
# Fragment size distribution for the project.
p5 <- plotFragmentSizes(ArchRProj = females_late)
p5

In [None]:
# TSS enrichment profile for the project.
p6 <- plotTSSEnrichment(ArchRProj = females_late)
p6

In [None]:
# Disabled: export of the fragment-size and TSS-profile plots to PDF.
# plotPDF(p5,p6, name = "QC-Sample-FragSizes-TSSProfile.pdf", ArchRProj = females_late, 
#         addDOC = FALSE, width = 5, height = 5)

In [None]:
# Disabled: doublet filtering is not applied in this notebook.
# females_late <- filterDoublets(females_late)
# females_late

In [None]:
# Compute an iterative LSI reduction from the TileMatrix and store it in the
# project under the name "IterativeLSI" (read back by getReducedDims below).
females_late <- addIterativeLSI(
    ArchRProj = females_late,
    useMatrix = "TileMatrix", 
    name = "IterativeLSI", 
    iterations = 2, 
    clusterParams = list( #See Seurat::FindClusters
        resolution = c(0.2), 
        sampleCells = 10000, 
        n.start = 10
    ), 
    varFeatures = 25000, 
    dimsToUse = 1:30  # same 30 dimensions are requested when extracting the embedding
)

In [None]:
###########################
# Extract LSI coordinates #
###########################

# Pull the "IterativeLSI" embedding out of the project as a plain matrix
# (cells in rows). This matrix is later split by stage and sample and
# batch-corrected with reducedMNN.
# NOTE(review): corCutOff = 0.75 presumably drops dimensions that correlate
# strongly with sequencing depth, so fewer than 30 columns may be returned --
# confirm against the ArchR documentation.
lsi.mtx <- getReducedDims(
  ArchRProj = females_late,
  reducedDims = "IterativeLSI",
  returnMatrix = TRUE,
  dimsToUse = 1:30,
  scaleDims = TRUE,
  corCutOff = 0.75
)
dim(lsi.mtx)

In [None]:
library(uwot) # UMAP 

In [None]:
# Per-cell stage vector and its cell counts.
timepoints <- getCellColData(females_late)$stage
table(timepoints)

In [None]:
# Per-cell sample vector and its cell counts.
samples <- getCellColData(females_late)$Sample
table(samples)

In [None]:
# Split the LSI matrix into a nested list: stage -> sample -> matrix of the
# cells belonging to that sample. Both levels keep order of first appearance;
# the list is re-ordered explicitly before batch correction.
# Assumes the rows of lsi.mtx are in the same order as getCellColData().
timepoints <- getCellColData(females_late)$stage
samples <- getCellColData(females_late)$Sample

stage_levels <- unique(timepoints)

lsi_list <- lapply(stage_levels, function(stage_i) {
  in_stage <- timepoints == stage_i
  stage_lsi <- lsi.mtx[in_stage, , drop = FALSE]
  stage_samples <- samples[in_stage]

  sample_levels <- unique(stage_samples)
  per_sample <- lapply(sample_levels, function(sample_j) {
    stage_lsi[stage_samples == sample_j, , drop = FALSE]
  })
  names(per_sample) <- sample_levels
  per_sample
})

names(lsi_list) <- stage_levels

In [None]:
# Cross-tabulate cells by sample and stage.
table(getCellColData(females_late)$Sample, getCellColData(females_late)$stage)

In [None]:
# Fix the order in which batches are merged by reducedMNN: stages in
# chronological order, and samples in a fixed order within each stage.
names(lsi_list) <- unique(timepoints)

# Every stage assigned to a sample must be listed here. order(match(...))
# places unmatched (NA) entries last, so a missing stage would silently be
# merged after the latest one instead of at its chronological position.
timepoint_order <- c(12, 14, 15, 16, 17, 19, 20, 21)

# Every sample must be listed as well, grouped by stage.
sample_order <- c(
  # 12 PCW
  "HD_F_GON14666992", "HCA_F_GON11173192_and_HCA_F_GON11212447",
  "HD_F_GON12877982", "HD_F_GON15265171",
  # 14 PCW
  "HD_F_GON15261136",
  # 15 PCW
  "HD_F_GON13941947", "HD_F_GON13941946",
  # 16 PCW
  "HD_F_GON14609874",
  # 17 PCW
  "HD_F_GON15261137",
  # 19 PCW
  "HD_F_GON12449010", "HD_F_GON12449011",
  # 20 PCW
  "HD_F_GON13077785_and_HD_F_GON13094224", "HD_F_GON12877984",
  "HD_F_GON12877983", "HD_F_GON11282675", "HD_F_GON11282676",
  # 21 PCW
  "HD_F_GON11389960", "HD_F_GON11389961"
)

# Fail loudly if a stage or sample has no explicit position, rather than
# letting it fall to the end of the merge order unnoticed.
stopifnot(
  all(names(lsi_list) %in% as.character(timepoint_order)),
  all(unlist(lapply(lsi_list, names)) %in% sample_order)
)

#arrange to match timepoint order
lsi_list <- lsi_list[order(match(names(lsi_list), timepoint_order))]
lsi_list <- lapply(lsi_list, function(x) {
  x[order(match(names(x), sample_order))]
})

In [None]:
library(batchelor)

In [None]:
# Print the installed matrixStats version again, now that batchelor is loaded
# (for reference only; the value is not used).
packageVersion("matrixStats")

In [None]:
# Perform corrections within stages: the samples of one stage are merged with
# reducedMNN in list order.
correct_list <- lapply(lsi_list, function(stage_mats) {
  # A stage with a single sample has nothing to correct against; pass its
  # matrix through unchanged.
  if (length(stage_mats) <= 1) {
    return(stage_mats[[1]])
  }
  do.call(reducedMNN, stage_mats)$corrected
})

In [None]:
# Perform correction over stages: merge the per-stage matrices in list order
# (seq_along() instead of 1:length(), which misbehaves on an empty list).
# NOTE(review): the rows of the result presumably follow the order of the
# supplied batches (stage, then sample), not the cell order of the ArchR
# project -- always match cells by row name, never by position.
lsi.mtx <- reducedMNN(correct_list, merge.order = seq_along(correct_list))$corrected
rm(correct_list, lsi_list)

In [None]:
library(tibble)
# Per-cell metadata as a data.table, with cell names moved into a column.
# keep.rownames is spelled TRUE because `T` is an ordinary, reassignable variable.
sample_metadata <- getCellColData(females_late) %>% as.data.table(keep.rownames = TRUE)
# as.data.table() names the row-name column "rn"; rename it to "cell" so it
# can serve as the merge key with the UMAP coordinates.
names(sample_metadata)[names(sample_metadata) == "rn"] <- "cell"
head(sample_metadata)

In [None]:
pt.size <- ifelse(nrow(lsi.mtx) >= 1e4, 0.8, 1.2)
n_neighbors <- 25
min_dist <- 0.50
dimred <- "IterativeLSI_MNN"

# The loops allow a grid of UMAP settings, but only one value of each is set
# above. With several values, `to.plot` would hold only the last combination,
# because it is overwritten on every iteration.
for (i in n_neighbors) {
  for (j in min_dist) {

    # Run UMAP
    # NOTE(review): per the uwot documentation, fast_sgd = TRUE enables
    # multi-threaded optimisation, so the layout may not be exactly
    # reproducible despite set.seed() -- confirm if reproducibility matters.
    set.seed(1996)
    umap_embedding.mtx <- umap(
      lsi.mtx,
      n_neighbors = i,
      min_dist = j,
      metric = "cosine",
      fast_sgd = TRUE
    ) %>% round(2)
    rownames(umap_embedding.mtx) <- rownames(lsi.mtx)

    # Fetch UMAP coordinates
    # (TRUE rather than T: `T` is an ordinary, reassignable variable.)
    umap.dt <- umap_embedding.mtx %>%
      as.data.table(keep.rownames = TRUE) %>%
      setnames(c("cell", "umap1", "umap2"))

    # Attach per-cell metadata by cell name.
    to.plot <- umap.dt %>% merge(sample_metadata, by = "cell")
  }
}

In [None]:
# Set the plot size (repr options control how the notebook renders figures)
options(repr.plot.width = 11, repr.plot.height = 11)

In [None]:
# UMAP of the batch-corrected embedding, points filled by sample.
umap_theme <- theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    text = element_text(size = 15),
    legend.position = "bottom"
  )

p <- ggplot(to.plot, aes_string(x = "umap1", y = "umap2", fill = "Sample")) +
  geom_point(size = 1, shape = 21, stroke = 0.05) +
  umap_theme
p

In [None]:
# UMAP of the batch-corrected embedding, points filled by the "Clusters2"
# column of the per-cell metadata.
umap_theme <- theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    text = element_text(size = 15),
    legend.position = "bottom"
  )

p2 <- ggplot(to.plot, aes_string(x = "umap1", y = "umap2", fill = "Clusters2")) +
  geom_point(size = 1, shape = 21, stroke = 0.05) +
  umap_theme
p2

In [None]:
# UMAP of the batch-corrected embedding, points filled by stage.
umap_theme <- theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    text = element_text(size = 15),
    legend.position = "bottom"
  )

p3 <- ggplot(to.plot, aes_string(x = "umap1", y = "umap2", fill = "stage")) +
  geom_point(size = 1, shape = 21, stroke = 0.05) +
  # ggrastr::geom_point_rast(size=1.5, shape=21, stroke=0.05) +  # DOES NOT WORK IN THE CLUSTER
  umap_theme
p3

In [None]:
# UMAP of the batch-corrected embedding, points filled by donor.
# The legend sits on the right here, unlike the other UMAP plots.
umap_theme_right <- theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    text = element_text(size = 15),
    legend.position = "right"
  )

p4 <- ggplot(to.plot, aes_string(x = "umap1", y = "umap2", fill = "donor")) +
  geom_point(size = 1, shape = 21, stroke = 0.05) +
  # ggrastr::geom_point_rast(size=1.5, shape=21, stroke=0.05) +  # DOES NOT WORK IN THE CLUSTER
  umap_theme_right
p4

In [None]:
# Export the UMAP coordinates merged with per-cell metadata; cell names are
# carried in the `cell` column, so row names are not written.
write.csv(to.plot, "/nfs/team292/vl6/FetalReproductiveTract/ATAC_QC/ArchR/females_late/umap_coords.csv", row.names = FALSE)

In [None]:
# Export the MNN-corrected LSI embedding.
# NOTE(review): row.names = FALSE drops the cell identifiers, and lsi.mtx was
# rebuilt by reducedMNN from matrices split by stage and sample, so its row
# order presumably differs from the ArchR project order. Any consumer that
# aligns this file to cells by position should be checked -- consider writing
# the row names.
write.csv(lsi.mtx, "/nfs/team292/vl6/FetalReproductiveTract/ATAC_QC/ArchR/females_late/mnn_embedding.csv", row.names = FALSE)