### Combine the SoupX corrected count matrices and save as adata

In [1]:
# Load required libraries
library(reticulate)
library(Matrix)

In [2]:
# directories for each of the samples
sample_dirs <- list.dirs("../raw_data/", recursive=FALSE)
sample_dirs

In [3]:
# extract the names of each of the samples
sample_names <- basename(sample_dirs)

In [4]:
# extract the paths to the SoupX corrected matrices
SoupX_count_rds_paths <- paste0(sample_dirs, "/STAR/Solo.out/GeneFullsoupX_corrected_counts.rds")

In [5]:
output_adata_dir <- "adata_from_SoupX/"
dir.create(output_adata_dir)

“'adata_from_SoupX' already exists”


Iterate through each of the SoupX rds files and create a corresponding adata file in the output_adata_dir

In [6]:
num_SoupX_rds <- length(SoupX_count_rds_paths)

for (i in 1:num_SoupX_rds) {

    if (i %% 5 == 0) {
        print(i)
        flush.console()
    }
     
    sample_name <- sample_names[i]

    # load in the SoupX directory
    SoupX_count <- readRDS(SoupX_count_rds_paths[i])
    # transpose for adata format since the rows should correspond to the cells and the columns to the genes
    sparse_matrix <- t(SoupX_count)
    # Import the anndata module from Python
    anndata <- import("anndata")
    # Create an adata object, with the adata.obs as the rownames of the transposed sparse matrix 
    # and adata.var as the colnames
    adata <- anndata$AnnData(X = sparse_matrix, 
                            obs = data.frame(row.names = rownames(sparse_matrix)), 
                            var = data.frame(row.names = colnames(sparse_matrix)))
    
    # save the adata file
    output_adata_path <- paste0(output_adata_dir, sample_name, ".h5ad")
    adata$write(output_adata_path)
}

[1] 5
[1] 10
[1] 15
[1] 20
[1] 25
[1] 30
[1] 35
[1] 40
[1] 45
[1] 50
