# Set-up

In [None]:
# Root directory that holds the input data. `in.dir` is the alias used by the
# cells below; both names point at the same path.
in.dir <- base_data_path <- "/users/genomics/xoel/Review_CD/ewce_linnarson.v7/data/"

In [2]:
library(glue)
library(Matrix)
library(spam)
library(spam64)
library(dplyr)

Spam version 2.10-0 (2023-10-23) is loaded.
Type 'help( Spam)' or 'demo( spam)' for a short introduction 
and overview of this package.
Help for individual functions is also obtained by adding the
suffix '.spam' to the function name, e.g. 'help( chol.spam)'.


Attaching package: ‘spam’


The following object is masked from ‘package:Matrix’:

    det


The following objects are masked from ‘package:base’:

    backsolve, forwardsolve



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




# **Part 1: List the folders to process**

In [3]:
# Grouping variables. The data is expected to be laid out on disk as
#   <in.dir>/by_<group_vars[1]>/<value1>/by_<group_vars[2]>/<value2>/
group_vars <- c('Subregion', 'Age')

# First-level directories: one per value of group_vars[1]
region_dirs <- list.dirs(glue('{in.dir}/by_{group_vars[1]}/'), full.names = TRUE, recursive = FALSE)

# Build one data frame per first-level directory (one row per second-level
# directory) and bind them once at the end instead of growing a list in a loop.
folders <- lapply(region_dirs, function(region_dir) {
    age_dirs <- list.dirs(paste0(region_dir, '/by_', group_vars[2], '/'),
                          full.names = TRUE, recursive = FALSE)
    if (length(age_dirs) == 0) {
        # Nothing to process under this first-level directory
        return(NULL)
    }
    d <- data.frame(basename(region_dir), basename(age_dirs), normalizePath(age_dirs))
    colnames(d) <- c(group_vars, 'path')
    d
})
folders <- do.call(rbind, folders)

# Fail early: with no folders the size command below would otherwise be run
# without a target directory.
if (is.null(folders)) {
    stop('No folders to process were found under ', in.dir)
}

# Disk usage of every folder as reported by `du -s`.
# The path is shell-quoted so that spaces/special characters are safe, and `&&`
# makes sure `du` does not run in the wrong directory when `cd` fails.
du_cmds <- paste('cd', shQuote(folders$path), '&& du -s ./')
du_out <- vapply(du_cmds,
                 function(cmd) system(cmd, intern = TRUE)[1],
                 character(1), USE.NAMES = FALSE)
# `du -s ./` prints "<size>\t./": keep only the number (NA if the command failed)
folders$size <- as.numeric(sub('\t.*$', '', du_out))

# Process the smallest folders first
folders <- arrange(folders, size)

# **Part 2: Load gene metadata**

In [None]:
# Load the gene annotation table. file.path() is used so the result does not
# depend on `in.dir` ending with a slash. The first column of the CSV is
# discarded (presumably a saved row index -- verify against the file).
gene.file <- file.path(in.dir, 'GeneInfo.csv')
Ginfo <- data.table::fread(gene.file, data.table = FALSE)[, -1]
# Number of repeated entries in the Gene column
sum(duplicated(Ginfo$Gene))
# `Name` is a de-duplicated copy of `Gene` (make.unique appends .1, .2, ...)
Ginfo$Name <- make.unique(Ginfo$Gene)
# Sanity check: should be 0 after make.unique
sum(duplicated(Ginfo$Name))

In [5]:
# Show the dimensions (rows, columns) of the gene metadata table
dim(Ginfo)

# **Part 3: Helper functions**

In [6]:
# Raise the `future.globals.maxSize` option to 500 * 1024^3
# (presumably bytes, i.e. 500 GiB -- confirm against the future package docs)
options(future.globals.maxSize = 500 * 1024^3)

In [7]:
# Normalize an expression matrix in two steps:
#   1. sctransform::vst() is run on the input and only its corrected UMI
#      matrix is kept;
#   2. every column is rescaled by the reciprocal of its column sum.
#
# Arguments:
#   exp      count matrix passed to sctransform::vst() as `umi`
#            (genes x cells is assumed here -- confirm with the caller).
#   verbose  if TRUE, print the matrix dimensions before/after and progress
#            messages, and run vst() with verbosity 2 (0 otherwise).
#
# Returns the rescaled corrected-count matrix.
my.sct_normalize <- function (exp, verbose = TRUE) {

    requireNamespace("sctransform")

    if (verbose) {
        print(dim(exp))
    }
    gc()

    vst_verbosity <- if (verbose) 2 else 0
    # Only `umi_corrected` is extracted so the rest of the vst() result can be
    # released right away.
    corrected <- sctransform::vst(umi = exp,
                                  return_cell_attr = FALSE,
                                  return_gene_attr = FALSE,
                                  return_corrected_umi = TRUE,
                                  verbosity = vst_verbosity)$umi_corrected

    if (verbose) {
        message(glue('[{Sys.time()}] MaxScaling...'))
    }
    # Transpose so the per-column factors are recycled along the right axis,
    # multiply, then transpose back.
    col_factors <- 1 / Matrix::colSums(corrected)
    scaled <- Matrix::t(Matrix::t(corrected) * col_factors)

    if (verbose) {
        print(dim(scaled))
    }
    scaled
}

# Validate the cell annotations and build a CellTypeDataset (CTD) with
# EWCE::generate_celltype_data().
#
# Arguments:
#   sct.exp          expression matrix; its number of columns must match the
#                    length of every annotation vector.
#   level1class      annotation vector, one label per column of `sct.exp`.
#   level2class      optional second annotation vector (same length), or NULL.
#   as_sparse, as_DelayedArray, input_species, savePath, groupName, no_cores,
#   return_ctd, verbose
#                    forwarded unchanged to EWCE::generate_celltype_data().
#                    `savePath` is created first if it does not exist.
#
# Annotation levels are converted to factors when needed; a level with a single
# unique group is dropped with a notice.
#
# Returns whatever EWCE::generate_celltype_data() returns.
# Stops if an annotation length does not match the number of columns, or if no
# annotation level with more than one group remains.
my_generate_ctd <- function(sct.exp, 
                            level1class, level2class = NULL,
                            as_sparse = FALSE, 
                            as_DelayedArray = FALSE,
                            input_species = 'hs', 
                            savePath = './',
                            groupName = 'CTD_expression',
                            no_cores = parallel::detectCores(),
                            return_ctd = FALSE,
                            verbose = TRUE){
    
    
    annotLevels <- list(level1class = level1class)
    if (!is.null(level2class)){
        annotLevels$level2class <- level2class
    }

    n_cells <- ncol(sct.exp)
    level_names <- names(annotLevels)
    annotLevels <- lapply(level_names, function(n){
        cs <- annotLevels[[n]]
        print(length(cs))
        if (length(cs) != n_cells){
            stop(glue('Length of {n} {length(cs)} is not the same as expression matrix {n_cells}'))
        }
        if (!is.factor(cs)){
            cs <- factor(as.character(cs))
        }
        if (length(unique(cs)) == 1){
            print(glue('Number of unique groups in {n} is 1. It will not be used.'))
            return(NULL)
        }
        return(cs)
    })
    names(annotLevels) <- level_names
    # Drop the levels that were rejected above
    annotLevels <- Filter(Negate(is.null), annotLevels)

    # Give a clear error instead of handing an empty annotation list to EWCE
    if (length(annotLevels) == 0){
        stop('No annotation level with more than one group is left; cannot generate a CTD.')
    }

    
    print(lapply(annotLevels, head))
    # recursive = TRUE so that a nested output folder is created as well
    dir.create(savePath, showWarnings = FALSE, recursive = TRUE)
    
    # Generate celltype data
    CTD <- EWCE::generate_celltype_data(
        exp = sct.exp,
        annotLevels = annotLevels,
        as_sparse = as_sparse,
        as_DelayedArray = as_DelayedArray,
        input_species = input_species,
        savePath = savePath,
        groupName = groupName,
        no_cores = no_cores,
        return_ctd = return_ctd,
        verbose = verbose
    )
    return(CTD)
}


# Load an expression matrix and its cell metadata, normalize the matrix with
# my.sct_normalize() and build the CTD with my_generate_ctd().
#
# Arguments:
#   exp_path         MatrixMarket file read with Matrix::readMM(). It is
#                    transposed on load (so it is presumably stored as
#                    cells x genes -- confirm against the files).
#   cellmeta_path    cell metadata table read with data.table::fread(); assumed
#                    to have one row per cell, in the same order as the matrix.
#   ctd_folder       folder passed to my_generate_ctd() as `savePath`.
#   overwrite        NOTE(review): currently unused; existing results are
#                    always regenerated.
#   level1_var       metadata column used as level-1 annotation.
#   level2_var       optional metadata column used as level-2 annotation.
#   cellname_column  metadata column with the cell names (matrix column names);
#                    if NULL, columns are named "1", "2", ...
#   row.names        matrix row names; if NULL, rows are named "1", "2", ...
#   return_ctd, verbose
#                    forwarded to my_generate_ctd().
#   ...              NOTE(review): accepted but currently unused.
#
# The CTD group name is "_" followed by the annotation variable names joined
# with ".".
#
# Returns the value of my_generate_ctd().
ctd_from_exp_path <- function(exp_path, cellmeta_path, ctd_folder, 
                              overwrite = FALSE,
                              level1_var, level2_var = NULL, 
                              cellname_column = 'CellID', 
                              row.names = NULL, return_ctd = FALSE,
                              verbose = TRUE, ...){
    

    requireNamespace("Matrix")

    
    # c() already drops a NULL level2_var, so no extra filtering is needed
    ctd_name <- paste0('_', paste0(c(level1_var, level2_var), collapse = '.'))
    if (verbose){message(glue('[{Sys.time()}] Using name {ctd_name}'))}


    if (verbose){message(glue('[{Sys.time()}] Loading cell metadata...'))}
    cellmeta <- data.table::fread(cellmeta_path, nThread = parallel::detectCores(), data.table = FALSE)

    if (verbose){message(glue('[{Sys.time()}] Retrieving classes...'))}
    level1class <- cellmeta[, level1_var, drop = TRUE]
    level2class <- if (is.null(level2_var)) NULL else cellmeta[, level2_var, drop = TRUE]
    
    if (verbose){message(glue('[{Sys.time()}] Loading file...'))}
    # Matrix::t is called explicitly so this works even if Matrix is not attached
    mtx <- as(Matrix::t(Matrix::readMM(exp_path)), 'CsparseMatrix')
    
    if (verbose){message(glue('[{Sys.time()}] Setting dimnames...'))}
    # Fallback names must follow the matching dimension: columns for colnames
    # (the previous code used the number of rows here), rows for rownames.
    colnames(mtx) <- if (is.null(cellname_column)) {
        as.character(seq_len(ncol(mtx)))
    } else {
        cellmeta[, cellname_column]
    }
    rownames(mtx) <- if (is.null(row.names)) {
        as.character(seq_len(nrow(mtx)))
    } else {
        row.names
    }
    
    if (verbose){message(glue('[{Sys.time()}] Normalizing...'))}
    gc()
    mtx <- my.sct_normalize(mtx, verbose = verbose)

    gc()
    return(
        my_generate_ctd(
            sct.exp = mtx, 
            level1class = level1class, 
            level2class = level2class,
            savePath = ctd_folder, groupName = ctd_name,
            return_ctd = return_ctd, verbose = verbose)
    )
}

# **Part 4: Process folders**

In [8]:
# Row names for the expression matrices: the de-duplicated gene names
row.names <- Ginfo[['Name']]

# Cell metadata columns used as annotation levels (NULL = no second level)
level2var <- NULL
level1var <- 'ShortName'


In [9]:
# Build one CTD per folder, in the order of `folders`.
# seq_len() keeps the loop from running when `folders` has no rows
# (1:0 would iterate over rows 1 and 0).
for (row in seq_len(nrow(folders))){
    path <- folders[row, 'path']
    # Progress: position in the list and the folder being processed
    print(glue('{row}/{nrow(folders)}'))
    print(folders[row,])
    
    # Input files expected inside every folder
    cellmeta_path <- paste0(path, '/cell_meta.csv')
    expression_path <- paste0(path, '/Expression.sparseMatrix.mtx')

    # Results are written into the same folder the inputs were read from
    ctd_from_exp_path(exp_path = expression_path, 
                      cellmeta_path = cellmeta_path,
                      ctd_folder = path, 
                      overwrite = FALSE,
                      level1_var = level1var, level2_var = level2var, 
                      cellname_column = 'CellID', 
                      row.names = row.names, return_ctd = FALSE,
                      verbose = TRUE)
}

1/29
      Subregion Age
1 Telencephalon 6.7
                                                              path   size
1 /scratch/xoel/ewce_data.v7/by_Subregion/Telencephalon/by_Age/6.7 231588


[2024-11-28 14:58:47.123345] Using name _ShortName

[2024-11-28 14:58:47.127428] Loading cell metadata...

[2024-11-28 14:58:47.215291] Retrieving classes...

[2024-11-28 14:58:47.217697] Loading file...

[2024-11-28 14:58:54.294888] Setting dimnames...

[2024-11-28 14:58:54.297467] Normalizing...

Loading required namespace: sctransform



[1] 59480  6903


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 24419 by 6903

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 6903 cells





Found 72 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 24419 genes





Computing corrected count matrix for 24419 genes





Wall clock passed: Time difference of 1.152416 mins

[2024-11-28 15:00:04.092713] MaxScaling...



[1] 24419  6903
[1] 6903
$level1class
[1] ExNeuBl ExNeuBl ExNeuBl ExNeuBl ExNeuBl ExNeuBl
Levels: ExNeu ExNeuBl ExNeuIPC InNeu InNeuIPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 1.3 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

Loading required namespace: ggdendro

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Telencephalon/by_Age/6.7/ctd__ShortName.rda



2/29
  Subregion Age                                                         path
2 Subcortex 9.5 /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/9.5
    size
2 279892


[2024-11-28 15:00:17.250336] Using name _ShortName

[2024-11-28 15:00:17.252597] Loading cell metadata...

[2024-11-28 15:00:17.320913] Retrieving classes...

[2024-11-28 15:00:17.322261] Loading file...

[2024-11-28 15:00:23.647444] Setting dimnames...

[2024-11-28 15:00:23.649379] Normalizing...



[1] 59480 12681


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 24749 by 12681

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 12681 cells





Found 68 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 24749 genes





Computing corrected count matrix for 24749 genes





Wall clock passed: Time difference of 2.012036 mins

[2024-11-28 15:02:25.362625] MaxScaling...



[1] 24749 12681
[1] 12681
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl InNeu InNeuIPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 2.3 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/9.5/ctd__ShortName.rda



3/29
    Subregion  Age
3 Hippocampus 14.0
                                                             path   size
3 /scratch/xoel/ewce_data.v7/by_Subregion/Hippocampus/by_Age/14.0 286936


[2024-11-28 15:02:30.751765] Using name _ShortName

[2024-11-28 15:02:30.754063] Loading cell metadata...

[2024-11-28 15:02:30.807688] Retrieving classes...

[2024-11-28 15:02:30.810278] Loading file...

[2024-11-28 15:02:37.369111] Setting dimnames...

[2024-11-28 15:02:37.370994] Normalizing...



[1] 59480  5034


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 28632 by 5034

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 5034 cells





Found 47 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 28632 genes





Computing corrected count matrix for 28632 genes





Wall clock passed: Time difference of 56.5915 secs

[2024-11-28 15:03:35.022229] MaxScaling...



[1] 28632  5034
[1] 5034
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: COPs ExNeuBl ExNeuIPC GlioBl Imm InNeu



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 1.1 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Hippocampus/by_Age/14.0/ctd__ShortName.rda



4/29
  Subregion Age                                                         path
4 Subcortex 8.0 /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/8.0
    size
4 323616


[2024-11-28 15:03:38.99414] Using name _ShortName

[2024-11-28 15:03:38.996618] Loading cell metadata...

[2024-11-28 15:03:39.065519] Retrieving classes...

[2024-11-28 15:03:39.067004] Loading file...

[2024-11-28 15:03:46.262418] Setting dimnames...

[2024-11-28 15:03:46.266335] Normalizing...



[1] 59480 11644


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 25450 by 11644

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 11644 cells





Found 96 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 25450 genes





Computing corrected count matrix for 25450 genes





Wall clock passed: Time difference of 2.001567 mins

[2024-11-28 15:05:47.526727] MaxScaling...



[1] 25450 11644
[1] 11644
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl InNeu InNeuBl InNeuIPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 2.2 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/8.0/ctd__ShortName.rda



5/29
  Subregion  Age                                                          path
5 Subcortex 13.0 /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/13.0
    size
5 383016


[2024-11-28 15:05:53.631907] Using name _ShortName

[2024-11-28 15:05:53.634336] Loading cell metadata...

[2024-11-28 15:05:53.697642] Retrieving classes...

[2024-11-28 15:05:53.699442] Loading file...

[2024-11-28 15:06:02.60807] Setting dimnames...

[2024-11-28 15:06:02.61143] Normalizing...



[1] 59480  8557


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 28088 by 8557

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 8557 cells





Found 32 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 28088 genes





Computing corrected count matrix for 28088 genes





Wall clock passed: Time difference of 1.397725 mins

[2024-11-28 15:07:27.663378] MaxScaling...



[1] 28088  8557
[1] 8557
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuIPC GlioBl InNeu InNeuIPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 1.8 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/13.0/ctd__ShortName.rda



6/29
  Subregion Age                                                      path
6    Cortex 9.5 /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/9.5
    size
6 414640


[2024-11-28 15:07:33.387488] Using name _ShortName

[2024-11-28 15:07:33.389886] Loading cell metadata...

[2024-11-28 15:07:33.462952] Retrieving classes...

[2024-11-28 15:07:33.464614] Loading file...

[2024-11-28 15:07:42.654641] Setting dimnames...

[2024-11-28 15:07:42.657446] Normalizing...



[1] 59480 15606


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 26046 by 15606

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 15606 cells





Found 88 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 26046 genes





Computing corrected count matrix for 26046 genes





Wall clock passed: Time difference of 2.863412 mins

[2024-11-28 15:10:35.583207] MaxScaling...



[1] 26046 15606
[1] 15606
$level1class
[1] Fibr Fibr Fibr Fibr Fibr Fibr
Levels: ExNeu ExNeuBl ExNeuIPC Fibr GlioBl InNeu InNeuIPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 3.0 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/9.5/ctd__ShortName.rda



7/29
  Subregion Age                                                         path
7 Forebrain 9.2 /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/9.2
    size
7 426304


[2024-11-28 15:10:42.356924] Using name _ShortName

[2024-11-28 15:10:42.360096] Loading cell metadata...

[2024-11-28 15:10:42.451325] Retrieving classes...

[2024-11-28 15:10:42.453315] Loading file...

[2024-11-28 15:10:52.111272] Setting dimnames...

[2024-11-28 15:10:52.113708] Normalizing...



[1] 59480 23569


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 27219 by 23569

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 23569 cells





Found 87 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 27219 genes





Computing corrected count matrix for 27219 genes





Wall clock passed: Time difference of 3.761197 mins

[2024-11-28 15:14:39.039624] MaxScaling...



[1] 27219 23569
[1] 23569
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl InNeu InNeuIPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 4.8 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/9.2/ctd__ShortName.rda



8/29
  Subregion Age                                                    path   size
8      Head 5.0 /scratch/xoel/ewce_data.v7/by_Subregion/Head/by_Age/5.0 484716


[2024-11-28 15:14:46.98318] Using name _ShortName

[2024-11-28 15:14:46.985539] Loading cell metadata...

[2024-11-28 15:14:47.046314] Retrieving classes...

[2024-11-28 15:14:47.048728] Loading file...

[2024-11-28 15:14:58.441116] Setting dimnames...

[2024-11-28 15:14:58.44305] Normalizing...



[1] 59480  6539


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 29755 by 6539

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 6539 cells





Found 50 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 29755 genes





Computing corrected count matrix for 29755 genes





Wall clock passed: Time difference of 1.22554 mins

[2024-11-28 15:16:13.157755] MaxScaling...



[1] 29755  6539
[1] 6539
$level1class
[1] Fibr Fibr Fibr Fibr Fibr Fibr
Levels: Fibr Plac Schwann



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 1.4 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Head/by_Age/5.0/ctd__ShortName.rda



9/29
  Subregion Age                                                      path
9    Cortex 9.2 /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/9.2
    size
9 537128


[2024-11-28 15:16:18.878017] Using name _ShortName

[2024-11-28 15:16:18.880697] Loading cell metadata...

[2024-11-28 15:16:18.963957] Retrieving classes...

[2024-11-28 15:16:18.965788] Loading file...

[2024-11-28 15:16:31.353432] Setting dimnames...

[2024-11-28 15:16:31.356544] Normalizing...



[1] 59480 22732


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 27601 by 22732

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 22732 cells



  |                                                                      |   0%

“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




Found 91 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 27601 genes





Computing corrected count matrix for 27601 genes





Wall clock passed: Time difference of 3.863301 mins

[2024-11-28 15:20:24.265099] MaxScaling...



[1] 27601 22732
[1] 22732
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl InNeu RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 4.7 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/9.2/ctd__ShortName.rda



10/29
     Subregion  Age
10 Hippocampus 12.0
                                                              path   size
10 /scratch/xoel/ewce_data.v7/by_Subregion/Hippocampus/by_Age/12.0 574880


[2024-11-28 15:20:33.069434] Using name _ShortName

[2024-11-28 15:20:33.071779] Loading cell metadata...

[2024-11-28 15:20:33.111168] Retrieving classes...

[2024-11-28 15:20:33.112822] Loading file...

[2024-11-28 15:20:47.012187] Setting dimnames...

[2024-11-28 15:20:47.015414] Normalizing...



[1] 59480  9948


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 30344 by 9948

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 9948 cells



  |                                                                      |   0%

“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




Found 31 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 30344 genes





Computing corrected count matrix for 30344 genes





Wall clock passed: Time difference of 1.723936 mins

[2024-11-28 15:22:31.716086] MaxScaling...



[1] 30344  9948
[1] 9948
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl InNeu RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 2.2 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Hippocampus/by_Age/12.0/ctd__ShortName.rda



11/29
   Subregion Age                                                         path
11 Forebrain 6.6 /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/6.6
     size
11 621816


[2024-11-28 15:22:37.694873] Using name _ShortName

[2024-11-28 15:22:37.69726] Loading cell metadata...

[2024-11-28 15:22:37.765369] Retrieving classes...

[2024-11-28 15:22:37.766816] Loading file...

[2024-11-28 15:22:50.998864] Setting dimnames...

[2024-11-28 15:22:51.001778] Normalizing...



[1] 59480 16514


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 29415 by 16514

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 16514 cells



  |                                                                      |   0%

“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




Found 76 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 29415 genes





Computing corrected count matrix for 29415 genes





Wall clock passed: Time difference of 2.878552 mins

[2024-11-28 15:25:44.845711] MaxScaling...



[1] 29415 16514
[1] 16514
$level1class
[1] ExNeuBl ExNeuBl ExNeuBl ExNeuBl ExNeuBl ExNeuBl
Levels: ExNeu ExNeuBl ExNeuIPC InNeu InNeuBl RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 3.6 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/6.6/ctd__ShortName.rda



12/29
   Subregion  Age                                                         path
12  Striatum 14.0 /scratch/xoel/ewce_data.v7/by_Subregion/Striatum/by_Age/14.0
     size
12 639688


[2024-11-28 15:25:53.264112] Using name _ShortName

[2024-11-28 15:25:53.26644] Loading cell metadata...

[2024-11-28 15:25:53.336585] Retrieving classes...

[2024-11-28 15:25:53.338036] Loading file...

[2024-11-28 15:26:08.020486] Setting dimnames...

[2024-11-28 15:26:08.022427] Normalizing...



[1] 59480 13123


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 30791 by 13123

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 13123 cells



  |                                                                      |   0%

“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




Found 45 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 30791 genes





Computing corrected count matrix for 30791 genes





Wall clock passed: Time difference of 2.259472 mins

[2024-11-28 15:28:24.769494] MaxScaling...



[1] 30791 13123
[1] 13123
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuIPC GlioBl InNeu InNeuIPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 3.0 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Striatum/by_Age/14.0/ctd__ShortName.rda



13/29
   Subregion Age                                                      path
13    Cortex 8.0 /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/8.0
     size
13 703500


[2024-11-28 15:28:32.685934] Using name _ShortName

[2024-11-28 15:28:32.688573] Loading cell metadata...

[2024-11-28 15:28:32.766549] Retrieving classes...

[2024-11-28 15:28:32.768666] Loading file...

[2024-11-28 15:28:49.08123] Setting dimnames...

[2024-11-28 15:28:49.083616] Normalizing...



[1] 59480 18419


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 27709 by 18419

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 18419 cells



  |                                                                      |   0%

“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




Found 109 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 27709 genes





Computing corrected count matrix for 27709 genes





Wall clock passed: Time difference of 3.028771 mins

[2024-11-28 15:31:52.103866] MaxScaling...



[1] 27709 18419
[1] 18419
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl InNeu RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 3.8 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/8.0/ctd__ShortName.rda



14/29
   Subregion Age                                                         path
14 Subcortex 8.5 /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/8.5
     size
14 825648


[2024-11-28 15:32:01.143846] Using name _ShortName

[2024-11-28 15:32:01.146202] Loading cell metadata...

[2024-11-28 15:32:01.213825] Retrieving classes...

[2024-11-28 15:32:01.215237] Loading file...

[2024-11-28 15:32:21.109485] Setting dimnames...

[2024-11-28 15:32:21.111458] Normalizing...



[1] 59480 13916


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 32307 by 13916

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 13916 cells



  |                                                                      |   0%

“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




Found 50 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 32307 genes





Computing corrected count matrix for 32307 genes





Wall clock passed: Time difference of 2.406043 mins

[2024-11-28 15:34:46.730622] MaxScaling...



[1] 32307 13916
[1] 13916
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl InNeu InNeuIPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 3.3 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/8.5/ctd__ShortName.rda



15/29
   Subregion  Age                                                       path
15    Cortex 14.0 /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/14.0
     size
15 968556


[2024-11-28 15:34:56.277791] Using name _ShortName

[2024-11-28 15:34:56.280172] Loading cell metadata...

[2024-11-28 15:34:56.35094] Retrieving classes...

[2024-11-28 15:34:56.353074] Loading file...

[2024-11-28 15:35:19.033172] Setting dimnames...

[2024-11-28 15:35:19.035148] Normalizing...



[1] 59480 20255


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 33045 by 20255

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 20255 cells



  |                                                                      |   0%

“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




“NAs produced by integer overflow”




Found 65 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 33045 genes





Computing corrected count matrix for 33045 genes





Wall clock passed: Time difference of 3.475498 mins

[2024-11-28 15:38:48.811849] MaxScaling...



[1] 33045 20255
[1] 20255
$level1class
[1] Eryt Eryt Eryt Eryt Eryt Eryt
Levels: Eryt ExNeu ExNeuBl ExNeuIPC GlioBl Imm InNeu Pery RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 5.0 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/14.0/ctd__ShortName.rda



16/29
   Subregion Age                                                         path
16 Forebrain 7.5 /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/7.5
      size
16 1046604


[2024-11-28 15:39:01.146768] Using name _ShortName

[2024-11-28 15:39:01.149274] Loading cell metadata...

[2024-11-28 15:39:01.242255] Retrieving classes...

[2024-11-28 15:39:01.243834] Loading file...

[2024-11-28 15:39:25.127088] Setting dimnames...

[2024-11-28 15:39:25.13054] Normalizing...



[1] 59480 38533


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 29817 by 38533

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 38533 cells





Found 17 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 29817 genes





Computing corrected count matrix for 29817 genes





Wall clock passed: Time difference of 6.127901 mins

[2024-11-28 15:45:34.241385] MaxScaling...



[1] 29817 38533
[1] 38533
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl InNeu InNeuIPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 8.6 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/7.5/ctd__ShortName.rda



17/29
   Subregion  Age                                                          path
17 Forebrain 10.0 /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/10.0
      size
17 1079168


[2024-11-28 15:45:48.02532] Using name _ShortName

[2024-11-28 15:45:48.027681] Loading cell metadata...

[2024-11-28 15:45:48.123256] Retrieving classes...

[2024-11-28 15:45:48.124788] Loading file...

[2024-11-28 15:46:11.992585] Setting dimnames...

[2024-11-28 15:46:11.994541] Normalizing...



[1] 59480 37180


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 30153 by 37180

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 37180 cells





Found 7 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 30153 genes





Computing corrected count matrix for 30153 genes





Wall clock passed: Time difference of 6.003 mins

[2024-11-28 15:52:13.542571] MaxScaling...



[1] 30153 37180
[1] 37180
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl InNeu OPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 8.4 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/10.0/ctd__ShortName.rda



18/29
   Subregion Age                                                         path
18 Forebrain 5.5 /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/5.5
      size
18 1151868


[2024-11-28 15:52:29.518347] Using name _ShortName

[2024-11-28 15:52:29.520899] Loading cell metadata...

[2024-11-28 15:52:29.558993] Retrieving classes...

[2024-11-28 15:52:29.560747] Loading file...

[2024-11-28 15:52:58.29123] Setting dimnames...

[2024-11-28 15:52:58.29478] Normalizing...



[1] 59480 17574


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 32754 by 17574

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 17574 cells





Found 89 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 32754 genes





Computing corrected count matrix for 32754 genes





Wall clock passed: Time difference of 3.169592 mins

[2024-11-28 15:56:10.053575] MaxScaling...



[1] 32754 17574
[1] 17574
$level1class
[1] Eryt Eryt Eryt Eryt Eryt Eryt
Levels: Eryt ExNeu ExNeuBl ExNeuIPC Fibr InNeu InNeuBl RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 4.3 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/5.5/ctd__ShortName.rda



19/29
   Subregion Age                                                         path
19 Forebrain 5.0 /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/5.0
      size
19 1158112


[2024-11-28 15:56:23.116874] Using name _ShortName

[2024-11-28 15:56:23.119263] Loading cell metadata...

[2024-11-28 15:56:23.193474] Retrieving classes...

[2024-11-28 15:56:23.195578] Loading file...

[2024-11-28 15:56:50.527529] Setting dimnames...

[2024-11-28 15:56:50.530369] Normalizing...



[1] 59480 25980


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 30697 by 25980

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 25980 cells





Found 89 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 30697 genes





Computing corrected count matrix for 30697 genes





Wall clock passed: Time difference of 4.310145 mins

[2024-11-28 16:01:10.232287] MaxScaling...



[1] 30697 25980
[1] 25980
$level1class
[1] ExNeuBl ExNeuBl ExNeuBl ExNeuBl ExNeuBl ExNeuBl
Levels: ExNeu ExNeuBl ExNeuIPC InNeu InNeuBl RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 5.9 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/5.0/ctd__ShortName.rda



20/29
   Subregion  Age                                                       path
20    Cortex 13.0 /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/13.0
      size
20 1171456


[2024-11-28 16:01:23.555709] Using name _ShortName

[2024-11-28 16:01:23.558053] Loading cell metadata...

[2024-11-28 16:01:23.648511] Retrieving classes...

[2024-11-28 16:01:23.650715] Loading file...

[2024-11-28 16:01:51.057647] Setting dimnames...

[2024-11-28 16:01:51.06121] Normalizing...



[1] 59480 20734


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 32440 by 20734

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 20734 cells





Found 60 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 32440 genes





Computing corrected count matrix for 32440 genes





Wall clock passed: Time difference of 3.569477 mins

[2024-11-28 16:05:26.705348] MaxScaling...



[1] 32440 20734
[1] 20734
$level1class
[1] Eryt Eryt Eryt Eryt Eryt Eryt
Levels: Eryt ExNeu ExNeuBl ExNeuIPC GlioBl Imm InNeu OPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 5.0 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/13.0/ctd__ShortName.rda



21/29
   Subregion  Age                                                       path
21    Cortex 12.0 /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/12.0
      size
21 1264352


[2024-11-28 16:05:40.200557] Using name _ShortName

[2024-11-28 16:05:40.202966] Loading cell metadata...

[2024-11-28 16:05:40.269767] Retrieving classes...

[2024-11-28 16:05:40.271204] Loading file...

[2024-11-28 16:06:11.115378] Setting dimnames...

[2024-11-28 16:06:11.118863] Normalizing...



[1] 59480 18434


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 33211 by 18434

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 18434 cells





Found 45 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 33211 genes





Computing corrected count matrix for 33211 genes





Wall clock passed: Time difference of 3.337141 mins

[2024-11-28 16:09:32.852985] MaxScaling...



[1] 33211 18434
[1] 18434
$level1class
[1] Eryt Eryt Eryt Eryt Eryt Eryt
Levels: Eryt ExNeu ExNeuBl ExNeuIPC GlioBl InNeu Pery RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 4.6 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/12.0/ctd__ShortName.rda



22/29
   Subregion  Age                                                          path
22 Subcortex 12.0 /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/12.0
      size
22 1348348


[2024-11-28 16:09:46.820424] Using name _ShortName

[2024-11-28 16:09:46.822908] Loading cell metadata...

[2024-11-28 16:09:46.910348] Retrieving classes...

[2024-11-28 16:09:46.911812] Loading file...

[2024-11-28 16:10:18.895221] Setting dimnames...

[2024-11-28 16:10:18.897175] Normalizing...



[1] 59480 28217


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 34076 by 28217

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 28217 cells





Found 107 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 34076 genes





Computing corrected count matrix for 34076 genes





Wall clock passed: Time difference of 4.906711 mins

[2024-11-28 16:15:14.644751] MaxScaling...



[1] 34076 28217
[1] 28217
$level1class
[1] Eryt Eryt Eryt Eryt Eryt Eryt
12 Levels: Endo Eryt ExNeu ExNeuBl ExNeuIPC GlioBl Imm InNeu InNeuIPC ... RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 7.2 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/12.0/ctd__ShortName.rda



23/29
   Subregion  Age                                                          path
23 Subcortex 11.5 /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/11.5
      size
23 1479148


[2024-11-28 16:15:30.647633] Using name _ShortName

[2024-11-28 16:15:30.650097] Loading cell metadata...

[2024-11-28 16:15:30.737191] Retrieving classes...

[2024-11-28 16:15:30.739312] Loading file...

[2024-11-28 16:16:06.982768] Setting dimnames...

[2024-11-28 16:16:06.986171] Normalizing...



[1] 59480 25699


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 33934 by 25699

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 25699 cells





Found 93 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 33934 genes





Computing corrected count matrix for 33934 genes





Wall clock passed: Time difference of 4.395787 mins

[2024-11-28 16:20:32.336175] MaxScaling...



[1] 33934 25699
[1] 25699
$level1class
[1] Eryt Eryt Eryt Eryt Eryt Eryt
12 Levels: Endo Eryt ExNeu ExNeuBl ExNeuIPC GlioBl Imm InNeu InNeuIPC ... RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 6.5 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Subcortex/by_Age/11.5/ctd__ShortName.rda



24/29
   Subregion Age                                                         path
24 Forebrain 8.5 /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/8.5
      size
24 1620388


[2024-11-28 16:20:46.648052] Using name _ShortName

[2024-11-28 16:20:46.650528] Loading cell metadata...

[2024-11-28 16:20:46.756599] Retrieving classes...

[2024-11-28 16:20:46.759017] Loading file...

[2024-11-28 16:21:23.506892] Setting dimnames...

[2024-11-28 16:21:23.510293] Normalizing...



[1] 59480 50305


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 31056 by 50305

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 50305 cells





Found 30 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 31056 genes





Computing corrected count matrix for 31056 genes





Wall clock passed: Time difference of 7.977418 mins

[2024-11-28 16:29:23.423696] MaxScaling...



[1] 31056 50305
[1] 50305
$level1class
[1] Eryt Eryt Eryt Eryt Eryt Eryt
12 Levels: Eryt ExNeu ExNeuBl ExNeuIPC Fibr GlioBl Imm InNeu ... RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 11.6 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Forebrain/by_Age/8.5/ctd__ShortName.rda



25/29
   Subregion  Age                                                       path
25    Cortex 10.0 /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/10.0
      size
25 1775284


[2024-11-28 16:29:43.498703] Using name _ShortName

[2024-11-28 16:29:43.501509] Loading cell metadata...

[2024-11-28 16:29:43.626671] Retrieving classes...

[2024-11-28 16:29:43.62947] Loading file...

[2024-11-28 16:30:25.618175] Setting dimnames...

[2024-11-28 16:30:25.620485] Normalizing...



[1] 59480 51210


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 31140 by 51210

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 51210 cells





Found 12 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 31140 genes





Computing corrected count matrix for 31140 genes





Wall clock passed: Time difference of 8.073132 mins

[2024-11-28 16:38:31.631256] MaxScaling...



[1] 31140 51210
[1] 51210
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl Imm InNeu RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 11.9 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/10.0/ctd__ShortName.rda



26/29
   Subregion Age                                                      path
26    Cortex 6.9 /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/6.9
      size
26 1815616


[2024-11-28 16:38:52.924105] Using name _ShortName

[2024-11-28 16:38:52.926831] Loading cell metadata...

[2024-11-28 16:38:53.010751] Retrieving classes...

[2024-11-28 16:38:53.013801] Loading file...

[2024-11-28 16:39:36.035235] Setting dimnames...

[2024-11-28 16:39:36.037385] Normalizing...



[1] 59480 35533


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 33667 by 35533

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 35533 cells





Found 30 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 33667 genes





Computing corrected count matrix for 33667 genes





Wall clock passed: Time difference of 6.041704 mins

[2024-11-28 16:45:40.277829] MaxScaling...



[1] 33667 35533
[1] 35533
$level1class
[1] GlioBl GlioBl GlioBl GlioBl GlioBl GlioBl
Levels: ExNeu ExNeuBl ExNeuIPC GlioBl InNeu InNeuIPC RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 8.9 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/6.9/ctd__ShortName.rda



27/29
   Subregion Age                                                        path
27  Striatum 6.9 /scratch/xoel/ewce_data.v7/by_Subregion/Striatum/by_Age/6.9
      size
27 2921816


[2024-11-28 16:45:58.944648] Using name _ShortName

[2024-11-28 16:45:58.947123] Loading cell metadata...

[2024-11-28 16:45:59.092379] Retrieving classes...

[2024-11-28 16:45:59.094519] Loading file...

[2024-11-28 16:47:08.12964] Setting dimnames...

[2024-11-28 16:47:08.133054] Normalizing...



[1] 59480 94494


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 35146 by 94494

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 94494 cells





Found 12 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 35146 genes





Computing corrected count matrix for 35146 genes





Wall clock passed: Time difference of 16.05395 mins

[2024-11-28 17:03:13.18418] MaxScaling...



[1] 35146 94494
[1] 94494
$level1class
[1] Eryt Eryt Eryt Eryt Eryt Eryt
Levels: Eryt ExNeu ExNeuBl ExNeuIPC GlioBl Imm InNeu InNeuIPC Pery RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 24.7 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Striatum/by_Age/6.9/ctd__ShortName.rda



28/29
   Subregion  Age                                                       path
28    Cortex 11.5 /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/11.5
      size
28 3100592


[2024-11-28 17:03:52.94269] Using name _ShortName

[2024-11-28 17:03:52.945504] Loading cell metadata...

[2024-11-28 17:03:53.042579] Retrieving classes...

[2024-11-28 17:03:53.04628] Loading file...

[2024-11-28 17:05:07.457139] Setting dimnames...

[2024-11-28 17:05:07.45927] Normalizing...



[1] 59480 46275


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 35440 by 46275

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 46275 cells





Found 15 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 35440 genes





Computing corrected count matrix for 35440 genes





Wall clock passed: Time difference of 8.097476 mins

[2024-11-28 17:13:15.620568] MaxScaling...



[1] 35440 46275
[1] 46275
$level1class
[1] Eryt Eryt Eryt Eryt Eryt Eryt
12 Levels: Eryt ExNeu ExNeuBl ExNeuIPC Fibr GlioBl Imm InNeu InNeuIPC ... RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 12.2 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Cortex/by_Age/11.5/ctd__ShortName.rda



29/29
   Subregion Age                                                     path
29     Brain 6.0 /scratch/xoel/ewce_data.v7/by_Subregion/Brain/by_Age/6.0
      size
29 5814324


[2024-11-28 17:13:41.926394] Using name _ShortName

[2024-11-28 17:13:41.928809] Loading cell metadata...

[2024-11-28 17:13:42.049043] Retrieving classes...

[2024-11-28 17:13:42.052426] Loading file...

[2024-11-28 17:15:51.683852] Setting dimnames...

[2024-11-28 17:15:51.686577] Normalizing...



[1] 59480 85160


Calculating cell attributes from input UMI matrix: log_umi

Variance stabilizing transformation of count matrix of size 37851 by 85160

Model formula is y ~ log_umi

Get Negative Binomial regression parameters per gene

Using 2000 genes, 85160 cells





Found 13 outliers - those will be ignored in fitting/regularization step


Second step: Get residuals using fitted parameters for 37851 genes





Computing corrected count matrix for 37851 genes





Wall clock passed: Time difference of 13.96203 mins

[2024-11-28 17:29:50.57417] MaxScaling...



[1] 37851 85160
[1] 85160
$level1class
[1] Imm Imm Imm Imm Imm Imm
Levels: ExNeu ExNeuBl ExNeuIPC Imm InNeu InNeuBl InNeuIPC Pery RGC



96 core(s) assigned as workers (0 reserved).

“sparse->dense coercion: allocating vector of size 24.0 GiB”
+ Calculating normalized mean expression.

Converting to sparse matrix.

+ Calculating normalized specificity.

Converting to sparse matrix.

Converting to sparse matrix.

+ Saving results ==>  /scratch/xoel/ewce_data.v7/by_Subregion/Brain/by_Age/6.0/ctd__ShortName.rda

