### setup

In [None]:
library(orthogene)
library(Seurat)
library(dplyr)

# Project root; the relative paths used below are resolved against it.
wd <- '~/codebases/MacBrainDev/'
setwd(wd)

# Worker count: taken from SLURM_CPUS_PER_TASK when that variable parses as a
# number, otherwise from the number of cores detected on this machine.
n.cores <- as.numeric(Sys.getenv('SLURM_CPUS_PER_TASK'))
if (is.na(n.cores)) {
    n.cores <- parallel::detectCores()
}

data.dir <- 'data/'

base.name <- 'All.MNN.v1.org.fct'

# Input file locations
indata.fname <- sprintf('%s%s.rds', data.dir, base.name)
pc.groups.file <- 'raw/Patterning_center_cluster_order.tsv'
disease.lists.file <- paste0(data.dir, 'all_diseases_list.rds')

# Read the input object and show its summary
indata <- readRDS(indata.fname)
indata

# One row per distinct (subclass, subtype) pair found in the cell metadata,
# with rows named after the subtype.
clusters <- unique(indata@meta.data[, c('subclass', 'subtype')])
clusters <- data.frame(clusters)
row.names(clusters) <- clusters[['subtype']]

# Show the subtypes whose subclass is 'Patterning centers'
clusters[clusters[['subclass']] == 'Patterning centers', ]

In [None]:
# Patterning-center grouping table: tab-separated, first line of the file
# skipped, first column used as row names (remaining columns get V2, V3, ...).
pc.groups <- read.table(pc.groups.file, sep = '\t', skip = 1, row.names = 1)
pc.groups

# 1. All clusters (levels() assumes `subclass` is a factor)
g1 <- levels(clusters[['subclass']])
# 2. All subclusters (levels() assumes `subtype` is a factor)
g2 <- levels(clusters[['subtype']])

# 3. Patterning-center groups, different versions: rows flagged with 1 in
#    columns V2, V3 and V4 respectively
g3 <- row.names(pc.groups)[pc.groups[['V2']] == 1]
g4 <- row.names(pc.groups)[pc.groups[['V3']] == 1]
g5 <- row.names(pc.groups)[pc.groups[['V4']] == 1]

# 4. Subtypes of the 'Patterning centers', 'dorsal NSC' and 'GE NSC' subclasses
g6 <- as.character(
    clusters$subtype[
        as.character(clusters$subclass) %in% c('Patterning centers', 'dorsal NSC', 'GE NSC')
    ]
)
#    ... and of 'dorsal NSC' and 'GE NSC' only
g7 <- as.character(
    clusters$subtype[
        as.character(clusters$subclass) %in% c('dorsal NSC', 'GE NSC')
    ]
)

# Each group records the metadata column to group by and the identities kept.
grupos <- list(
    All_clusters      = list(group.by = 'subclass', idents = g1),
    All_subclusters   = list(group.by = 'subtype',  idents = g2),
    PCs_1             = list(group.by = 'subtype',  idents = g3),
    PCs_2             = list(group.by = 'subtype',  idents = g4),
    PCs_3             = list(group.by = 'subtype',  idents = g5),
    Progenitors       = list(group.by = 'subtype',  idents = g6),
    Neural_Stem_Cells = list(group.by = 'subtype',  idents = g7)
)

# Store each group's own name inside it so it can be passed around on its own.
for (gname in names(grupos)) {
    grupos[[gname]]$group.name <- gname
}

grupos
saveRDS(grupos, file = 'ewce_exp_groups.rds')

In [None]:
## 2. INDATA (mmulatta) -> HSDATA (hsapiens)

# Map the feature names of `indata` from macaque to human orthologs.
# The result is used with the human gene as row name and the original gene in
# the `input_gene` column.
exp.ort <- convert_orthologs(rownames(indata), input_species = 'mmulatta', output_species = 'hsapiens', verbose = TRUE)

# Lookup vectors in both directions (names = source gene, values = target gene)
data.mm.to.hs <- setNames(rownames(exp.ort), exp.ort$input_gene)
data.hs.to.mm <- setNames(exp.ort$input_gene, rownames(exp.ort))

# Rename the rows of the count matrix explicitly. The `row.names` argument of
# CreateSeuratObject is documented as applying to data.frame input only, so
# relying on it with a matrix can leave the macaque names in place.
hs.counts <- indata[['RNA']]@counts[names(data.mm.to.hs), ]
rownames(hs.counts) <- unname(data.mm.to.hs)

hsdata <- CreateSeuratObject(counts = hs.counts, project = 'hs', assay = 'RNA', meta.data = indata@meta.data,
                             min.cells = 0, min.features = 0)

# Carry over the existing normalised values for the same genes, in the same
# order. With min.cells = 0 and min.features = 0 no rows are expected to be
# dropped, so the feature names stored in `hsdata` can be reused directly.
hs.norm <- indata[['RNA']]@data[names(data.mm.to.hs), ]
stopifnot(nrow(hs.norm) == nrow(hsdata))
rownames(hs.norm) <- rownames(hsdata)
hsdata <- SetAssayData(object = hsdata, slot = 'data', assay = 'RNA', new.data = hs.norm)

# Free the temporary matrices
rm(hs.counts, hs.norm)

hsdata

In [None]:
## 3. Risk genes in data

# Disease gene lists as stored on disk, and the pooled set of unique genes
disease_lists <- readRDS(disease.lists.file)
disease_genes <- unique(unlist(disease_lists))

# The same lists restricted to genes that are features of `hsdata`
disease_lists.data <- lapply(
    disease_lists,
    function(genes) {
        genes[is.element(genes, rownames(hsdata))]
    }
)
disease_genes.data <- unique(unlist(disease_lists.data))

# Bundle the gene sets and the ortholog lookups for downstream use
important.gene.sets <- list(
    disease_lists      = disease_lists,
    disease_lists.data = disease_lists.data,
    disease_genes      = disease_genes,
    disease_genes.data = disease_genes.data,
    data.mm.to.hs      = data.mm.to.hs,
    data.hs.to.mm      = data.hs.to.mm,
    hs.data.genes      = rownames(hsdata)
)

saveRDS(important.gene.sets, file = 'data/ewce_important_genesets.rds')

### retrieve data functions

In [None]:
### Subset data from groups
# Keep the cells of `sobj` whose metadata column `grupo$group.by` takes one of
# the values in `grupo$idents`, and make that column the active identity.
#
# Arguments:
#   sobj:  object with a `meta.data` slot (a Seurat object in this notebook).
#   grupo: list with `group.by` (name of a metadata column) and `idents`
#          (values of that column to keep).
# Returns the subsetted object.
# Stops with an explicit message when `group.by` is not a single metadata
# column name.
subset_grupo <- function(sobj, grupo){

    group.col <- grupo$group.by
    if (length(group.col) != 1 || !group.col %in% colnames(sobj@meta.data)){
        stop("Unsupported group.by '", paste(group.col, collapse = ', '),
             "': not a metadata column of the object", call. = FALSE)
    }

    # Select cells by name rather than through a non-standard-evaluation
    # expression, so any metadata column can be used for grouping.
    keep <- rownames(sobj@meta.data)[sobj@meta.data[[group.col]] %in% grupo$idents]
    subseurat <- subset(sobj, cells = keep)

    Idents(subseurat) <- group.col
    return(subseurat)
}


### FindAllMarkers
# Run FindAllMarkers on the 'RNA' assay of `subdata` with the fixed settings
# below (Wilcoxon test on the 'data' slot, positive markers only).
#
# Arguments:
#   subdata:             object passed to FindAllMarkers.
#   features:            features to test; NULL is passed through unchanged.
#   max.cells.per.ident: passed through to FindAllMarkers.
# Returns whatever FindAllMarkers returns.
markers_subdata <- function(subdata,
                            features = NULL,
                            max.cells.per.ident = 1000){

    # Note: no trailing comma after the last argument. A trailing comma would
    # pass an empty argument into `...`.
    markers <- FindAllMarkers(
        subdata,
        assay = 'RNA',
        features = features,
        logfc.threshold = 0.25,
        test.use = "wilcox",
        slot = "data",
        min.pct = 0.1,
        min.diff.pct = -Inf,
        node = NULL,
        verbose = TRUE,
        only.pos = TRUE,
        max.cells.per.ident = max.cells.per.ident,
        random.seed = 1,
        latent.vars = NULL,
        min.cells.feature = 3,
        min.cells.group = 3,
        pseudocount.use = 1,
        mean.fxn = NULL,
        fc.name = NULL,
        base = 2,
        return.thresh = 0.01,
        densify = FALSE
        )
    return(markers)
    }

### Scale data using disease-risk & marker genes
# Run ScaleData on the 'RNA' assay of `subdata` for the given `features`,
# regressing out 'nCount_RNA', and return the updated object.
# The number of features is printed first.
scale_subdata <- function(subdata,
                          features=NULL){

    print(sprintf('Scaling %d genes', length(features)))

    ScaleData(
        subdata,
        assay = 'RNA',
        features = features,
        vars.to.regress = 'nCount_RNA',
        split.by = NULL,
        model.use = "linear",
        use.umi = FALSE,
        do.scale = TRUE,
        do.center = TRUE,
        scale.max = 1e6,
        min.cells.to.block = 3000,
        verbose = TRUE
    )
}


### DotPlot data using disease genes
# For every named gene list in `glists`, take the `$data` table of a DotPlot of
# `subdata` over that list, tag it with the list name in `Disease.listname`,
# and row-bind all tables. Returns NULL when there are no named lists.
dp_subdata <- function(subdata, glists=disease_lists.data){

    list.names <- names(glists)
    pieces <- vector('list', length(list.names))

    for (i in seq_along(list.names)){
        dis <- list.names[[i]]
        piece <- DotPlot(subdata, features=glists[[dis]])$data
        piece$Disease.listname <- dis
        pieces[[i]] <- piece
    }

    do.call(rbind, pieces)
}
    

### Get data meta-function
# Produce and cache the per-group outputs for one entry of `grupos`.
#
# Arguments:
#   grupo: list with `group.by`, `idents` and `group.name`.
#   sobj:  object to subset (defaults to the global `hsdata`).
#   cat:   if TRUE, re-run with printed output captured into a report file.
#
# Files, all under the global `data.dir` and suffixed with grupo$group.name:
#   Disease_expression_data.Markers.<name>.csv    markers table
#   Disease_expression_data.DotPlot.<name>.csv    dot plot data
#   Disease_expression_data.ScaleData.<name>.rds  scale.data matrix cache
#   Disease_expression_data.Report.<name>.txt     captured output (cat=TRUE)
# Existing files are reused rather than recomputed.
#
# Returns NULL; the results are the files written.
get_exp_data <- function(grupo, sobj=hsdata, cat=FALSE){

    # Create the output directory first: the report file below lives in it too.
    dir.create(data.dir, showWarnings = FALSE, recursive = TRUE)

    if (cat){
        report.fname <- paste0(data.dir, 'Disease_expression_data.Report.', grupo$group.name, '.txt')
        # Only the formals of this function may be passed on; `data.dir` is a
        # global, not a parameter.
        return(capture.output(get_exp_data(grupo=grupo, sobj=sobj, cat=FALSE),
                              file = report.fname))
    }

    print(paste('INIT:', grupo$group.name))

    markers.fname <- paste0(data.dir, 'Disease_expression_data.Markers.', grupo$group.name, '.csv')
    dpdata.fname <- paste0(data.dir, 'Disease_expression_data.DotPlot.', grupo$group.name, '.csv')
    scaled.fname <- paste0(data.dir, 'Disease_expression_data.ScaleData.', grupo$group.name, '.rds')

    have.markers <- file.exists(markers.fname)
    have.dpdata <- file.exists(dpdata.fname)

    if (have.markers && have.dpdata){
        print('done')
        return(NULL)
    }

    # 'ss'       : dot plot data exists, only markers are missing -> subset only
    # 'load.rds' : dot plot data missing, scaled cache present    -> reuse cache
    # 'ss+sc'    : dot plot data missing, no cache                -> subset + scale
    if (have.dpdata){
        mode <- 'ss'
    } else if (file.exists(scaled.fname)){
        mode <- 'load.rds'
    } else {
        mode <- 'ss+sc'
    }

    print(mode)

    if (mode == 'load.rds'){
        print('Init load data')
        cached <- readRDS(scaled.fname)
        if (inherits(cached, 'Seurat')){
            # A cache that holds a whole object is used as is.
            subdata <- cached
        } else {
            # The cache written below holds only the scale.data matrix, so the
            # object is rebuilt by subsetting and the matrix is attached to it.
            subdata <- subset_grupo(sobj, grupo)
            subdata <- SetAssayData(object = subdata, slot = 'scale.data', assay = 'RNA', new.data = cached)
        }
        print('Done load data')
    } else {
        print('Init subset data')
        subdata <- subset_grupo(sobj, grupo)
        print('Done subset data')
    }

    if (!have.markers){
        print('Init markers')
        markers <- markers_subdata(subdata, max.cells.per.ident = Inf)
        write.csv(markers, markers.fname, row.names=TRUE)
    } else {
        markers <- read.csv(markers.fname, row.names=1)
    }
    # Genes to scale: marker genes plus the disease genes present in the data
    scale.genes <- union(markers$gene, disease_genes.data)
    print('Done markers')

    if (mode == 'ss+sc'){
        print('Init scale data')
        subdata <- scale_subdata(subdata, features=scale.genes)
        print(dim(subdata[['RNA']]@scale.data))

        if (!file.exists(scaled.fname)){
            print('Init save scale.data')
            saveRDS(object=subdata[['RNA']]@scale.data, file = scaled.fname)
        }
        print('Done save scale.data')
    }
    print('Done scale data')

    if (!have.dpdata){
        print('Init dot plot data')
        dpdata <- dp_subdata(subdata)
        write.csv(dpdata, dpdata.fname, row.names=TRUE)
    }
    print('Done dot plot data')


    print(paste('DONE:', grupo$group.name))
    return(NULL)
}


## Run
### Set markers for All_subclusters from the markers used for EWCE

In [None]:
# Group whose marker table is taken from the existing EWCE markers
grupo <- getElement(grupos, 'All_subclusters')

In [None]:
# Load the existing marker table and translate its genes to human names
subcs.markers.fname <- paste0(data.dir, 'ewce_downsampled_markers.All.MNN.v1.org.fct.dsInf.rds')
subcs.markers <- readRDS(subcs.markers.fname)
# as.character(): indexing a named vector with a factor would use the integer
# codes instead of the gene names. unname(): keep the column a plain vector.
subcs.markers$gene <- unname(data.mm.to.hs[as.character(subcs.markers$gene)])
# Drop markers that have no entry in the mapping
subcs.markers <- subset(subcs.markers, !is.na(gene))

In [None]:
# Write the translated markers to the path get_exp_data() reads for this group.
# It is built from `data.dir` exactly as in that function.
markers.fname <- paste0(data.dir, 'Disease_expression_data.Markers.', grupo$group.name, '.csv')
markers.fname

write.csv(subcs.markers, markers.fname, row.names=TRUE)

### Run all

In [None]:
# Restrict the run to these two groups, in this order
grupos <- grupos[match(c('PCs_1', 'All_subclusters'), names(grupos))]

In [None]:
library(parallel)

# One worker per group, capped at the core count computed during setup
# (`n.cores`) and never below one.
n.workers <- max(1L, min(length(grupos), n.cores, na.rm = TRUE))

r <- mclapply(grupos, get_exp_data, mc.cores = n.workers)

# mclapply returns failures as 'try-error' elements instead of stopping, so
# report them explicitly.
failed <- vapply(r, inherits, logical(1), what = 'try-error')
if (any(failed)){
    warning('get_exp_data failed for: ', paste(names(r)[failed], collapse = ', '), call. = FALSE)
}

r