In [None]:
# Project root; the relative paths used in this notebook ('raw/...', 'data/...')
# are resolved against it after setwd().
wd <- '~/codebases/MacBrainDev/'
setwd(wd)
# Create the input and output folders only when they are missing, so that
# re-running the notebook does not emit "already exists" warnings while real
# creation failures are still reported.
for (folder in c('raw', 'data')){
    if (!dir.exists(folder)){
        dir.create(folder)
    }
}

# Diseases

## Create table of disease-gene associations

### Look-up diseases in directory

In [None]:
# Folder holding the raw disease-gene tables; the trailing slash matters because
# file names are appended to it with paste0() further down.
diseases_dir <- 'raw/Disease_genes/'
# Show which files are available.
list.files(path = diseases_dir)

### DISGENET

Diseases to include:

In [None]:
# Two-column, tab-separated table: disease name and an include flag.
disg.disnames.fname <- 'raw/Disgenet_forXoel.txt'
disg.disnames <- read.table(disg.disnames.fname, sep='\t', col.names=c('diseaseName', 'include'),
                            stringsAsFactors=FALSE)

# Keep the diseases whose flag is TRUE. which() drops flags that cannot be read
# as logical (NA); plain logical subsetting would turn each of them into an NA
# disease name.
disgenet_diseases <- disg.disnames$diseaseName[which(as.logical(disg.disnames$include))]

Disgenet table

In [None]:
# Curated DisGeNET gene-disease associations (tab-separated, with header).
disgenet.fname <- paste0(diseases_dir, 'curated_gene_disease_associations.tsv')
# Size threshold applied to the gene lists in a later cell.
disgenet.min_genes <- 30

disgenet <- read.delim(disgenet.fname, row.names=NULL)
# Pool the two oligo* entities under a single label.
disgenet$diseaseName[disgenet$diseaseName %in% c('Mixed oligoastrocytoma', 'oligodendroglioma')] <- 'M.Oligoastr+Oligodendrogliomas'

# Restrict the table to the selected diseases plus the pooled label.
disgenet <- disgenet[disgenet$diseaseName %in% c(disgenet_diseases, 'M.Oligoastr+Oligodendrogliomas'),]

# One vector of unique gene symbols per disease, names prefixed with the source.
disgenet_genes <- lapply(split(disgenet$geneSymbol, disgenet$diseaseName), unique)
disgenet_genes <- setNames(disgenet_genes, paste0('DISGENET::', names(disgenet_genes)))

Medulloblastomas

In [None]:
# Condense medulloblastomas: every list whose name contains "Medullo"/"medullo"
# is merged into one list of unique genes stored as 'DISGENET::Medulloblastomas'.
# The matching names are computed once and reused; TRUE/FALSE are spelled out
# because the T/F shorthands are ordinary variables that can be reassigned.
medullo.names <- grep('(M|m)edullo', names(disgenet_genes), fixed=FALSE, value=TRUE)
medullo <- unique(unlist(disgenet_genes[medullo.names]))
disgenet_genes[medullo.names] <- NULL
disgenet_genes[['DISGENET::Medulloblastomas']] <- medullo

Minimum number of genes

In [None]:
# Keep only the diseases with strictly more than disgenet.min_genes genes.
disgenet_genes <- disgenet_genes[lengths(disgenet_genes) > disgenet.min_genes]

Remove mixed gliomas

In [None]:
# Drop the 'mixed gliomas' entry (a no-op when it is not present).
disgenet_genes[['DISGENET::mixed gliomas']] <- NULL

### All other tables

In [None]:
# Read every '.txt' table of the disease directory into a list keyed by the file
# name without extension. The pattern is escaped and anchored: a bare '.txt' is
# a regular expression in which '.' matches any character and which may match
# anywhere in the name, so it would also pick up files that merely contain
# "txt". The directory is listed once so that tables and names cannot diverge.
dis_files <- grep('\\.txt$', dir(diseases_dir), value=TRUE)
dis_dfs <- setNames(lapply(dis_files, function(x){read.table(paste0(diseases_dir, x))}),
                    tools::file_path_sans_ext(dis_files))

# Single-column tables are plain gene lists and are moved into dis_genes.
# Multi-column tables stay in dis_dfs with their first row promoted to header
# (read.table() is called without header=TRUE above).
dis_genes <- list()

for (dis in names(dis_dfs)){
    # These two tables are skipped and left untouched in dis_dfs.
    if (dis %in% c('ASD_CANCER','SFARI_DEVELOPMENT')){
        next
    }

    if (ncol(dis_dfs[[dis]]) > 1){
        # Convert the header row cell by cell, so that factor columns yield
        # their labels rather than their integer codes.
        colnames(dis_dfs[[dis]]) <- unname(vapply(dis_dfs[[dis]][1, , drop=FALSE],
                                                  as.character, character(1)))
        dis_dfs[[dis]] <- dis_dfs[[dis]][-1,]
    } else {
        dis_genes[[dis]] <- as.character(dis_dfs[[dis]][,1])
        dis_dfs[[dis]] <- NULL
    }
}

DD

In [None]:
# Shorten the list name 'DD_genes' to 'DD' (literal, not regex, replacement).
names(dis_genes) <- gsub(pattern = 'DD_genes', replacement = 'DD', x = names(dis_genes), fixed = TRUE)

GWAS

In [None]:
# Build one gene vector per GWAS dataset, named 'GWAS::<Dataset>'. The work is
# done on a local copy of the table, which is removed from dis_dfs afterwards.
gwas_genes <- local({
    gwas <- dis_dfs[['GWAS_genes']]
    gwas <- gwas[order(gwas$Dataset),]
    split(gwas$Gene, paste('GWAS', gwas$Dataset, sep='::'))
})
dis_dfs[['GWAS_genes']] <- NULL

MAGMA

In [None]:
# Index the MAGMA table by gene symbol, drop the two identifier columns and
# prefix the remaining column names with 'MAGMA::'.
dis_dfs[['AllDiseasesMAGMA_GeneLevel_181220']] <- local({
    magma <- dis_dfs[['AllDiseasesMAGMA_GeneLevel_181220']]
    rownames(magma) <- magma$GENE_SYMBOL
    magma$GENE_SYMBOL <- NULL
    magma$GENE <- NULL
    colnames(magma) <- paste('MAGMA', colnames(magma), sep='::')
    magma
})

In [None]:
# Upper bound (exclusive) on the values kept when building magma_genes.
max.pval <- 0.05
# Maximum number of genes kept per MAGMA column.
top.magma <- 200

In [None]:
# Convert every MAGMA column to numeric in place. Assigning into `[]` keeps the
# data frame, its row names (gene symbols) and its column names. apply() is
# avoided because it first coerces the whole frame to a matrix and returns a
# plain vector when the table has a single row. Non-numeric columns go through
# as.character() so that factor columns convert by label, not by integer code.
dis_dfs[['AllDiseasesMAGMA_GeneLevel_181220']][] <- lapply(
    dis_dfs[['AllDiseasesMAGMA_GeneLevel_181220']],
    function(column){
        if (is.numeric(column)){
            as.numeric(column)
        } else {
            as.numeric(as.character(column))
        }
    }
)

In [None]:
# For each MAGMA column: keep the non-missing values below max.pval, rank them
# in increasing order and return the names of at most top.magma genes.
magma_genes <- lapply(
    dis_dfs[['AllDiseasesMAGMA_GeneLevel_181220']],
    function(pvals){
        names(pvals) <- rownames(dis_dfs[['AllDiseasesMAGMA_GeneLevel_181220']])
        passing <- pvals[!is.na(pvals) & pvals < max.pval]
        ranked <- passing[order(passing, decreasing = FALSE)]
        head(names(ranked), top.magma)
    }
)

In [None]:
# The MAGMA table has been turned into magma_genes; remove it from dis_dfs.
dis_dfs[['AllDiseasesMAGMA_GeneLevel_181220']] <- NULL

### Merge lists

In [None]:
# Reorder each collection by its sorted list names.
dis_genes <- dis_genes[sort(names(dis_genes))]
disgenet_genes <- disgenet_genes[sort(names(disgenet_genes))]
gwas_genes <- gwas_genes[sort(names(gwas_genes))]
magma_genes <- magma_genes[sort(names(magma_genes))]

In [None]:
# Print a header followed by the list names of each collection.
invisible(Map(
    function(header, gene.lists){
        print(header)
        print(names(gene.lists))
    },
    c('### SFARI + ASD + DD', '### DISGENET', '### GWAS', '### MAGMA'),
    list(dis_genes, disgenet_genes, gwas_genes, magma_genes)
))

In [None]:
# Concatenate the four collections; in the merged list every '_' in a list name
# is replaced by a space.
all_diseases <- c(dis_genes, gwas_genes, magma_genes, disgenet_genes)
all_diseases <- setNames(all_diseases, gsub('_', ' ', names(all_diseases), fixed=TRUE))

print('All')
print(names(all_diseases))

In [None]:
# Persist the merged named list of gene vectors.
saveRDS(all_diseases, 'data/all_diseases_list.rds')

In [None]:
# Preview the first entries of every gene list (head() with its default size).
sapply(all_diseases, head)

## Order diseases

### Clustering

In [None]:
#' Order gene lists by hierarchical clustering of their gene membership.
#'
#' Builds a binary membership matrix (one row per list, one column per gene),
#' computes pairwise distances with dist(method = 'binary') and returns the
#' lists in the leaf order produced by hclust().
#'
#' @param dis.lists List of vectors of gene identifiers; names are optional.
#' @return `dis.lists` with its elements reordered; names and contents are not
#'   modified. Inputs with fewer than two lists, or with no genes at all, are
#'   returned unchanged because hclust() needs at least two objects to cluster.
order_diss <- function(dis.lists){
    all.dis.genes <- unique(unlist(dis.lists))
    if (length(dis.lists) < 2 || length(all.dis.genes) == 0){
        return(dis.lists)
    }
    # rbind() of equal-length 0/1 vectors always gives a lists-by-genes matrix.
    dis.mat <- do.call(rbind, lapply(unname(dis.lists), function(x){
        as.numeric(all.dis.genes %in% x)
    }))
    hc <- hclust(dist(dis.mat, method = 'binary'))
    # hc$order holds row positions, so positional indexing needs no name
    # look-up and also works for unnamed or duplicated list names.
    dis.lists[hc$order]
}

In [None]:
# Order all merged gene lists together with order_diss().
clustered_diseases <- order_diss(all_diseases)

In [None]:
# Order each source separately with order_diss(), then concatenate the results
# in the fixed source order: SFARI/ASD/DD, GWAS, MAGMA, DISGENET.
subclustered_diseases <- do.call(
    c,
    lapply(list(dis_genes, gwas_genes, magma_genes, disgenet_genes), order_diss)
)

# Same display names as the merged list: '_' replaced by a space.
subclustered_diseases <- setNames(
    subclustered_diseases,
    gsub('_', ' ', names(subclustered_diseases), fixed=TRUE)
)

### Groups and palette

In [None]:
# Source group of every gene list, keyed by list name.
dis.groups <- c(setNames(rep('SFARI + GWAS', length(names(c(dis_genes, gwas_genes)))),
                         names(c(dis_genes, gwas_genes))),
                setNames(rep('MAGMA', length(names(magma_genes))),
                         names(magma_genes)),
                setNames(rep('DISGENET', length(names(disgenet_genes))),
                         names(disgenet_genes)))
# Use the same display names as all_diseases, subclustered_diseases and
# dis.class, where '_' is replaced by a space. Without this step the keys of
# dis.groups keep their underscores and cannot be matched against the names of
# those objects.
names(dis.groups) <- gsub('_', ' ', names(dis.groups), fixed=TRUE)

In [None]:
# Class label assigned to every gene list, keyed by the display name of the list
# (names with spaces, as produced for all_diseases).
# NOTE(review): "GWAS::PD 2014" is labelled 'NDD' while "MAGMA::PD 2014" is
# labelled 'PSY'; every other trait shared by GWAS and MAGMA carries the same
# label in both sources -- confirm which label is intended for PD.
# NOTE(review): "DISGENET::mixed gliomas" is listed here although that list is
# removed from disgenet_genes in an earlier cell -- confirm whether the entry is
# still needed.
dis.class <- c("ASD HC65" = 'ASD',
"DD"='PSY',
"SFARI Score1"='ASD', 
"SFARI Score2"='ASD', 
"SFARI Score3"='ASD', 
"SFARI Syndromic"='ASD',
"GWAS::ADHD 2019"='PSY', 
"GWAS::AD 2019"='PSY', 
"GWAS::AN 2019"='PSY', 
"GWAS::ASD 2019"='ASD',
"GWAS::BD 2019"='NDD',
"GWAS::IQ 2018"='PSY',
"GWAS::MDD 2018"='PSY',
"GWAS::NEUROT 2018"='PSY',
"GWAS::PD 2014"='NDD',
"GWAS::SCZ 2020"='PSY',
"MAGMA::ASD 2019"='ASD',
"MAGMA::ADHD 2019"='PSY',
"MAGMA::SCZ 2020"='PSY',
"MAGMA::BD 2019"='NDD',
"MAGMA::OCD 2018"='PSY',
"MAGMA::AN 2019"='PSY',
"MAGMA::TS 2019"='PSY',
"MAGMA::MDD 2018"='PSY',
"MAGMA::NEUROT 2018"='PSY',
"MAGMA::IQ 2018"='PSY',
"MAGMA::AD 2019"='PSY',
"MAGMA::PD 2014"='PSY',
"DISGENET::Giant Cell Glioblastoma"='CANCER 1',
"DISGENET::Glioblastoma"='CANCER 1',
"DISGENET::Glioblastoma Multiforme"='CANCER 1',
"DISGENET::Glioma"='CANCER 1',
"DISGENET::M.Oligoastr+Oligodendrogliomas"='CANCER 2',
"DISGENET::Malignant Glioma"='CANCER 1',
"DISGENET::Neuroblastoma"='CANCER 2',
"DISGENET::mixed gliomas"='CANCER 1',
"DISGENET::Medulloblastomas"='CANCER 2')

library(RColorBrewer)
# One 'Accent' colour per distinct class, in order of first appearance.
class.cols <- brewer.pal(n = length(unique(dis.class)), name='Accent')
names(class.cols) <- unique(dis.class)

# Colour of every gene list, looked up through its class and keyed by list name.
dis.class.cols <- class.cols[dis.class]
names(dis.class.cols) <- names(dis.class)

# Regroup the entries so that lists sharing a colour sit next to each other.
dis.class.cols <- dis.class.cols[unlist(split(names(dis.class.cols), dis.class.cols), use.names=FALSE)]

In [None]:
# Bundle the three orderings of the gene lists with the group, class and colour
# look-up vectors built above.
DiseasesOrdered <- list(
    alphabetical=all_diseases,
    clustered_together=clustered_diseases,
    clustered_bygroup=subclustered_diseases,
    groups=dis.groups,
    class=dis.class,
    class.colors=class.cols,
    dis.class.colors=dis.class.cols
)

In [None]:
# Persist the bundle of ordered gene lists and annotation vectors.
saveRDS(DiseasesOrdered, 'data/DiseaseListsInfo.rds')

# RNA Assay

---
title: Data description
author: Shaojie Ma
date: August 31, 2022
---

## Updated seurat object


### Object name: All.MNN.v1.org.rds
```R
## Cell class
object@meta.data$subclass

## Cell subtype
object@meta.data$subtype

## Age
object@meta.data$cbnage

## Region information
object@meta.data$lobe
```


### Subtype order
The subtype order is stored in the file `all.nhp.cbn.v6.txt`.
Only the "cluster" column is needed; its current row order is the cell type order.


### Subtype Colors (for MF7-A)
```R
## These are the colors we used in MF1
group_colors <- c(`Patterning centers` = "#821f44", 
					`dorsal NSC` = "#f573ee",
					enIPC = "#7ca4f9",
					`Excitatory neurons` = "#2166ac",
					`CR` = "#bccf42",
					`GE NSC` = "#f1b6da",
					inIPC = "#7fe63e",
					`Inhibitory neurons` = "#0e9c23",
					gIPC = "#ffc277",
					Astro = "#e08214",
					`OPC&Oligo` = "#ad630a",
					Mes = "#6aada3",
					Immune = "#7a7878",
					`RB&Vas` = "#525759",
					`PAT-related subtypes` = "#fa3980")

## For the disease main figure (MF7-A), I changed the non-significant subclasses to "lightgrey" (#D3D3D3).
## For better contrast, I also changed the colors of "Immune" and "CR" (Cajal-Retzius cells).
## If some "lightgrey" cell types have significant scores in the updated dataset, you can restore their colors from the color codes above.
group_colors <- c(`Patterning centers` = "#D3D3D3", 
					`dorsal NSC` = "#D3D3D3",
					enIPC = "#7ca4f9",
					`Excitatory neurons` = "#2166ac",
					`CR` = "#AF9AE2",
					`GE NSC` = "#D3D3D3",
					inIPC = "#7fe63e",
					`Inhibitory neurons` = "#0e9c23",
					gIPC = "#ffc277",
					Astro = "#e08214",
					`OPC&Oligo` = "#ad630a",
					Mes = "#D3D3D3",
					Immune = "#e05085",
					`RB&Vas` = "#D3D3D3",
					`PAT-related subtypes` = "#D3D3D3")
```





In [None]:
# Read the cell type annotation table and display it.
celltype.info <- data.table::fread('raw/all.nhp.cbn.v6.txt')
celltype.info

In [None]:
# Load the serialized object that is inspected and re-levelled below.
object <- readRDS('raw/All.MNN.v1.org.rds')

In [None]:
# Display the loaded object.
object

In [None]:
# Print the structure of the four metadata columns used downstream.
## Cell class
str(object@meta.data$subclass)

## Cell subtype
str(object@meta.data$subtype)

## Age
str(object@meta.data$cbnage)

## Region information
str(object@meta.data$lobe)


In `object`, `subclass` corresponds to the `label` column of the annotation table (the subclass label).
In `object`, `subtype` corresponds to the `cluster` column of the annotation table.

In [None]:
# TRUE when every subclass value of the object occurs among the annotation labels.
all(object$subclass %in% celltype.info$label)

In [None]:
# TRUE when every subtype value of the object occurs among the annotation clusters.
all(object$subtype %in% celltype.info$cluster)

In [None]:
# Distinct subclass values in the object, then the distinct annotation labels.
unique(object$subclass)
unique(celltype.info$label)

In [None]:
# Distinct subtype values in the object, then the distinct annotation clusters.
unique(object$subtype)
unique(celltype.info$cluster)

In [None]:
# Turn subclass into a factor whose levels follow the order in which the labels
# first appear in the annotation table.
object$subclass <- factor(x = object$subclass, levels = unique(celltype.info$label))

In [None]:
# Turn subtype into a factor whose levels follow the order in which the clusters
# first appear in the annotation table.
object$subtype <- factor(x = object$subtype, levels = unique(celltype.info$cluster))

In [None]:
# Save the object with the re-levelled subclass and subtype factors.
saveRDS(object = object, 
        file = 'data/All.MNN.v1.org.fct.rds')

In [None]:
# Confirm that the output file is present on disk.
file.exists('data/All.MNN.v1.org.fct.rds')