### setup

In [None]:
library(orthogene)
library(Seurat)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(viridis)

#' Set the size used when rendering subsequent notebook figures.
#'
#' @param width Value stored in the `repr.plot.width` option.
#' @param heigth Value stored in the `repr.plot.height` option (the argument
#'   name is kept exactly as callers spell it).
#' @return The previous values of both options, invisibly, as returned by
#'   `options()`.
fig <- function(width, heigth) {
  plot.size <- list(repr.plot.width = width, repr.plot.height = heigth)
  options(plot.size)
}

In [None]:
# Project root; every relative path below is resolved against it.
# NOTE(review): setwd() changes the working directory for the whole session.
wd <- '~/codebases/MacBrainDev/'
setwd(wd)

# Output folder for the expression plots; stay quiet if it already exists.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
dir.create('PCsMarkers', showWarnings = FALSE)

data.dir <- 'data/'

base.name <- 'All.MNN.v1.org.fct'
# Path of the input dataset (.rds) inside `data.dir`.
indata.fname <- paste0(data.dir, base.name, '.rds')
# CSV with the marker table; the expression threshold is parsed from this
# file name further down (the pieces following 'NotExp').
pc.markers.file <- 'DiseaseGenesMarkersExpression/PCs_1/Disease-risk marker gene expression plot.NotExp.0.1.Exp.0.1.csv'

# Gene-set collection; this file only reads its `disease_lists.data` element.
genesets <- readRDS(paste0(data.dir, 'ewce_important_genesets.rds'))

In [None]:
#' Add the shared dot-plot layers, scales, legends and theme to a base plot.
#'
#' @param p A ggplot object. Its data must provide a `subclass` column (used
#'   for the facets); the `size` and `color` aesthetics are the ones labelled
#'   and given legends here.
#' @return `p` with the point layer, facets, labels, scales, theme and guides
#'   added, in the order listed below.
finish_plot_1 <- function(p) {
    # Both legends use the same layout: title on top, labels underneath.
    # NOTE(review): `hjust` is not a documented argument of guide_colorbar()
    # or guide_legend(); presumably it is absorbed by `...` -- confirm.
    color.guide <- guide_colorbar(title.position = "top",
                                  hjust = 0.5,
                                  title.hjust = 0.5,
                                  label.position = "bottom")
    size.guide <- guide_legend(title.position = "top",
                               hjust = 0.5,
                               title.hjust = 0.5,
                               label.position = "bottom")

    # Facet strips and x tick labels are both rotated to read vertically.
    strip.style <- element_text(angle = 90, vjust = 0.5, hjust = 0.5)
    tick.style <- element_text(angle = 90, vjust = 0.5, hjust = 1,
                               margin = margin(b = 20))

    # Light grey panel with solid dark grey major grid lines.
    panel.fill <- element_rect(fill = "#F2F2F2")
    panel.grid <- element_line(size = 0.5, linetype = "solid",
                               colour = "darkgrey")

    # NOTE(review): `gene` in labs() does not correspond to an aesthetic
    # mapped by the callers in this file; kept as in the original.
    styled <- p +
        geom_point(shape = 16) +
        facet_grid(cols = vars(subclass), space = "free", scales = "free") +
        labs(gene = "Disease",
             x = "Cell subtype",
             y = "Gene",
             size = "Percentage expressed",
             color = "Average expression") +
        scale_size_continuous(range = c(2, 6)) +
        theme_pubr(base_family = "Arial", legend = "bottom") +
        labs_pubr() +
        theme(strip.text.x = strip.style, axis.text.x = tick.style) +
        ggtitle(label = "Disease-risk & Patterning Center marker genes' expression") +
        guides(color = color.guide, size = size.guide) +
        theme(panel.background = panel.fill, panel.grid.major = panel.grid) +
        # Alternative palette previously tried:
        # scale_color_viridis(direction=-1, option='magma', guide = "colourbar")
        scale_color_gradient(low = "lightgrey", high = "red")

    styled
}
    

In [None]:
#' Build a gene-by-disease tile plot marking which genes are risk genes.
#'
#' Reads three objects from the calling environment: `genesets` (its
#' `disease_lists.data` element), and the lookups `disease.groups` and
#' `disease.names`, both indexed by disease list name.
#'
#' @param genes Vector of gene names; its order is also the y-axis order.
#' @return A list with `p`, the ggplot object, and `n`, the number of disease
#'   lists that contain at least one of `genes`.
#' @details Stops with an explicit error when no disease list contains any of
#'   `genes` (this includes an empty `genes`), instead of failing later with
#'   an unrelated message.
get_dis_plot <- function(genes){

    # One named logical vector per disease list: is each gene a member?
    gene.in.dis <- lapply(genesets$disease_lists.data, function(x) {setNames(genes %in% x, genes)})

    # Keep only the lists with at least one hit. vapply() guarantees a logical
    # mask even when there are no lists, where sapply() would return list().
    has.hit <- vapply(gene.in.dis, any, logical(1))
    gene.in.dis <- gene.in.dis[has.hit]

    if (length(gene.in.dis) == 0) {
        stop("get_dis_plot(): none of the supplied genes is in any disease list",
             call. = FALSE)
    }

    # Long format: one row per (disease list, gene) pair.
    exclusive.in.disease <- do.call('rbind', lapply(names(gene.in.dis), function(x){
        data.frame(Disease.listname=x, Gene=genes, Risk.gene=gene.in.dis[[x]])
    }))

    # Facet order is fixed; a group outside these three levels becomes NA.
    exclusive.in.disease$Disease.group <- factor(disease.groups[exclusive.in.disease$Disease.listname], levels=c('GWAS', 'MAGMA', 'DISGENET'))
    exclusive.in.disease$Disease.name <- disease.names[exclusive.in.disease$Disease.listname]
    # Diseases are shown in alphabetical order along the x axis.
    exclusive.in.disease$Disease.name <- factor(exclusive.in.disease$Disease.name, sort(unique(exclusive.in.disease$Disease.name)))

    dis.exc.plot <- ggplot(exclusive.in.disease,
           aes(x=Disease.name,
               y = factor(Gene, levels=genes),
               fill=Risk.gene)) +
        facet_grid(cols=vars(Disease.group), scales='free', space='free') +
        geom_tile(color='black') +
        # Black tile = risk gene, panel-coloured tile = not a risk gene.
        scale_fill_manual(values=c('TRUE'='black', 'FALSE'='#F2F2F2')) +
        labs(x = "",
             y = "Gene",
             fill='Risk gene') +

        theme_pubr(base_family='Arial', legend = 'none') + labs_pubr() +

        theme(strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5),
              axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +

        # NOTE(review): `hjust` is not a documented guide_legend() argument;
        # presumably it is absorbed by `...` -- confirm.
        guides(fill = guide_legend(title.position = "top",
                                    hjust = 0.5,
                                    title.hjust = 0.5,
                                    label.position = "right", ncol=1)) +
        theme(panel.background = element_rect(fill = "#F2F2F2"), legend.margin=margin(t=10))

    return(list(p=dis.exc.plot, n=length(unique(exclusive.in.disease$Disease.listname))))
}

### load data

In [None]:
# Load the dataset stored at `indata.fname`.
indata <- readRDS(indata.fname)
# Show the object's summary in the notebook output.
indata
# One row per distinct (subclass, subtype) pair, indexed by subtype so that a
# subtype can be mapped to its subclass.
# Assumes each subtype belongs to a single subclass; otherwise the row names
# would not be unique -- TODO confirm.
clusters <- data.frame(unique(indata@meta.data[,c('subclass','subtype')]))
rownames(clusters) <- clusters$subtype
clusters

# Convert the dataset's gene names from macaque ('mmulatta') to human
# ('hsapiens') orthologs.
# NOTE(review): the original comment said "121 orthologs"; presumably this
# means 1:1 orthologs only -- confirm against the convert_orthologs() defaults.
exp.ort <- convert_orthologs(rownames(indata), input_species = 'mmulatta', output_species = 'hsapiens', verbose=T)
head(exp.ort)

# Lookup vectors between exp.ort$input_gene and rownames(exp.ort), one per
# direction (presumably macaque -> human and human -> macaque respectively).
data.mm.to.hs <- setNames(rownames(exp.ort), exp.ort$input_gene)
data.hs.to.mm <- setNames(exp.ort$input_gene, rownames(exp.ort))

# New Seurat object restricted to the genes present in the ortholog table,
# carrying over the original cell metadata and applying no cell/feature filter.
# NOTE(review): `row.names` may only take effect when `counts` is a
# data.frame; verify that the features really end up with the human names.
hsdata <- CreateSeuratObject(counts = indata[['RNA']]@counts[names(data.mm.to.hs),], project = 'hs', assay = 'RNA', meta.data = indata@meta.data, 
                             row.names = data.mm.to.hs, min.cells = 0, min.features = 0)

# Copy the `data` slot of the original RNA assay for the same genes.
hsdata <- SetAssayData(object = hsdata, slot = 'data', assay = 'RNA', new.data=indata[['RNA']]@data[names(data.mm.to.hs),])

hsdata

In [None]:
# Names of the disease gene lists; each name is split on '::' below.
disease.listnames <- names(genesets$disease_lists.data)

# Split once and reuse the pieces for both lookups.
disease.name.parts <- strsplit(disease.listnames, split='::')

# Display name per list: the last '::'-separated piece.
# vapply() keeps the result a character vector even for zero lists, where
# sapply() would return an empty list.
disease.names <- setNames(vapply(disease.name.parts, function(x){rev(x)[1]}, character(1)), disease.listnames)

# Group per list: the first piece when the name has exactly two pieces,
# 'GWAS' otherwise.
disease.groups <- setNames(vapply(disease.name.parts,
                                  function(x){if (length(x)==2) x[1] else 'GWAS'}, character(1)), disease.listnames)

### prepare data

In [None]:
# File name of the marker table, used in the progress message at the end.
bname <- basename(pc.markers.file)

# The threshold is encoded in the file name: the two dot-separated pieces that
# follow 'NotExp' are re-joined into a fraction and converted to a percentage.
name.split <- strsplit(pc.markers.file, split='.', fixed=TRUE)[[1]]
not.exp <- grep('NotExp', name.split, fixed=TRUE)
if (length(not.exp) != 1) {
    stop("Expected exactly one 'NotExp' piece in the marker file name: ",
         pc.markers.file, call. = FALSE)
}
min.exp <- as.numeric(paste(name.split[(not.exp+1):(not.exp+2)], collapse='.'))*100
if (is.na(min.exp)) {
    stop("Could not parse the expression threshold after 'NotExp' in: ",
         pc.markers.file, call. = FALSE)
}

print(pc.markers.file)
print(min.exp)

# The `risk.markers` column holds comma-separated gene names; flatten it into
# a sorted, unique vector and drop empty entries.
pc.markers.df <- read.csv(pc.markers.file, row.names=1)
risk.markers <- sort(unique(strsplit(paste(pc.markers.df$risk.markers, collapse=','), split=',', fixed=TRUE)[[1]]))
risk.markers <- risk.markers[risk.markers!='']
print(length(risk.markers))

# NOTE(review): DotPlot() appears to compute its own scaled averages, so this
# ScaleData() call may not influence the plot data -- confirm before removing.
my.hsdata <- ScaleData(hsdata, block.size = 10000, features=risk.markers, vars.to.regress = 'nCount_RNA', verbose=TRUE)

# Only the data frame behind the dot plot is used from here on.
dp <- DotPlot(my.hsdata, features=rev(risk.markers), group.by = 'subtype')
dpdata <- dp$data

# Attach the subclass of each subtype (`id` holds the `group.by` value).
dpdata$subclass <- clusters[as.character(dpdata$id), 'subclass']

# For every gene that reaches `min.exp` in a 'Patterning centers' subtype,
# keep the subtype with the highest scaled average expression.
gene.max <- subset(dpdata, subclass == 'Patterning centers' & pct.exp >= min.exp) %>%
    group_by(features.plot) %>% 
    summarize(max.pct = id[order(avg.exp.scaled, decreasing = TRUE)][1])

# Order genes by that subtype, following the subtype factor levels, then
# reverse the whole vector.
gene.order <- rev(unname(unlist(split(gene.max$features.plot, gene.max$max.pct)[levels(droplevels(subset(clusters, subclass=='Patterning centers' )$subtype))])))

# Genes missing from `gene.order` become NA here.
dpdata$features.plot <- factor(dpdata$features.plot, levels=gene.order)

# Subclasses left out when counting where a gene is expressed. Fail loudly,
# naming the culprits, if any of them is absent from the data.
subclass.dont.count <- c('Mes', 'Patterning centers', 'RB&Vas', 'Immune')
missing.subclass <- setdiff(subclass.dont.count, unique(dpdata$subclass))
if(length(missing.subclass) > 0){
    stop('Subclasses expected in the data but not found: ',
         paste(missing.subclass, collapse=', '), call. = FALSE)
}

# Per (subtype, gene): is the gene detected in at least 10% of cells?
# NOTE(review): the 10 is hard-coded rather than taken from `min.exp` --
# confirm that this is intended.
gene.per.cluster.exp <- subset(dpdata, 
                               !subclass %in% subclass.dont.count) %>%
    group_by(id, features.plot) %>% 
    summarise(exp=pct.exp >= 10)

# Per gene: number of counted subtypes expressing it; a gene is "exclusive"
# when that number is at most 3. The second expression reads the `exp` just
# computed, which is what the original `sum(exp)<=3` evaluated to as well.
gene.exp <- gene.per.cluster.exp %>% 
    group_by(features.plot) %>% 
    summarize(exp=sum(exp), exclusive=exp<=3)

genes.exclusive <- subset(gene.exp, exclusive)$features.plot

message(bname)
print('- Exclusive genes:')
print(genes.exclusive)

### plot

In [None]:
# Title and subtitle shared by both expression dot plots.
expression.title <- "Disease-risk & Patterning Center marker genes' expression"
expression.subtitle <- paste0('Minimum expression shown: ', min.exp, '% of cells')

# Styled dot plot of `plot.data`; dots below `min.exp` percent expressed get
# an NA size.
make_expression_plot <- function(plot.data) {
    base.plot <- ggplot(plot.data,
                        aes(x = id,
                            y = features.plot,
                            size = ifelse(pct.exp < min.exp, NA, pct.exp),
                            color = avg.exp.scaled))
    finish_plot_1(base.plot) +
        ggtitle(label = expression.title, subtitle = expression.subtitle)
}

# Disease tiles on the left (gene labels hidden), expression dots on the right
# (y-axis title blanked), aligned horizontally.
join_disease_and_expression <- function(disease.plot, expression.plot, widths) {
    cowplot::plot_grid(
        disease.plot + theme(axis.text.y = element_blank()),
        expression.plot + ylab(''),
        rel_widths = widths,
        align = 'h', axis = 'lbt')
}

# ---- all ordered genes ----
exp.plot.scaled <- make_expression_plot(dpdata)

ggsave(filename = paste0('PCsMarkers/', 'MarkersExpression.pdf'),
       plot = exp.plot.scaled,
       width = 22, height = 27, limitsize = FALSE, useDingbats = FALSE)

dis.exc.plot.list <- get_dis_plot(gene.order)
dis.n <- dis.exc.plot.list$n
dis.exc.plot <- dis.exc.plot.list$p

combined.plot <- join_disease_and_expression(dis.exc.plot, exp.plot.scaled,
                                             widths = c(4, 13))

cowplot::save_plot(filename = 'Supp.16.C.pdf', plot = combined.plot,
                   base_height = 27, base_width = 30,
                   limitsize = FALSE, useDingbats = FALSE)

# ---- exclusive genes only ----
exp.plot.scaled.top <- make_expression_plot(
    filter(dpdata, features.plot %in% genes.exclusive))

ggsave(filename = paste0('PCsMarkers/', 'ExclusiveMarkersExpression.pdf'),
       plot = exp.plot.scaled.top,
       width = 22, height = 10, limitsize = FALSE, useDingbats = FALSE)

dis.exc.plot.list.top <- get_dis_plot(genes.exclusive)
dis.n.top <- dis.exc.plot.list.top$n
dis.exc.plot.top <- dis.exc.plot.list.top$p

combined.plot.top <- join_disease_and_expression(dis.exc.plot.top,
                                                 exp.plot.scaled.top,
                                                 widths = c(2.7, 13))

cowplot::save_plot(filename = 'Fig.7.B.pdf', plot = combined.plot.top,
                   base_height = 10, base_width = 25,
                   limitsize = FALSE, useDingbats = FALSE)

In [None]:
# Set the notebook figure size options (width 25, height 10) before display.
fig(25,10)

# Display the combined disease / expression figure for the exclusive genes.
combined.plot.top