In [1]:
# Attach every package the notebook relies on. Start-up messages are
# silenced; `inst` keeps the values returned by the library() calls.
required_pkgs <- c("DEP", "SummarizedExperiment", "pheatmap", "ggplot2",
                   "EnhancedVolcano", "VennDiagram", "readxl", "tidyverse")
inst <- suppressMessages(
    lapply(required_pkgs, library, character.only = TRUE)
)

“mzR has been built against a different Rcpp version (1.0.4.6)
than is installed on your system (1.0.6). This might lead to errors
when loading mzR. If you encounter such issues, please send a report,
including the output of sessionInfo() to the Bioc support forum at 
https://support.bioconductor.org/. For details see also
https://github.com/sneumann/mzR/wiki/mzR-Rcpp-compiler-linker-issue.”


Set whether to produce plots; set to `FALSE` for test runs.

In [2]:
# Global switch: every plotting cell below is wrapped in `if (bool_plot)`.
bool_plot <- FALSE

In [3]:
# Excel workbook that the read_excel() calls in this notebook load from.
input_file <- "~/Documents/consultation/Luiza/omics_data/proteomics/data/astrocyteprotoeme12102017-1.xlsx"
# Default figure size for plots rendered in the notebook.
options(repr.plot.width = 8, repr.plot.height = 8)

## Compare Acsa2 positive vs. negative

In [4]:
# Load the imputed and normalized data from sheet 2 of the workbook.
# Columns 4 and 3 come first (column 4 is used as the gene name below),
# followed by columns 11-57, which are treated as the numeric sample columns.
sheet_cols <- c(4, 3, 11:57)
datraw <- read_excel(input_file, sheet = 2)[, sheet_cols]

In [5]:
# Everything after the two leading identifier columns is coerced to numeric,
# column by column, which yields a numeric matrix.
dat <- apply(datraw[, -(1:2)], 2, as.numeric)
# Gene names (first selected column) as a plain character vector.
gen <- as.character(as.matrix(datraw[, 1]))

In [6]:
# Collapse rows that share a gene name into one row by averaging each sample
# column, ignoring missing values.
dat <- aggregate(dat, by = list(gen), FUN = mean, na.rm = TRUE)
# aggregate() puts the grouping key in the first column: move it to the row
# names and keep only the numeric columns.
rownames(dat) <- dat[[1]]
dat <- dat[, -1]

In [7]:
# Marker gene names grouped by cell type.
astro <- c("Slc1a2", "Slc1a3", "Aqp4", "S100b", "Sox9", "Gfap",
           "Aldh1l1", "Gja1", "Gjb6", "Agt", "Atp1b2")
neuro <- c("Syp", "Tubb3", "Snap25", "Syt1")
microglia <- c("Itgam", "Aif1")
oligodendrocyte <- c("Mog", "Mag")
endothelial <- "Slco1c1"
mural <- c("Mustn1", "Pdgfrb", "Des")
tanycyes <- "Crym"
npc <- c("Nes", "Sox2", "Prom1")
vlmc <- "Col1a1"

In [8]:
# Per-gene mean over the sample columns of each group. A group is the set of
# columns whose name ends with the given suffix; the suffix also becomes the
# output column name.
# NOTE(review): assumes the sample column names end in exactly these tags --
# confirm against the sheet header.
group_suffixes <- c("CTX", "HPT", "Hippo", "CTX_FT", "HPT_FT", "Hippo_FT")
datfin <- do.call(
    data.frame,
    lapply(setNames(group_suffixes, group_suffixes), function(sfx) {
        apply(select(dat, ends_with(sfx)), 1, mean)
    })
)

In [9]:
# Rows of `datfin` whose gene name is one of `genes`. Rows come back in
# `datfin` order, and names that are absent from the data are simply omitted.
marker_rows <- function(genes) {
    datfin[rownames(datfin) %in% genes, ]
}

# One sub-table of group means per cell type.
astro <- marker_rows(c("Slc1a2", "Slc1a3", "Aqp4", "S100b", "Sox9", "Gfap",
                       "Aldh1l1", "Gja1", "Gjb6", "Agt", "Atp1b2"))
neuro <- marker_rows(c("Syp", "Tubb3", "Snap25", "Syt1"))
mglia <- marker_rows(c("Itgam", "Aif1"))
oligo <- marker_rows(c("Mog", "Mag"))
endo <- marker_rows(c("Slco1c1"))
mural <- marker_rows(c("Mustn1", "Pdgfrb", "Des"))
tcyes <- marker_rows(c("Crym"))
npc <- marker_rows(c("Nes", "Sox2", "Prom1"))
vlmc <- marker_rows(c("Col1a1"))

### heatmap

In [10]:
if (bool_plot) {
    # Marker tables in display order; the list names are the labels used for
    # the row annotation.
    marker_groups <- list(astrocytes = astro, neurons = neuro, microglia = mglia,
                          oligodendrocyte = oligo, endothelial = endo,
                          mural = mural, tanycyes = tcyes, npc = npc,
                          vlmc = vlmc)
    # unname() so rbind() keeps the gene names as row names instead of
    # prefixing them with the list names.
    mat <- as.matrix(do.call(rbind, unname(marker_groups)))
    my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 255)

    # Column annotation: the six group means, first three labelled Acsa2
    # positive and last three Acsa2 negative.
    Acsa2 <- c(rep("Acsa2_pos", 3), rep("Acsa2_neg", 3))
    Tissue <- c(rep(c("cortex", "hypothalamus", "hippocampus"), 2))
    dcol <- data.frame(Tissue = Tissue, Acsa2 = Acsa2, row.names = colnames(mat))

    # Row annotation and gap positions are derived from the actual number of
    # rows in each marker table. The marker tables silently omit genes that
    # are missing from the data, so fixed counts would go out of sync with
    # `mat` as soon as one marker is absent.
    group_sizes <- vapply(marker_groups, nrow, integer(1))
    Cell_types <- rep(names(marker_groups), times = group_sizes)
    drow <- data.frame(Cell_types = Cell_types, row.names = rownames(mat))

    pheatmap(mat, scale = "row", show_rownames = TRUE, color = my_palette,
             cluster_cols = FALSE, cluster_rows = FALSE, gaps_col = 3,
             gaps_row = unname(cumsum(group_sizes)),
             annotation_col = dcol, annotation_row = drow, fontsize = 16,
             main = "Acsa2 positive vs. negative")
}

### pca plot

In [11]:
# PCA across samples: `dat` is transposed so that each sample is one
# observation.
df_pca <- prcomp(t(dat))
df_out <- as.data.frame(df_pca[["x"]])
# Sample annotations are assigned purely by column position.
# NOTE(review): this relies on the sample columns being ordered as 24
# Acsa2-positive followed by 23 Acsa2-negative samples in the tissue blocks
# below -- confirm against the sheet header.
df_out$group <- rep(c("Acsa2_pos", "Acsa2_neg"), times = c(24, 23))
df_out$tissue <- rep(
    c("hippocampus", "hypothalamus", "cortex",
      "cortex", "hypothalamus", "hippocampus"),
    times = c(8, 8, 8, 8, 8, 7)
)

In [12]:
if (bool_plot) {
    # First two principal components; colour encodes the Acsa2 group and
    # point shape encodes the tissue.
    ggplot(df_out, aes(x = PC1, y = PC2, color = group, shape = tissue)) +
           geom_point(size = 6) +
           theme(axis.text = element_text(size = 20),
                 axis.title = element_text(size = 20),
                 # spelled out in full: `siz` only worked through partial
                 # argument matching
                 legend.title = element_text(size = 20),
                 legend.text = element_text(size = 20),
                 plot.title = element_text(size = 20)) +
           ggtitle("PCA of Acsa2 positive vs. negative")
}

### volcano plot

In [13]:
# Test results for the Acsa2 comparison: columns 4, 6 and 10 of sheet 2.
ft <- read_excel(input_file, sheet = 2)[, c(4, 6, 10)]
colnames(ft) <- c("GeneName", "pvalue", "log2FoldChange")
# The p-value column is back-transformed as 10^(-x), i.e. the stored values
# are treated as -log10(p). Done in one vectorized step rather than row by
# row, which also behaves correctly for an empty table.
ft$pvalue <- 10^(-ft$pvalue)

In [14]:
if (bool_plot) {
    # Volcano plot of the Acsa2 comparison, labelled by gene name.
    EnhancedVolcano(
        ft,
        lab = ft[["GeneName"]],
        x = "log2FoldChange",
        y = "pvalue",
        pCutoff = 0.05,
        ylim = c(0, 25),
        col = c("grey30", "grey30", "red2", "red2"),
        title = "Acsa2_neg vs. Acsa2_pos"
    )
}

In [15]:
if (bool_plot) {
    # Same volcano plot, but only the cell-type marker genes are labelled.
    # `mat` is the marker matrix created in the heatmap cell, so that cell
    # has to run first.
    marker_labels <- rownames(mat)
    EnhancedVolcano(
        ft,
        lab = ft[["GeneName"]],
        selectLab = marker_labels,
        labSize = 5,
        labCol = "green",
        x = "log2FoldChange",
        y = "pvalue",
        pCutoff = 0.05,
        FCcutoff = 10,
        ylim = c(0, 25),
        col = c("grey30", "grey30", "red2", "red2"),
        title = "Acsa2_neg vs. Acsa2_pos"
    )
}

## Differential expression analysis visualization

### tissue effect

In [16]:
# Load the Perseus tissue-comparison export: tab-separated, first line
# skipped, the following line used as the header.
diff <- read.table(file = "/Users/viktorian.miok/Documents/data/Protein_Data/Perseus-Proteomics_Analysis-Chow.txt",
                   header = TRUE, sep = "\t", skip = 1)
# Keep the nine test-result columns plus the gene-name column and give them
# descriptive names (p-value, difference, test flag per contrast).
diff <- diff[, c(30:38, 42)]
colnames(diff) <- c("pvalHippo_vs_HPT", "diffHippo_vs_HPT", "testHippo_vs_HPT",
                    "pvalHippo_vs_CTX", "diffHippo_vs_CTX", "testHippo_vs_CTX",
                    "pvalHPT_vs_CTX", "diffHPT_vs_CTX", "testHPT_vs_CTX",
                    "GeneName")
# NOTE(review): the p-value columns read from the Excel workbook elsewhere in
# this notebook are back-transformed with 10^(-x); these are used as-is.
# Confirm that this export stores raw p-values rather than -log10(p).

In [17]:
if (bool_plot) {
    # Hippo-vs-HPT contrast in the three-column layout used by the plots
    # below (gene name, fold change, p-value).
    # NOTE(review): the plot title lists the tissues in the opposite order
    # to the column naming (Hippo_vs_HPT) -- confirm the sign convention.
    Hippo_vs_HPT <- data.frame(GeneName = diff$GeneName,
                               log2FoldChange = diff$diffHippo_vs_HPT,
                               pvalue = diff$pvalHippo_vs_HPT)
    EnhancedVolcano(
        Hippo_vs_HPT,
        lab = Hippo_vs_HPT$GeneName,
        x = "log2FoldChange",
        y = "pvalue",
        pCutoff = 0.05,
        FCcutoff = 10,
        xlim = c(-7, 7),
        ylim = c(0, 10),
        col = c("grey30", "grey30", "red2", "red2"),
        title = "HPT vs. Hippo"
    )
}

In [18]:
if (bool_plot) {
    # Hippo-vs-CTX contrast in the three-column layout used by the plots
    # below (gene name, fold change, p-value).
    Hippo_vs_CTX <- data.frame(GeneName = diff$GeneName,
                               log2FoldChange = diff$diffHippo_vs_CTX,
                               pvalue = diff$pvalHippo_vs_CTX)
    EnhancedVolcano(
        Hippo_vs_CTX,
        lab = Hippo_vs_CTX[["GeneName"]],
        x = "log2FoldChange",
        y = "pvalue",
        pCutoff = 0.05,
        FCcutoff = 10,
        xlim = c(-7, 7),
        ylim = c(0, 10),
        col = c("grey30", "grey30", "red2", "red2"),
        title = "CTX vs. Hippo"
    )
}

In [19]:
if (bool_plot) {
    # HPT-vs-CTX contrast in the three-column layout used by the plots
    # below (gene name, fold change, p-value).
    HPT_vs_CTX <- data.frame(GeneName = diff$GeneName,
                             log2FoldChange = diff$diffHPT_vs_CTX,
                             pvalue = diff$pvalHPT_vs_CTX)
    EnhancedVolcano(
        HPT_vs_CTX,
        lab = HPT_vs_CTX[["GeneName"]],
        x = "log2FoldChange",
        y = "pvalue",
        pCutoff = 0.05,
        FCcutoff = 10,
        xlim = c(-7, 7),
        ylim = c(0, 10),
        col = c("grey30", "grey30", "red2", "red2"),
        title = "HPT vs. CTX"
    )
}

In [20]:
if (bool_plot) {
    # Number of entries with p < 0.05 in a contrast table (p-value in
    # column 3). sum(..., na.rm = TRUE) skips missing p-values; indexing rows
    # with the raw comparison would keep one all-NA row per missing p-value
    # and inflate the count.
    count_significant <- function(tbl) {
        sum(tbl[, 3] < 0.05, na.rm = TRUE)
    }

    # Requires the three contrast tables built in the volcano-plot cells above.
    df <- data.frame(location = c("HPT_vs_Hippo", "HPT_vs_CTX", "CTX_vs_Hippo"),
                     Sig_genes = c(count_significant(Hippo_vs_HPT),
                                   count_significant(HPT_vs_CTX),
                                   count_significant(Hippo_vs_CTX)))
    # One bar per contrast, without a legend.
    ggplot(df, aes(x = location, y = Sig_genes, fill = location)) +
           geom_bar(stat = "identity") +
           theme_minimal() +
           theme(legend.position = "none") +
           scale_fill_manual(values = c("purple", "brown", "blue"))
}

In [21]:
if (bool_plot) {
    # Gene names (column 1) of the rows with p < 0.05 (column 3); which()
    # leaves out rows whose p-value is missing.
    significant_ids <- function(tbl) {
        tbl[which(tbl[, 3] < 0.05), 1]
    }

    # Overlap of the significant sets across the three tissue contrasts.
    # filename = NULL makes venn.diagram() return the drawing instead of
    # writing a file, so it is rendered with grid.draw().
    vp <- venn.diagram(
        list(HPT_vs_CTX = significant_ids(HPT_vs_CTX),
             Hippo_vs_CTX = significant_ids(Hippo_vs_CTX),
             HPT_vs_Hippo = significant_ids(Hippo_vs_HPT)),
        fill = c("red", "darkorchid1", "cornflowerblue"),
        filename = NULL, cex = 1.5, col = "transparent",
        cat.cex = 1.3, margin = 0.01, main = "Proteome", main.cex = 2
    )
    grid.draw(vp)
}

### diet effect

In [22]:
# Load the Perseus results for the hippocampus diet comparison (sheet 10):
# gene name, p-value and fold change.
hippo <- read_excel(input_file, sheet = 10)[, c(4, 6, 7)]
colnames(hippo) <- c("GeneName", "pvalue", "log2FoldChange")
# The p-value column is back-transformed as 10^(-x), i.e. the stored values
# are treated as -log10(p). Vectorized instead of a row-by-row loop, which
# also behaves correctly for an empty table.
hippo$pvalue <- 10^(-hippo$pvalue)

In [23]:
if (bool_plot) {
    # Volcano plot of the hippocampus diet comparison.
    EnhancedVolcano(
        hippo,
        lab = hippo[["GeneName"]],
        x = "log2FoldChange",
        y = "pvalue",
        pCutoff = 0.05,
        FCcutoff = 10,
        xlim = c(-5, 5),
        ylim = c(0, 5),
        col = c("grey30", "grey30", "red2", "red2"),
        title = "hippo: chow vs. hfd"
    )
}

In [24]:
# Load the Perseus results for the cortex diet comparison (sheet 8):
# gene name, p-value and fold change.
ctx <- read_excel(input_file, sheet = 8)[, c(4, 6, 7)]
colnames(ctx) <- c("GeneName", "pvalue", "log2FoldChange")
# The p-value column is back-transformed as 10^(-x), i.e. the stored values
# are treated as -log10(p). Vectorized instead of a row-by-row loop, which
# also behaves correctly for an empty table.
ctx$pvalue <- 10^(-ctx$pvalue)

In [25]:
if (bool_plot) {
    # Volcano plot of the cortex diet comparison.
    EnhancedVolcano(
        ctx,
        lab = ctx[["GeneName"]],
        x = "log2FoldChange",
        y = "pvalue",
        pCutoff = 0.05,
        FCcutoff = 10,
        xlim = c(-5, 5),
        ylim = c(0, 5),
        col = c("grey30", "grey30", "red2", "red2"),
        title = "ctx: chow vs. hfd"
    )
}

In [26]:
# Load the Perseus results for the hypothalamus diet comparison (sheet 6):
# gene name, p-value and fold change.
hpt <- read_excel(input_file, sheet = 6)[, c(4, 6, 7)]
colnames(hpt) <- c("GeneName", "pvalue", "log2FoldChange")
# The p-value column is back-transformed as 10^(-x), i.e. the stored values
# are treated as -log10(p). Vectorized instead of a row-by-row loop, which
# also behaves correctly for an empty table.
hpt$pvalue <- 10^(-hpt$pvalue)

In [27]:
if (bool_plot) {
    # Volcano plot of the hypothalamus diet comparison.
    EnhancedVolcano(
        hpt,
        lab = hpt[["GeneName"]],
        x = "log2FoldChange",
        y = "pvalue",
        pCutoff = 0.05,
        FCcutoff = 10,
        xlim = c(-5, 5),
        ylim = c(0, 5),
        col = c("grey30", "grey30", "red2", "red2"),
        title = "hpt: chow vs. hfd"
    )
}

In [28]:
if (bool_plot) {
    # Number of entries with p < 0.05 (p-value in column 2); missing
    # p-values are not counted.
    n_significant <- function(tbl) {
        sum(tbl[[2]] < 0.05, na.rm = TRUE)
    }

    df <- data.frame(location = c("Hypothalamus", "Hippocampus", "Cortex"),
                     Sig_genes = c(n_significant(hpt),
                                   n_significant(hippo),
                                   n_significant(ctx)))
    # One bar per tissue, without a legend.
    ggplot(df, aes(x = location, y = Sig_genes, fill = location)) +
           geom_bar(stat = "identity") +
           theme_minimal() +
           theme(legend.position = "none") +
           scale_fill_manual(values = c("red", "#56B4E9", "olivedrab3"))
}

In [29]:
if (bool_plot) {
    # Gene names (column 1) of the entries with p < 0.05 (column 2), as a
    # plain character vector. which() leaves out rows whose p-value is
    # missing, and missing gene names are removed. All three tissues are
    # treated the same way; previously only the CTX set was stripped of NA.
    significant_genes <- function(tbl) {
        genes <- tbl[[1]][which(tbl[[2]] < 0.05)]
        genes[!is.na(genes)]
    }

    # Overlap of the significant sets across the three tissues.
    # filename = NULL makes venn.diagram() return the drawing instead of
    # writing a file, so it is rendered with grid.draw().
    vp <- venn.diagram(
        list(HPT = significant_genes(hpt),
             Hippo = significant_genes(hippo),
             CTX = significant_genes(ctx)),
        fill = c("red", "darkorchid1", "cornflowerblue"),
        filename = NULL, cex = 1.5, col = "transparent",
        cat.cex = 1.5, margin = 0.01, main = "Proteome", main.cex = 2
    )
    grid.draw(vp)
}