In [1]:
inst <- suppressMessages(lapply(c('DEP',
                                  'SummarizedExperiment',
                                  'pheatmap',
                                  'ggplot2',
                                  'EnhancedVolcano',
                                  'VennDiagram',
                                  'org.Mm.eg.db',
                                  'clusterProfiler',
                                  'readxl',
                                  'tidyverse',
                                  'KEGGREST'),
                                library,
                                character.only=TRUE)
) 

“mzR has been built against a different Rcpp version (1.0.4.6)
than is installed on your system (1.0.6). This might lead to errors
when loading mzR. If you encounter such issues, please send a report,
including the output of sessionInfo() to the Bioc support forum at 
https://support.bioconductor.org/. For details see also
https://github.com/sneumann/mzR/wiki/mzR-Rcpp-compiler-linker-issue.”


Set whether to produce the on-screen plots; set `bool_plot` to FALSE for test runs.

In [2]:
# Master switch for the cells guarded by `if (bool_plot)`.
bool_plot <- FALSE

In [3]:
# Excel workbook that the read_excel() calls in this notebook load from.
input_file <- "~/Documents/consultation/Luiza/omics_data/data/astrocyteprotoeme12102017-1.xlsx"

# Default size of inline notebook plots.
options(repr.plot.width = 8, repr.plot.height = 8)

# 255-step blue -> white -> red ramp used as the heatmap colour scale.
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 255)

## Compare Acsa2 positive vs. negative

In [4]:
# Load the imputed and normalized data from sheet 2 of the workbook and keep
# workbook column 4 (used as the gene name below), column 3 and columns 11-57.
datraw <- read_excel(input_file, sheet = 2)
datraw <- datraw[, c(4, 3, 11:57)]

In [5]:
# Matrix of measurements: every column after the first two, coerced to numeric.
dat <- apply(datraw[, -c(1, 2)], 2, as.numeric)

# Gene name of each row, taken from the first selected column.
gen <- as.character(as.matrix(datraw[, 1]))

In [6]:
# Collapse rows that share a gene name into their column-wise mean (NAs are
# ignored), then move the grouping column into the row names.
dat <- aggregate(dat, by = list(gen), FUN = mean, na.rm = TRUE)
rownames(dat) <- dat[, 1]
dat <- dat[, -1]

In [7]:
# Marker gene symbols per cell type.
astro <- c(
  "Slc1a2", "Slc1a3", "Aqp4", "S100b", "Sox9", "Gfap",
  "Aldh1l1", "Gja1", "Gjb6", "Agt", "Atp1b2"
)
neuro <- c("Syp", "Tubb3", "Snap25", "Syt1")
microglia <- c("Itgam", "Aif1")
oligodendrocyte <- c("Mog", "Mag")
endothelial <- c("Slco1c1")
mural <- c("Mustn1", "Pdgfrb", "Des")
tanycyes <- c("Crym")
npc <- c("Nes", "Sox2", "Prom1")
vlmc <- c("Col1a1")

In [8]:
# Per-gene mean over all columns whose name ends with a given suffix.
# One output column per suffix, in this order.
group_suffixes <- c("CTX", "HPT", "Hippo", "CTX_FT", "HPT_FT", "Hippo_FT")
datfin <- do.call(
  data.frame,
  lapply(
    setNames(group_suffixes, group_suffixes),
    function(suffix) apply(select(dat, ends_with(suffix)), 1, mean)
  )
)

In [9]:
# Rows of `datfin` whose gene symbol belongs to `genes` (order follows datfin).
marker_rows <- function(genes) {
  datfin[rownames(datfin) %in% genes, ]
}

# One sub-table of group means per cell type.
astro <- marker_rows(c(
  "Slc1a2", "Slc1a3", "Aqp4", "S100b", "Sox9", "Gfap",
  "Aldh1l1", "Gja1", "Gjb6", "Agt", "Atp1b2"
))
neuro <- marker_rows(c("Syp", "Tubb3", "Snap25", "Syt1"))
mglia <- marker_rows(c("Itgam", "Aif1"))
oligo <- marker_rows(c("Mog", "Mag"))
endo <- marker_rows(c("Slco1c1"))
mural <- marker_rows(c("Mustn1", "Pdgfrb", "Des"))
tcyes <- marker_rows(c("Crym"))
npc <- marker_rows(c("Nes", "Sox2", "Prom1"))
vlmc <- marker_rows(c("Col1a1"))

### heatmap

In [10]:
# Inline plot size for the heatmap below.
options(repr.plot.width = 8, repr.plot.height = 8)

In [11]:
# Marker panels stacked in display order (npc, vlmc and tcyes are left out).
marker_panels <- list(astro, neuro, mglia, oligo, endo, mural)
mat <- as.matrix(do.call(rbind, marker_panels))

# Column annotation table.
# NOTE(review): `dcol` is built but never passed to pheatmap() in this cell.
Omics <- c(rep("transcriptomics", 3), rep("proteomics", 3))
Acsa2 <- c(rep("Acsa2_pos", 3), rep("Acsa2_neg", 3))
Tissue <- c(rep(c("cortex", "hypothalamus", "hippocampus"), 2))
dcol <- as.data.frame(cbind(Tissue, Acsa2, Omics))
rownames(dcol) <- colnames(mat)

# Row gaps sit after each panel. They are derived from the actual panel sizes
# so they stay aligned when a marker gene is absent from the data. With every
# marker present this yields 11, 15, 17, 19, 20, 23.
panel_ends <- cumsum(vapply(marker_panels, nrow, integer(1)))

pheatmap(
  mat,
  scale = "row",
  show_rownames = FALSE,
  show_colnames = FALSE,
  color = my_palette,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  gaps_col = 3,
  gaps_row = panel_ends,
  fontsize = 18,
  legend = TRUE,
  filename = "heatmap_ACSA2.pdf",
  width = 8,
  height = 8
)

### pca plot

In [12]:
# PCA with samples as observations (columns of `dat` become rows), each
# variable scaled to unit variance. `scale.` is spelled out rather than
# relying on partial matching of `scale`.
df_pca <- prcomp(t(dat), scale. = TRUE)
df_out <- as.data.frame(df_pca$x)

# Sample annotations, assigned by column position.
# NOTE(review): the tissue order differs between the two halves
# (cortex/hippocampus/hypothalamus vs. cortex/hypothalamus/hippocampus);
# confirm this matches the column order of the workbook.
df_out$group <- rep(c("Acsa2_pos", "Acsa2_neg"), times = c(24, 23))
df_out$tissue <- rep(
  c("cortex", "hippocampus", "hypothalamus", "cortex", "hypothalamus", "hippocampus"),
  times = c(8, 8, 8, 8, 8, 7)
)

In [13]:
# Scatter of the first two principal components, coloured by Acsa2 group and
# shaped by tissue. `size` is spelled out for legend.title (the original
# relied on partial matching of `siz`).
if (bool_plot) {
  ggplot(df_out, aes(x = PC1, y = PC2, color = group, shape = tissue)) +
    geom_point(size = 6) +
    theme(
      axis.text = element_text(size = 20),
      axis.title = element_text(size = 20),
      legend.title = element_text(size = 20),
      legend.text = element_text(size = 20),
      plot.title = element_text(size = 20)
    ) +
    ggtitle("PCA of Acsa2 positive vs. negative")
}

In [14]:
# Keep only the first 24 sample columns.
dat <- dat[, seq_len(24)]

## PCA

In [15]:
# Inline plot size for the PCA section.
options(repr.plot.width = 10, repr.plot.height = 8)

In [16]:
# Tab-separated protein-group table with a header row.
dat1 <- read.table(
  file = "/Users/viktorian.miok/Documents/data/Protein_Data/proteingroups_astrocytes_imputed.txt",
  header = TRUE,
  sep = "\t"
)
# Drop the first data row and keep the first 24 columns.
dat1 <- dat1[-1, 1:24]

In [17]:
# Coerce every column to numeric, then run a scaled PCA with samples as
# observations.
dat1 <- apply(dat1, 2, as.numeric)
df_pca <- prcomp(t(dat1), scale. = TRUE)
df_out <- as.data.frame(df_pca$x)

# Condition label per sample: four consecutive samples per condition.
# The original called as.factor(condition = ...), which fails with
# "unused argument" because as.factor() only takes `x`.
# NOTE(review): the factor levels are sorted, and that ordering is
# locale-dependent ("Hippo_*" vs. "HPT_*"). It decides which colour each
# condition gets in the PCA plot, so confirm the mapping.
df_out$tissue <- factor(
  rep(
    c("Hippo_CD", "Hippo_hfd", "HPT_CD", "HPT_hfd", "CTX_CD", "CTX_hfd"),
    each = 4
  )
)

In [18]:
# Square inline plot size for the PCA figure.
options(repr.plot.width = 10, repr.plot.height = 10)

In [19]:
#if(bool_plot){
# Percentage of total variance explained by each principal component.
# prcomp() returns standard deviations in `sdev`, so the variance share is
# sdev^2 / sum(sdev^2). The original labelled round(sdev) as "% variance".
pc_var_pct <- round(100 * df_pca$sdev^2 / sum(df_pca$sdev^2))

pdf(file = "PCA.pdf", width = 10, height = 10)
# print() makes the plot reach the PDF device even when the code is not run
# at top level (e.g. via source()).
print(
  ggplot(df_out, aes(x = PC1, y = PC2, color = tissue)) +
    geom_point(aes(color = tissue), size = 10) +
    # black outline drawn on top of every filled point
    geom_point(shape = 1, color = "black", size = 10) +
    xlab(paste0("PC1: ", pc_var_pct[1], "% variance")) +
    ylab(paste0("PC2: ", pc_var_pct[2], "% variance")) +
    theme(
      axis.text = element_text(size = 25),
      axis.title = element_text(size = 30)
    ) +
    scale_color_manual(values = c(
      "aquamarine4", "aquamarine2", "darkorange2",
      "orange", "blueviolet", "mediumpurple1"
    )) +
    theme(legend.position = "none")
)
dev.off()
#}

### volcano plot

In [20]:
# Gene name, p-value column and fold-change column from sheet 2.
ft <- read_excel(input_file, sheet = 2)[, c(4, 6, 10)]
colnames(ft) <- c("GeneName", "pvalue", "log2FoldChange")

# Back-transform the stored value x to 10^-x (presumably the sheet holds
# -log10(p); confirm against the workbook). Vectorised, so it also works for
# an empty table, where the original 1:nrow() loop would fail.
ft$pvalue <- 10^(-ft$pvalue)

In [21]:
# Volcano plot of the Acsa2 comparison, every gene labelled by name.
if (bool_plot) {
  EnhancedVolcano(
    ft,
    lab = ft$GeneName,
    x = "log2FoldChange",
    y = "pvalue",
    pCutoff = 0.05,
    ylim = c(0, 25),
    col = c("grey30", "grey30", "red2", "red2"),
    title = "Acsa2_neg vs. Acsa2_pos"
  )
}

In [22]:
# Same volcano plot, but only the marker genes shown in the heatmap
# (row names of `mat`) are labelled, in green.
if (bool_plot) {
  EnhancedVolcano(
    ft,
    lab = ft$GeneName,
    x = "log2FoldChange",
    y = "pvalue",
    pCutoff = 0.05,
    FCcutoff = 10,
    ylim = c(0, 25),
    selectLab = rownames(mat),
    labSize = 5,
    labCol = "green",
    col = c("grey30", "grey30", "red2", "red2"),
    title = "Acsa2_neg vs. Acsa2_pos"
  )
}

## Differential expression analysis visualization

### tissue effect

In [23]:
# Perseus export: skip the first line, then read a tab-separated table with a
# header row.
diff <- read.table(
  file = "/Users/viktorian.miok/Documents/data/Protein_Data/Perseus-Proteomics_Analysis-Chow.txt",
  header = TRUE,
  sep = "\t",
  skip = 1
)

# Keep the three pairwise-comparison triplets (columns 30-38) plus column 42,
# and give them readable names.
# NOTE(review): these p-value columns are later used as-is, without the
# 10^-x back-transform applied to the workbook sheets; confirm they hold raw
# p-values.
diff <- diff[, c(30:38, 42)]
colnames(diff) <- c(
  "pvalHippo_vs_HPT", "diffHippo_vs_HPT", "testHippo_vs_HPT",
  "pvalHippo_vs_CTX", "diffHippo_vs_CTX", "testHippo_vs_CTX",
  "pvalHPT_vs_CTX", "diffHPT_vs_CTX", "testHPT_vs_CTX",
  "GeneName"
)

In [24]:
# Volcano plot for the Hippo vs. HPT comparison.
if (bool_plot) {
  Hippo_vs_HPT <- data.frame(
    GeneName = diff$GeneName,
    log2FoldChange = diff$diffHippo_vs_HPT,
    pvalue = diff$pvalHippo_vs_HPT
  )
  EnhancedVolcano(
    Hippo_vs_HPT,
    # labels come from the plotted table itself, as in the sibling cells
    lab = Hippo_vs_HPT$GeneName,
    pCutoff = 0.05,
    FCcutoff = 10,
    x = "log2FoldChange",
    y = "pvalue",
    ylim = c(0, 10),
    xlim = c(-7, 7),
    col = c("grey30", "grey30", "red2", "red2"),
    title = "HPT vs. Hippo"
  )
}

In [25]:
# Volcano plot for the Hippo vs. CTX comparison.
if (bool_plot) {
  Hippo_vs_CTX <- data.frame(
    GeneName = diff$GeneName,
    log2FoldChange = diff$diffHippo_vs_CTX,
    pvalue = diff$pvalHippo_vs_CTX
  )
  EnhancedVolcano(
    Hippo_vs_CTX,
    lab = Hippo_vs_CTX$GeneName,
    x = "log2FoldChange",
    y = "pvalue",
    pCutoff = 0.05,
    FCcutoff = 10,
    xlim = c(-7, 7),
    ylim = c(0, 10),
    col = c("grey30", "grey30", "red2", "red2"),
    title = "CTX vs. Hippo"
  )
}

In [26]:
# Volcano plot for the HPT vs. CTX comparison.
if (bool_plot) {
  HPT_vs_CTX <- data.frame(
    GeneName = diff$GeneName,
    log2FoldChange = diff$diffHPT_vs_CTX,
    pvalue = diff$pvalHPT_vs_CTX
  )
  EnhancedVolcano(
    HPT_vs_CTX,
    lab = HPT_vs_CTX$GeneName,
    x = "log2FoldChange",
    y = "pvalue",
    pCutoff = 0.05,
    FCcutoff = 10,
    xlim = c(-7, 7),
    ylim = c(0, 10),
    col = c("grey30", "grey30", "red2", "red2"),
    title = "HPT vs. CTX"
  )
}

In [27]:
# Bar plot of the number of genes with p < 0.05 in each tissue comparison.
if (bool_plot) {
  # Count with sum(..., na.rm = TRUE). Subsetting a data frame with a logical
  # vector that contains NA returns an extra all-NA row per NA, which the
  # original dim()[1] count included.
  n_sig <- function(tab) sum(tab[, 3] < 0.05, na.rm = TRUE)

  df <- data.frame(
    location = c("HPT_vs_Hippo", "HPT_vs_CTX", "CTX_vs_Hippo"),
    Sig_genes = c(n_sig(Hippo_vs_HPT), n_sig(HPT_vs_CTX), n_sig(Hippo_vs_CTX))
  )
  ggplot(df, aes(x = location, y = Sig_genes, fill = location)) +
    geom_bar(stat = "identity") +
    theme_minimal() +
    theme(legend.position = "none") +
    scale_fill_manual(values = c("purple", "brown", "blue"))
}

In [28]:
# Venn diagram of the genes with p < 0.05 in each of the three comparisons.
if (bool_plot) {
  sig_sets <- list(
    HPT_vs_CTX = HPT_vs_CTX$GeneName[which(HPT_vs_CTX$pvalue < 0.05)],
    Hippo_vs_CTX = Hippo_vs_CTX$GeneName[which(Hippo_vs_CTX$pvalue < 0.05)],
    HPT_vs_Hippo = Hippo_vs_HPT$GeneName[which(Hippo_vs_HPT$pvalue < 0.05)]
  )
  # filename = NULL makes venn.diagram() return the grob instead of writing
  # a file; it is then drawn on the current device.
  vp <- venn.diagram(
    sig_sets,
    fill = c("red", "darkorchid1", "cornflowerblue"),
    filename = NULL,
    cex = 1.5,
    col = "transparent",
    cat.cex = 1.3,
    margin = 0.01,
    main = "Proteome",
    main.cex = 2
  )
  grid.draw(vp)
}

### diet effect

In [29]:
# Load the results from Perseus (sheet 10): gene name, p-value column and
# fold-change column.
hippo <- read_excel(input_file, sheet = 10)[, c(4, 6, 7)]
colnames(hippo) <- c("GeneName", "pvalue", "log2FoldChange")

# Back-transform the stored value x to 10^-x (presumably the sheet holds
# -log10(p); confirm against the workbook). Vectorised, so it also works for
# an empty table, where the original 1:nrow() loop would fail.
hippo$pvalue <- 10^(-hippo$pvalue)

In [30]:
# Volcano plot of the hippo table, written to volcano_hippo.pdf.
pdf(file = "volcano_hippo.pdf", width = 10, height = 10)
# print() makes the plot reach the PDF device even when the code is not run
# at top level (e.g. via source()).
print(
  EnhancedVolcano(
    hippo,
    lab = hippo$GeneName,
    x = "log2FoldChange",
    y = "pvalue",
    FCcutoff = 10,
    pCutoff = 0.05,
    axisLabSize = 30,
    title = NULL,
    subtitle = NULL,
    caption = NULL,
    xlim = c(-5, 5),
    ylim = c(0, 5),
    col = c("grey30", "grey30", "red2", "red2"),
    # The third legend class holds points that pass the p-value cutoff only,
    # i.e. p < 0.05. The original label read "p-value>0.05".
    legendLabels = c(
      "NS",
      expression(Log[2] ~ FC),
      "p-value<0.05",
      expression(p - value ~ and ~ log[2] ~ FC)
    ),
    legendLabSize = 30
  )
)
dev.off()

“Removed 2 rows containing missing values (geom_vline).”


In [31]:
# Load the results from Perseus (sheet 8): gene name, p-value column and
# fold-change column.
ctx <- read_excel(input_file, sheet = 8)[, c(4, 6, 7)]
colnames(ctx) <- c("GeneName", "pvalue", "log2FoldChange")

# Back-transform the stored value x to 10^-x (presumably the sheet holds
# -log10(p); confirm against the workbook). Vectorised, so it also works for
# an empty table, where the original 1:nrow() loop would fail.
ctx$pvalue <- 10^(-ctx$pvalue)

In [32]:
# Volcano plot of the ctx table, written to volcano_ctx.pdf.
pdf(file = "volcano_ctx.pdf", width = 10, height = 10)
# print() makes the plot reach the PDF device even when the code is not run
# at top level (e.g. via source()).
print(
  EnhancedVolcano(
    ctx,
    lab = ctx$GeneName,
    x = "log2FoldChange",
    y = "pvalue",
    FCcutoff = 10,
    pCutoff = 0.05,
    axisLabSize = 30,
    title = NULL,
    subtitle = NULL,
    caption = NULL,
    xlim = c(-5, 5),
    ylim = c(0, 5),
    col = c("grey30", "grey30", "red2", "red2"),
    # The third legend class holds points that pass the p-value cutoff only,
    # i.e. p < 0.05. The original label read "p-value>0.05".
    legendLabels = c(
      "NS",
      expression(Log[2] ~ FC),
      "p-value<0.05",
      expression(p - value ~ and ~ log[2] ~ FC)
    ),
    legendLabSize = 30
  )
)
dev.off()

“Removed 2 rows containing missing values (geom_vline).”


In [33]:
# Load the results from Perseus (sheet 6): gene name, p-value column and
# fold-change column.
hpt <- read_excel(input_file, sheet = 6)[, c(4, 6, 7)]
colnames(hpt) <- c("GeneName", "pvalue", "log2FoldChange")

# Back-transform the stored value x to 10^-x (presumably the sheet holds
# -log10(p); confirm against the workbook). Vectorised, so it also works for
# an empty table, where the original 1:nrow() loop would fail.
hpt$pvalue <- 10^(-hpt$pvalue)

In [34]:
# Volcano plot of the hpt table, written to volcano_hpt.pdf.
pdf(file = "volcano_hpt.pdf", width = 10, height = 10)
# print() makes the plot reach the PDF device even when the code is not run
# at top level (e.g. via source()).
print(
  EnhancedVolcano(
    hpt,
    lab = hpt$GeneName,
    x = "log2FoldChange",
    y = "pvalue",
    FCcutoff = 10,
    pCutoff = 0.05,
    axisLabSize = 30,
    title = NULL,
    subtitle = NULL,
    caption = NULL,
    xlim = c(-5, 5),
    ylim = c(0, 5),
    col = c("grey30", "grey30", "red2", "red2"),
    # The third legend class holds points that pass the p-value cutoff only,
    # i.e. p < 0.05. The original label read "p-value>0.05".
    legendLabels = c(
      "NS",
      expression(Log[2] ~ FC),
      "p-value<0.05",
      expression(p - value ~ and ~ log[2] ~ FC)
    ),
    legendLabSize = 30
  )
)
dev.off()

“Removed 2 rows containing missing values (geom_vline).”


In [35]:
# Bar plot of the number of genes with p < 0.05 per tissue.
if (bool_plot) {
  df <- data.frame(
    location = c("Hypothalamus", "Hippocampus", "Cortex"),
    Sig_genes = c(
      dim(hpt[which(hpt[, 2] < 0.05), ])[1],
      dim(hippo[which(hippo[, 2] < 0.05), ])[1],
      dim(ctx[which(ctx[, 2] < 0.05), ])[1]
    )
  )
  ggplot(df, aes(x = location, y = Sig_genes, fill = location)) +
    geom_bar(stat = "identity") +
    theme(
      axis.text = element_text(size = 20),
      axis.title = element_text(size = 20),
      # `size` spelled out; the original relied on partial matching of `siz`
      legend.title = element_text(size = 20),
      legend.text = element_text(size = 20),
      plot.title = element_text(size = 20)
    ) +
    theme(legend.position = "none") +
    scale_fill_manual(values = c("lightblue", "lightgreen", "tan1"))
}

In [36]:
# Inline plot size for the Venn diagram.
options(repr.plot.width = 8, repr.plot.height = 8)

In [37]:
# Names of the genes with p < 0.05 in a result table, without NA entries.
# The original dropped NAs only for ctx. Applying the same rule to all three
# sets keeps a shared NA from being counted as a common "gene". It also
# avoids indexing a tibble with a logical matrix.
sig_gene_names <- function(res) {
  genes <- res$GeneName[which(res$pvalue < 0.05)]
  genes[!is.na(genes)]
}

# Venn diagram of the significant genes in hpt, hippo and ctx.
# Category names are blank and cat.cex is tiny, so only the counts show.
pdf(file = "venn_tran.pdf", width = 8, height = 8)
vp <- venn.diagram(
  list(
    " " = sig_gene_names(hpt),
    " " = sig_gene_names(hippo),
    " " = sig_gene_names(ctx)
  ),
  fill = c("orange", "mediumpurple1", "aquamarine2"),
  filename = NULL,
  cex = 3.5,
  col = "transparent",
  cat.cex = 0.01,
  margin = 0.01,
  main = NULL,
  main.cex = 3
)
grid.draw(vp)
dev.off()

In [38]:
# Narrow inline plot size for the per-tissue heatmaps.
options(repr.plot.width = 5, repr.plot.height = 8)

In [39]:
# Sheet 8: gene name (column 4) plus the eight columns 9-16.
datr <- read_excel(input_file, sheet = 8)
datr <- datr[, c(4, 9:16)]

# Numeric matrix of everything except the gene-name column.
datctx <- apply(datr[, -1], 2, as.numeric)
gen <- as.character(as.matrix(datr[, 1]))

# Average rows that share a gene name (NAs ignored) and move the gene name
# into the row names.
datctx <- aggregate(datctx, by = list(gen), FUN = mean, na.rm = TRUE)
rownames(datctx) <- datctx[, 1]
datctx <- datctx[, -1]

In [40]:
# Rows of the current `datctx` whose gene has p < 0.05 in `ctx`.
sig_ctx_genes <- as.character(ctx$GeneName[which(ctx$pvalue < 0.05)])
vsd_ctx <- datctx[rownames(datctx) %in% sig_ctx_genes, ]

# Row-scaled heatmap written to heatmap_ctx.pdf, with the colour scale fixed
# by breaks from -2 to 2.
pheatmap(
  vsd_ctx,
  scale = "row",
  show_rownames = FALSE,
  show_colnames = FALSE,
  color = my_palette,
  breaks = seq(-2, 2, length.out = 255),
  cluster_cols = FALSE,
  fontsize = 15,
  legend = FALSE,
  filename = "heatmap_ctx.pdf",
  width = 5,
  height = 8
)

In [41]:
# Sheet 10: gene name (column 4) plus the eight columns 9-16.
# The result is stored in `datctx` again, overwriting the previous table.
datr <- read_excel(input_file, sheet = 10)
datr <- datr[, c(4, 9:16)]

# Numeric matrix of everything except the gene-name column.
datctx <- apply(datr[, -1], 2, as.numeric)
gen <- as.character(as.matrix(datr[, 1]))

# Average rows that share a gene name (NAs ignored) and move the gene name
# into the row names.
datctx <- aggregate(datctx, by = list(gen), FUN = mean, na.rm = TRUE)
rownames(datctx) <- datctx[, 1]
datctx <- datctx[, -1]

In [42]:
# Rows of the current `datctx` whose gene has p < 0.05 in `hippo`.
sig_hip_genes <- as.character(hippo$GeneName[which(hippo$pvalue < 0.05)])
vsd_hip <- datctx[rownames(datctx) %in% sig_hip_genes, ]

# Row-scaled heatmap written to heatmap_hippo.pdf, with the colour scale
# fixed by breaks from -2 to 2.
pheatmap(
  vsd_hip,
  scale = "row",
  show_rownames = FALSE,
  show_colnames = FALSE,
  color = my_palette,
  breaks = seq(-2, 2, length.out = 255),
  cluster_cols = FALSE,
  fontsize = 15,
  legend = FALSE,
  filename = "heatmap_hippo.pdf",
  width = 5,
  height = 8
)

In [43]:
# Sheet 6: gene name (column 4) plus the eight columns 9-16.
# The result is stored in `datctx` again, overwriting the previous table.
datr <- read_excel(input_file, sheet = 6)
datr <- datr[, c(4, 9:16)]

# Numeric matrix of everything except the gene-name column.
datctx <- apply(datr[, -1], 2, as.numeric)
gen <- as.character(as.matrix(datr[, 1]))

# Average rows that share a gene name (NAs ignored) and move the gene name
# into the row names.
datctx <- aggregate(datctx, by = list(gen), FUN = mean, na.rm = TRUE)
rownames(datctx) <- datctx[, 1]
datctx <- datctx[, -1]

In [44]:
# Rows of the current `datctx` whose gene has p < 0.05 in `hpt`.
sig_hpt_genes <- as.character(hpt$GeneName[which(hpt$pvalue < 0.05)])
vsd_hpt <- datctx[rownames(datctx) %in% sig_hpt_genes, ]

# Row-scaled heatmap written to heatmap_hpt.pdf, with the colour scale fixed
# by breaks from -2 to 2. This is the only tissue heatmap drawn with a legend.
pheatmap(
  vsd_hpt,
  scale = "row",
  show_rownames = FALSE,
  show_colnames = FALSE,
  color = my_palette,
  breaks = seq(-2, 2, length.out = 255),
  cluster_cols = FALSE,
  fontsize = 15,
  legend = TRUE,
  filename = "heatmap_hpt.pdf",
  width = 5,
  height = 8
)

### HPT

In [45]:
# Number of hpt genes with p < 0.05: down-regulated, up-regulated, total.
# sum(..., na.rm = TRUE) is used because subsetting with a logical vector
# that contains NA adds an all-NA row per NA, which nrow() would count.
sum(hpt$pvalue < 0.05 & hpt$log2FoldChange < 0, na.rm = TRUE)
sum(hpt$pvalue < 0.05 & hpt$log2FoldChange > 0, na.rm = TRUE)
sum(hpt$pvalue < 0.05, na.rm = TRUE)

### CTX

In [46]:
# Number of ctx genes with p < 0.05: down-regulated, up-regulated, total.
# sum(..., na.rm = TRUE) is used because subsetting with a logical vector
# that contains NA adds an all-NA row per NA, which nrow() would count.
sum(ctx$pvalue < 0.05 & ctx$log2FoldChange < 0, na.rm = TRUE)
sum(ctx$pvalue < 0.05 & ctx$log2FoldChange > 0, na.rm = TRUE)
sum(ctx$pvalue < 0.05, na.rm = TRUE)

### Hippo

In [47]:
# Number of hippo genes with p < 0.05: down-regulated, up-regulated, total.
# sum(..., na.rm = TRUE) is used because subsetting with a logical vector
# that contains NA adds an all-NA row per NA, which nrow() would count.
sum(hippo$pvalue < 0.05 & hippo$log2FoldChange < 0, na.rm = TRUE)
sum(hippo$pvalue < 0.05 & hippo$log2FoldChange > 0, na.rm = TRUE)
sum(hippo$pvalue < 0.05, na.rm = TRUE)

## Pathway enrichment

In [48]:
# For each tissue: map the gene symbols of the heatmap rows to Entrez IDs,
# then run a GO Biological Process enrichment on the mapped IDs (second
# column of the bitr() result).

# Cortex
sg.ctx <- bitr(rownames(vsd_ctx), fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
go.ctx <- enrichGO(gene = sg.ctx[, 2], OrgDb = "org.Mm.eg.db", ont = "BP", pvalueCutoff = 0.1)

# Hypothalamus
sg.hpt <- bitr(rownames(vsd_hpt), fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
go.hpt <- enrichGO(gene = sg.hpt[, 2], OrgDb = "org.Mm.eg.db", ont = "BP", pvalueCutoff = 0.1)

# Hippocampus
sg.hip <- bitr(rownames(vsd_hip), fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
go.hip <- enrichGO(gene = sg.hip[, 2], OrgDb = "org.Mm.eg.db", ont = "BP", pvalueCutoff = 0.1)

'select()' returned 1:1 mapping between keys and columns

“4.9% of input gene IDs are fail to map...”
'select()' returned 1:1 mapping between keys and columns

“9.43% of input gene IDs are fail to map...”
'select()' returned 1:1 mapping between keys and columns

“6.11% of input gene IDs are fail to map...”


### GO

In [49]:
# Hand-picked GO term descriptions to display. The enrichment tables are
# filtered to these descriptions, and the vector order (reversed) sets the
# y-axis order of the dot plots. Commented-out entries are terms that are
# currently excluded.
go = c(#intracellular estrogen receptor signaling pathway
    #regulation of cellular response to insulin stimulus
    #negative regulation of insulin receptor signaling pathway
    "insulin receptor signaling pathway",
    #response to insulin
    "leptin-mediated signaling pathway",
    "response to leptin",
    "glycolytic process",
    "cellular response to glucose stimulus",
    "cellular carbohydrate metabolic process",
    "glycogen metabolic process",
    "glucose transmembrane transport",
    #regulation of lipid storage
    #triglyceride biosynthetic process
    #fatty acid metabolic process
    #regulation of lipid transport
    "fatty acid catabolic process",
    "lipid import into cell",
    "response to endoplasmic reticulum stress",
    "response to reactive oxygen species",
    "response to oxidative stress",
    #mitochondrion organization
    #regulation of mitochondrion organization
    "positive regulation of G1/S transition of mitotic cell cycle",
    "glial cell proliferation",
    "microtubule cytoskeleton organization involved in mitosis",
    "smoothened signaling pathway",
    "astrocyte activation",
    "positive regulation of inflammatory response",
    "microglial cell activation",
    #positive regulation of cell adhesion
    #cell adhesion mediated by integrin
    "cell-substrate junction assembly",
    "regulation of cell shape",
    "calcium ion transport into cytosol",
    "calcium-mediated signaling",
    #regulation of ARF protein signal transduction
    #ARF protein signal transduction
    #exocytosis
    #regulation of axon regeneration
    "axon guidance",
    "regulation of neurotransmitter levels",
    "glutamate receptor signaling pathway",
    "axonogenesis",
    "regulation of synaptic transmission, dopaminergic",
    "regulation of neuronal synaptic plasticity",
    "exploration behavior",
    "feeding behavior",
    "locomotory behavior"
)
    #regulation of generation of precursor metabolites and energy
    #response to nutrient levels
    #energy homeostasis
    #adaptive thermogenesis)

In [50]:
# Large inline plot size for the enrichment overview.
options(repr.plot.width = 15, repr.plot.height = 15)

In [51]:
# Columns kept from each enrichment result table. They are selected by name:
# the original used positions c(2, 5, 6, 8, 9), which only point at these
# columns for one particular layout of the result table.
go_cols <- c("Description", "pvalue", "p.adjust", "geneID", "Count")

# Terms with raw p < 0.05 per tissue, tagged with the tissue name.
enr_ctx.go <- go.ctx@result[go.ctx@result$pvalue < 0.05, go_cols]
enr_ctx.go$Tissue <- "CTX"
enr_hpt.go <- go.hpt@result[go.hpt@result$pvalue < 0.05, go_cols]
enr_hpt.go$Tissue <- "HPT"
enr_hip.go <- go.hip@result[go.hip@result$pvalue < 0.05, go_cols]
enr_hip.go$Tissue <- "Hippo"

all <- rbind(enr_ctx.go, enr_hpt.go, enr_hip.go)
#write.csv(all, file="GO_proteome.csv")
# Keep only the hand-picked terms; the plot below uses a lower-case `count`.
all <- all[all$Description %in% go, ]
colnames(all) <- c("Description", "pvalue", "p.adjust", "geneID", "count", "Tissue")

# Dot plot: one column per tissue, one row per selected term (in the order
# of `go`, top to bottom), colour = p-value, size = gene count.
pdf(file = "enrich_path.pdf", width = 15, height = 15)
# print() makes the plot reach the PDF device even when the code is not run
# at top level (e.g. via source()).
print(
  ggplot(
    all,
    aes(Tissue, y = factor(Description, levels = rev(go)), col = pvalue, size = count)
  ) +
    geom_point() +
    scale_colour_gradient(low = "red", high = "blue") +
    scale_size_continuous(range = c(3, 10)) +
    xlab("") +
    ylab("") +
    theme(axis.text.x = element_blank()) +
    theme(legend.position = "none") +
    theme(text = element_text(size = 40))
)
dev.off()

In [52]:
# Rebuild the combined table with the original column names (including
# `Count`) and keep only the hand-picked GO terms.
all <- do.call(rbind, list(enr_ctx.go, enr_hpt.go, enr_hip.go))
all <- all[is.element(all$Description, go), ]

In [53]:
# Symbol -> Entrez ID lookup tables for every gene in each tissue's result
# table (not only the significant ones).
sig.gene.hpt.all <- bitr(hpt$GeneName, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
sig.gene.ctx.all <- bitr(ctx$GeneName, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
sig.gene.hip.all <- bitr(hippo$GeneName, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)

'select()' returned 1:1 mapping between keys and columns

“6.93% of input gene IDs are fail to map...”
'select()' returned 1:1 mapping between keys and columns

“6.8% of input gene IDs are fail to map...”
'select()' returned 1:1 mapping between keys and columns

“6.91% of input gene IDs are fail to map...”


In [54]:
# Tall, narrow inline plot size for the per-tissue dot plots.
options(repr.plot.width = 7, repr.plot.height = 15)

In [73]:
# For every selected GO term, collect the ctx p-value and fold change of the
# genes annotated to that term. geneID holds "/"-separated IDs that are
# matched against the second column of sig.gene.ctx.all.
# Built with lapply() + one rbind() instead of growing a data frame in a
# 1:nrow() loop. Terms without any matching gene are skipped; the original
# failed on them when assigning the pathway columns to a zero-row table.
ctx_rows <- lapply(seq_len(nrow(all)), function(i) {
  term_ids <- unlist(strsplit(all$geneID[i], "/"))
  term_symbols <- sig.gene.ctx.all[sig.gene.ctx.all[, 2] %in% term_ids, 1]
  p <- as.data.frame(ctx[which(ctx$GeneName %in% term_symbols), c(2, 3)])
  if (nrow(p) == 0) {
    return(NULL)
  }
  p$pathway <- all$Description[i]
  p$padjPath <- all$p.adjust[i]
  p$count <- all$Count[i]
  p
})
df_total.ctx <- do.call(rbind, ctx_rows)

df_total.ctx$significance <- ifelse(
  df_total.ctx$pvalue < 0.05,
  "significant",
  "not significant"
)

# Dot plot: x = log2 fold change, one row per GO term (in the order of `go`),
# point size = -log10(p), red = significant.
pdf(file = "enrichdot_ctx.pdf", width = 7, height = 15)
# print() makes the plot reach the PDF device even when the code is not run
# at top level (e.g. via source()).
print(
  ggplot(df_total.ctx, aes(log2FoldChange, y = factor(pathway, levels = rev(go)))) +
    geom_point(aes(colour = significance, size = -log10(pvalue)), alpha = 0.5) +
    # named values keep the colours right even if only one class is present
    scale_color_manual(values = c("not significant" = "gray85", "significant" = "red")) +
    scale_size_continuous(range = c(0, 7.5)) +
    geom_vline(xintercept = 0) +
    theme(
      axis.title = element_text(size = 20),
      title = element_text(size = 20),
      text = element_text(size = 35)
    ) + #guides(color=guide_legend(override.aes=list(size=5) ) ) +
    theme(legend.position = "none") +
    theme(axis.text.y = element_blank()) +
    ylab("") +
    xlab("") +
    xlim(-2.68, 3.82)
)
dev.off()

In [74]:
# For every selected GO term, collect the hpt p-value and fold change of the
# genes annotated to that term. geneID holds "/"-separated IDs that are
# matched against the second column of sig.gene.hpt.all.
# Built with lapply() + one rbind() instead of growing a data frame in a
# 1:nrow() loop. Terms without any matching gene are skipped; the original
# failed on them when assigning the pathway columns to a zero-row table.
hpt_rows <- lapply(seq_len(nrow(all)), function(i) {
  term_ids <- unlist(strsplit(all$geneID[i], "/"))
  term_symbols <- sig.gene.hpt.all[sig.gene.hpt.all[, 2] %in% term_ids, 1]
  p <- as.data.frame(hpt[which(hpt$GeneName %in% term_symbols), c(2, 3)])
  if (nrow(p) == 0) {
    return(NULL)
  }
  p$pathway <- all$Description[i]
  p$tissue <- all$Tissue[i]
  # adjusted p-value of the term, as in the ctx and hippo cells (the original
  # stored all$pvalue here)
  p$padjPath <- all$p.adjust[i]
  p$count <- all$Count[i]
  p
})
df_total.hpt <- do.call(rbind, hpt_rows)

# NOTE(review): for terms that come from the HPT enrichment, gene p-values
# above 0.05 are overwritten with 0.05, so those genes are drawn as
# "significant" below. Behaviour kept from the original; confirm it is
# intended. Vectorised with which(), so NA p-values are left alone instead of
# raising an error in a scalar if().
clamp_rows <- which(df_total.hpt$tissue == "HPT" & df_total.hpt$pvalue > 0.05)
df_total.hpt$pvalue[clamp_rows] <- 0.05

df_total.hpt$significance <- ifelse(
  df_total.hpt$pvalue <= 0.05,
  "significant",
  "not significant"
)

# Dot plot: x = log2 fold change, one row per GO term (in the order of `go`),
# point size = -log10(p), red = significant.
pdf(file = "enrichdot_hpt.pdf", width = 7, height = 15)
# print() makes the plot reach the PDF device even when the code is not run
# at top level (e.g. via source()).
print(
  ggplot(df_total.hpt, aes(log2FoldChange, y = factor(pathway, levels = rev(go)))) +
    geom_point(aes(colour = significance, size = -log10(pvalue)), alpha = 0.5) +
    # named values keep the colours right even if only one class is present
    scale_color_manual(values = c("not significant" = "gray85", "significant" = "red")) +
    scale_size_continuous(range = c(0, 8.5)) +
    geom_vline(xintercept = 0) +
    theme(
      axis.title = element_text(size = 20),
      title = element_text(size = 20),
      text = element_text(size = 35)
    ) +
    theme(legend.position = "none") +
    theme(axis.text.y = element_blank()) +
    ylab("") +
    xlab("") +
    xlim(-2.68, 3.82)
)
dev.off()

In [75]:
# For every selected GO term, collect the hippo p-value and fold change of
# the genes annotated to that term. geneID holds "/"-separated IDs that are
# matched against the second column of sig.gene.hip.all.
# Built with lapply() + one rbind() instead of growing a data frame in a
# 1:nrow() loop. Terms without any matching gene are skipped; the original
# failed on them when assigning the pathway columns to a zero-row table.
hip_rows <- lapply(seq_len(nrow(all)), function(i) {
  term_ids <- unlist(strsplit(all$geneID[i], "/"))
  term_symbols <- sig.gene.hip.all[sig.gene.hip.all[, 2] %in% term_ids, 1]
  p <- as.data.frame(hippo[which(hippo$GeneName %in% term_symbols), c(2, 3)])
  if (nrow(p) == 0) {
    return(NULL)
  }
  p$pathway <- all$Description[i]
  p$padjPath <- all$p.adjust[i]
  p$count <- all$Count[i]
  p
})
df_total.hip <- do.call(rbind, hip_rows)

# Colour class per gene: "red" when p <= 0.05, otherwise "grey".
df_total.hip$col <- ifelse(df_total.hip$pvalue <= 0.05, "red", "grey")

# Dot plot: x = log2 fold change, one row per GO term (in the order of `go`),
# point size = -log10(p).
pdf(file = "enrichdot_hippo.pdf", width = 7, height = 15)
# print() makes the plot reach the PDF device even when the code is not run
# at top level (e.g. via source()).
print(
  ggplot(df_total.hip, aes(log2FoldChange, y = factor(pathway, levels = rev(go)))) +
    geom_point(aes(colour = col, size = -log10(pvalue)), alpha = 0.5) +
    # named values keep the colours right even if only one class is present
    scale_color_manual(values = c("grey" = "gray85", "red" = "red")) +
    scale_size_continuous(range = c(0, 7.5)) +
    geom_vline(xintercept = 0) +
    theme(
      axis.title = element_text(size = 20),
      title = element_text(size = 20),
      text = element_text(size = 35)
    ) +
    theme(legend.position = "none") +
    theme(axis.text.y = element_blank()) +
    ylab("") +
    xlab("") +
    xlim(-2.68, 3.82)
)
dev.off()

In [60]:
#ggplot(df_total.hip, aes(log2FoldChange, y=factor(pathway, level=go[length(go):1]))) +
    #       scale_color_gradient(low="gray90", high="red3", limits=c(0,3.2)) +
    #       geom_point(aes(color=-log10(pvalue)), size=7, alpha=0.8) +
    #       geom_vline(xintercept=0)+
    #       theme(axis.title=element_text(size=20),#face="bold"),
    #             legend.title=element_text(size=35),#face="bold"),
    #             title=element_text(size=20),#,face="bold"),
    #             text=element_text(size=35)) +
    #       ylab("") + 
    #       theme(legend.position="none") + 
    #       xlab("") + 
    #       theme(axis.text.y=element_blank())+ xlim(-2.68,3.82)

### KEGG

In [58]:
#kegg <- c("Apoptosis","Inositol phosphate metabolism","Tight junction","Glycosaminoglycan degradation",
#          "Adherens junction","Endocytosis","SNARE interactions in vesicular transport","AMPK signaling pathway",
#          "Oxidative phosphorylation","Amino sugar and nucleotide sugar metabolism","Adipocytokine signaling pathway",
#          "FoxO signaling pathway","Insulin resistance","Insulin signaling pathway","Fatty acid elongation",
#          "Retrograde endocannabinoid signaling","Thermogenesis") # "Fatty acid metabolism",

In [59]:
#if(bool_plot){  
#    enr_ctx.kegg = kegg.ctx@result[kegg.ctx@result[,5] < 0.05, c(2,5,6,8,9)]
#    enr_ctx.kegg$Tissue='CTX'
#    enr_hpt.kegg=kegg.hpt@result[kegg.hpt@result[,5] < 0.05, c(2,5,6,8,9)]
#    enr_hpt.kegg$Tissue='HPT'
#    enr_hip.kegg=kegg.hip@result[kegg.hip@result[,5] < 0.05, c(2,5,6,8,9)]
#    enr_hip.kegg$Tissue='Hippo'
#        
#    all = rbind(enr_ctx.kegg, enr_hpt.kegg, enr_hip.kegg)
#    #write.csv(all, file="KEGG_proteome.csv")
#    all = all[all$Description%in%kegg,]
#    
#    options(repr.plot.width=10, repr.plot.height=12)
#    ggplot(all, 
#           aes(Tissue, Description, col=pvalue, size=Count)) + 
#           geom_point()+ 
#           scale_colour_gradient(low="red", high="blue") +
#           xlab("Tissue") + 
#           ylab("KEGG enriched pathways") + 
#           theme(text=element_text(size=25),axis.text.x=element_text(angle=45, hjust=1))
#}

In [60]:
#if(bool_plot){
#    options(repr.plot.width=12, repr.plot.height=15)
#
#    all = rbind(enr_ctx.kegg, enr_hpt.kegg, enr_hip.kegg)
#    all = all[all$Description%in%kegg,]
#
#    sig.gene.hpt.all <- bitr(hpt$GeneName,
#                             fromType="SYMBOL",
#                             toType="ENTREZID",
#                             OrgDb=org.Mm.eg.db
#    )
#    sig.gene.ctx.all <- bitr(ctx$GeneName, 
#                             fromType="SYMBOL",
#                             toType="ENTREZID",
#                             OrgDb=org.Mm.eg.db
#    )
#    sig.gene.hip.all <- bitr(hippo$GeneName, 
#                             fromType="SYMBOL",
#                             toType="ENTREZID",
#                             OrgDb=org.Mm.eg.db
#    )
#}

In [61]:
#if(bool_plot){   
#    df_total.ctx=data.frame()
#    for(i in 1:nrow(all)){        
#        names = keggGet(substr(rownames(all)[i],1,8))[[1]]$GENE
#        namesodd = names[seq(0,length(names),2)]
#        p = ctx[as.matrix(ctx[,1])%in%gsub("\\;.*","",namesodd),c(2,3)]
#        
#        p$pathway = all$Description[i]
#        p$padjPath = all$p.adjust[i]
#        p$count = all$Count[i]
#        df_total.ctx = rbind(df_total.ctx,p)
#    }    
#    ggplot(df_total.ctx, aes(log2FoldChange, pathway)) + 
#           scale_color_gradient(low="grey", high="red") +
#           geom_point(aes(color=-log10(pvalue))) +
#           geom_vline(xintercept=0)+
#           theme(axis.title=element_text(size=20,face="bold"),
#                 title=element_text(size=20,face="bold"), 
#                 text = element_text(size=25)) +
#           ylab("Pathway Names") +
#           ggtitle("CTX") + 
#           xlab("log2FC")
#}

In [62]:
#if(bool_plot){       
#    df_total.hpt = data.frame()
#    for(i in 1:nrow(all)){
#        names = keggGet(substr(rownames(all)[i],1,8))[[1]]$GENE
#        namesodd = names[seq(0,length(names),2)]
#        p = hpt[as.matrix(hpt[,1])%in%gsub("\\;.*","",namesodd),c(2,3)]
# 
#        p$pathway = all$Description[i]
#        p$padjPath = all$p.adjust[i]
#        p$count = all$Count[i]
#        df_total.hpt = rbind(df_total.hpt, p)
#    }
#    ggplot(df_total.hpt, aes(log2FoldChange, pathway)) +
#           scale_color_gradient(low="grey", high="red") +
#           geom_point(aes(color=-log10(pvalue))) +
#           geom_vline(xintercept=0)+
#           theme(axis.title=element_text(size=20,face="bold"),
#                 title=element_text(size=20,face="bold"),
#                 text=element_text(size=25)) +
#           ylab("Pathway Names") +
#           ggtitle("HPT") +
#           xlab("log2FC")
#}

In [63]:
#if(bool_plot){ 
#    df_total.hip = data.frame()
#    for(i in 1:nrow(all)){
#        names = keggGet(substr(rownames(all)[i],1,8))[[1]]$GENE
#        namesodd = names[seq(0,length(names),2)]
#        p = hippo[as.matrix(hippo[,1])%in%gsub("\\;.*","",namesodd),c(2,3)]
#
#        p$pathway = all$Description[i]
#        p$padjPath = all$p.adjust[i]
#        p$count = all$Count[i]
#        df_total.hip = rbind(df_total.hip,p)
#    }
#    ggplot(df_total.hip, aes(log2FoldChange, pathway)) +
#               scale_color_gradient(low="grey", high="red") +
#               geom_point(aes(color=-log10(pvalue))) +
#               geom_vline(xintercept=0)+
#               theme(axis.title=element_text(size=20,face="bold"),
#                     title=element_text(size=20,face="bold"),
#                     text=element_text(size=25)) +
#               ylab("Pathway Names") +
#               ggtitle("Hippo") +
#               xlab("log2FC")
#}

In [None]:
#if(bool_plot){       
#    df_total.hpt = data.frame()
#    for(i in 1:nrow(all)){
#        p = as.data.frame(hpt[which(hpt$GeneName%in%
#                          sig.gene.hpt.all[sig.gene.hpt.all[,2]%in%unlist(strsplit(all$geneID[i],"/")),1]),c(2,3)])
#        p$pathway = all$Description[i]
#        p$padjPath = all$p.adjust[i]
#        p$count = all$Count[i]
#        df_total.hpt = rbind(df_total.hpt,p)
#    }
    #df_total.hpt$log2FoldChange <- ifelse(df_total.hpt$log2FoldChange>8,8,df_total.ctx$log2FoldChange )
    #df_total.hpt$log2FoldChange <- ifelse(df_total.hpt$log2FoldChange< -8,-8,df_total.ctx$log2FoldChange )
    #df_total.hpt$pvalue <- ifelse(df_total.hpt$pvalue<1e-05,1e-05,df_total.ctx$pvalue )

#    ggplot(df_total.hpt, aes(log2FoldChange, pathway)) +
#           scale_color_gradient(low="grey", high="red") +
#           geom_point(aes(color = -log10(pvalue))) +
#           geom_vline(xintercept=0)+
#           theme(axis.title=element_text(size=20,face="bold"),
#                 title=element_text(size=20,face="bold"),
#                 text=element_text(size=25)) +
#           ylab("Pathway Names") +
#           ggtitle("Significant Genes") +
#           xlab("log2FC")
#}

In [63]:
#if(bool_plot){   
#    df_total.ctx=data.frame()
#    for(i in 1:nrow(all)){
#        gene_list = data.frame(mget(substr(rownames(all)[i],1,10), org.Mm.egGO2ALLEGS)[[1]])
#        p = ctx[as.matrix(ctx[,1])%in%sig.gene.ctx.all[sig.gene.ctx.all[,2]%in%gene_list[,1],1],c(2,3)]
#        p$pathway = all$Description[i]
#        p$padjPath = all$p.adjust[i]
#        p$count = all$Count[i]
#        df_total.ctx = rbind(df_total.ctx, p)
#    }
#    ggplot(df_total.ctx, aes(log2FoldChange, pathway)) + 
#           scale_color_gradient(low="grey", high="red") +
#           geom_point(aes(color=-log10(pvalue))) +
#           geom_vline(xintercept=0)+
#           theme(axis.title=element_text(size=20,face="bold"),
#                 title=element_text(size=20,face="bold"), 
#                 text=element_text(size=25)) +
#           ylab("Pathway Names") +
#           ggtitle("CTX") + 
#           xlab("log2FC")
#}