# Transcriptomics data analysis

### Loading packages
Required packages for downstream analysis

In [None]:
# Attach every package used by the downstream analysis. suppressMessages()
# only hides the start-up messages; a missing package still raises an error.
# (A comma was missing after "stringdist", which made this cell unparseable.)
inst <- suppressMessages(lapply(c("DESeq2",
                                  "ggplot2",
                                  "gplots",
                                  "RColorBrewer",
                                  "clusterProfiler",
                                  "org.Mm.eg.db",
                                  "stringdist",
                                  "dplyr"),
                                library,
                                character.only = TRUE)
)

In [2]:
options(repr.plot.width=20, repr.plot.height=10)

### Set the global variables

Set whether analysis objects are recomputed or loaded from cache.

In [2]:
bool_recomp = FALSE

Set whether to produce plots; set to `FALSE` for test runs.

In [1]:
bool_plot = FALSE

### Load the data

In [3]:
load("/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/Initial_Analysis.RData")

In [4]:
# Sample annotation table (tab-separated, with a header row). Its second
# column supplies the labels used as column names of expression.matrix
# (presumably the count matrix provided by the .RData file loaded above).
sampleName = read.table(file='~/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/Sample_names.txt',
                        header=TRUE,  # spelled out: `T` is an ordinary variable that can be reassigned
                        sep='\t'
)
colnames(expression.matrix) = as.character(sampleName[,2])
# Colors for heatmaps: reversed 9-class "Spectral" palette interpolated to 255 steps
colors = colorRampPalette(rev(brewer.pal(9, "Spectral")))(255)

In [5]:
# Term exclusion lists used when trimming the enrichment results further down:
# two lists of terms not related to liver (noliv, noliv1) and one list of
# obsolete GO terms (obsolete). The files are read without a header row and
# only their first column is matched against the enrichment result IDs.
noliv <- read.csv("/Users/viktorian.miok/Documents/consultation/Katarina/200728_terms_not_related_to_liver.csv",
                  header=FALSE
)
noliv1 <- read.csv("/Users/viktorian.miok/Documents/consultation/Katarina/200826 non-liver related terms.csv",
                   header=FALSE
)
obsolete <- read.csv("/Users/viktorian.miok/Documents/consultation/Katarina/200728_obsolete_GO_terms.csv",
                     header=FALSE
)

## TRF Study

In [6]:
trfdat = expression.matrix[,c(64:71,56:58,60:63)]

In [7]:
id = colnames(trfdat)
condition = c(rep("TRFHFD",8),rep("ALHFD",7))
metaData = data.frame(id,
                      condition
)
dds <- DESeqDataSetFromMatrix(countData=trfdat, 
                              colData=metaData,
                              design=~condition
)
featureData = data.frame(gene=rownames(dds))
mcols(dds) = DataFrame(mcols(dds),
                       featureData
)

“some variables in design formula are characters, converting to factors”


In [8]:
design(dds) = ~ condition  
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds,
                           fit='parametric'
)

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates



In [9]:
keep = rowSums(counts(dds)) >= 10
dds = dds[keep,]

In [3]:
if(bool_plot){
    # Variance stabilizing transformation of the filtered counts; the
    # resulting `vsd` is reused by the heatmap cell further down.
    vsd=vst(dds)
    # PCA plot of the samples, grouped by condition and labelled with the
    # sample id; axis limits are fixed by hand for this data set.
    plotPCA(vsd, "condition") +
    geom_text(aes(label=id),vjust=2) + 
    xlim(-15, 35) +
    ylim(-12,18)
}

In [11]:
dds <- DESeq(dds)
res_alhdf_vs_trfhfd = results(dds)

using pre-existing size factors

estimating dispersions

found already estimated dispersions, replacing these

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

-- replacing outliers and refitting for 21 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



In [12]:
table(res_alhdf_vs_trfhfd$pvalue < 0.05)


FALSE  TRUE 
15483  2183 

In [13]:
# Order the results table by adjusted p-value, breaking ties by raw p-value.
res = res_alhdf_vs_trfhfd[order(res_alhdf_vs_trfhfd$padj
                                ,res_alhdf_vs_trfhfd$pvalue),]

# Add a significance label per gene and keep the gene IDs as row names.
# NOTE(review): the "FDR<0.05" label is assigned from the raw p-value
# (res$pvalue) here, whereas the VSG and IF sections derive it from padj.
# Later cells match on the label text, so it is left as is — confirm that the
# unadjusted threshold is intended for the TRF study.
resultsTRF <- as.data.frame(dplyr::mutate(as.data.frame(res),
                                         sig=ifelse(res$pvalue < 0.05, "FDR<0.05", "Not Sig")),
                           row.names=rownames(res))
# Drop genes for which no p-value was computed.
resultsTRF = resultsTRF[(!is.na(resultsTRF$pvalue)),]

In [4]:
if(bool_plot){
    # significant genes heatmap
    p = assay(vsd)[rownames(assay(vsd))%in%rownames(resultsTRF[which(resultsTRF$sig == "FDR<0.05"),]),]
    heatmap.2(p,
              col=colors,
              scale="row",
              trace="none",
              main="TRFHFD_vs_ALHFD",
              Rowv=FALSE,
              Colv=FALSE
    ) 
}

In [15]:
#setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/results')
#write.csv(results,"TRFstudy_TRFHFDvsALHDF.csv")

### Up regulated pathways

In [5]:
# Up-regulated TRF genes (sig == "FDR<0.05", log2FoldChange > 0): convert the
# Ensembl row names to Entrez IDs and run KEGG enrichment.
# The enrichment objects are computed regardless of bool_plot because the
# export cells further down read kk1@result / ego*@result unconditionally;
# only the bar plots depend on the flag.
sig.gene <- bitr(rownames(resultsTRF[which(resultsTRF$sig == "FDR<0.05" & resultsTRF$log2FoldChange > 0),]),
                 fromType="ENSEMBL",
                 toType="ENTREZID",
                 OrgDb=org.Mm.eg.db
)
kk1 <- enrichKEGG(gene=sig.gene[,2],
                  organism='mmu',
                  pvalueCutoff=0.1
)
if(bool_plot){
    barplot(kk1,
            showCategory=20,
            title="KEGG pathways"
    )
}

In [6]:
# GO enrichment, Biological Process ontology (BH-adjusted, gene symbols in output).
ego1 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="BP",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego1,
            showCategory=25,
            title="GO pathways"
    )
}

In [7]:
# GO enrichment, Cellular Component ontology.
ego2 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="CC",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego2,
            showCategory=25,
            title="GO pathways"
    )
}

In [8]:
# GO enrichment, Molecular Function ontology.
ego3 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="MF",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego3,
            showCategory=25,
            title="GO pathways"
    )
}

In [20]:
bp1 = ego1@result

In [21]:
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/results/TRF')
write.csv(bp1, "TRF_GO_BP_up_RNA.csv")

In [22]:
bp1 = bp1[bp1$p.adjust<0.05,]

lk = as.numeric(sub("\\/.*", "", bp1[,4]))
bp1 = bp1[(lk>15)&(lk<500),]
bp1 = bp1[bp1$Count>4,]
bp1 = bp1[!bp1$ID %in% noliv[,1],]
bp1 = bp1[!bp1$ID %in% noliv1[,1],]
bp1 = bp1[!bp1$ID %in% obsolete[,1],]
bp1 = bp1[order(as.numeric(sub("\\/.*", "", bp1[,4])), decreasing=TRUE),]

In [23]:
write.csv(bp1, "TRF_GO_BP_trimed_up_RNA_a.csv")

In [24]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp1[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp1)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp1 = bp1[rk1 == "ostavi",]

In [25]:
write.csv(bp1, "TRF_GO_BP_trimed_up_RNA_b.csv")

In [26]:
bp2 = ego2@result

In [27]:
write.csv(bp2, "TRF_GO_CC_up_RNA.csv")

In [28]:
bp2 = bp2[bp2$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp2[,4]))
bp2 = bp2[(lk > 15) & (lk < 500),]
bp2 = bp2[bp2$Count >4,]
bp2 = bp2[!bp2$ID %in% noliv[,1],]
bp2 = bp2[!bp2$ID %in% noliv1[,1],]
bp2 = bp2[!bp2$ID %in% obsolete[,1],]
bp2 = bp2[order(as.numeric(sub("\\/.*", "", bp2[,4])), decreasing=TRUE),]

In [29]:
write.csv(bp2, "TRF_GO_CC_trimed_up_RNA_a.csv")

In [30]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp2[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp2)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp2 = bp2[rk1 == "ostavi",]

In [31]:
write.csv(bp2, "TRF_GO_CC_trimed_up_RNA_b.csv")

In [32]:
bp3 = ego3@result

In [33]:
write.csv(bp3, "TRF_GO_MF_up_RNA.csv")

In [34]:
bp3 = bp3[bp3$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp3[,4]))
bp3 = bp3[(lk > 15) & (lk < 500),]
bp3 = bp3[bp3$Count >4,]
bp3 = bp3[!bp3$ID %in% noliv[,1],]
bp3 = bp3[!bp3$ID %in% noliv1[,1],]
bp3 = bp3[!bp3$ID %in% obsolete[,1],]
bp3 = bp3[order(as.numeric(sub("\\/.*", "", bp3[,4])), decreasing=TRUE),]

In [35]:
write.csv(bp3,"TRF_GO_MF_trimed_up_RNA_a.csv")

In [36]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp3[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp3)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp3 = bp3[rk1 == "ostavi",]

In [37]:
write.csv(bp3, "TRF_GO_MF_trimed_up_RNA_b.csv")

In [38]:
bp4 = kk1@result

In [39]:
write.csv(bp4, "TRF_KEGG_up_RNA.csv")

In [40]:
bp4 = bp4[bp4$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp4[,4]))
bp4 = bp4[(lk > 15) & (lk < 500),]
bp4 = bp4[bp4$Count > 4,]
bp4 = bp4[!bp4$ID %in% noliv[,1],]
bp4 = bp4[!bp4$ID %in% noliv1[,1],]
bp4 = bp4[!bp4$ID %in% obsolete[,1],]
bp4 = bp4[order(as.numeric(sub("\\/.*", "", bp4[,4])), decreasing=TRUE),]

In [41]:
write.csv(bp4,"TRF_KEGG_trimed_up_RNA_a.csv")

In [42]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp4[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp4)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp4 = bp4[rk1 == "ostavi",]

In [43]:
write.csv(bp4, "TRF_KEGG_trimed_up_RNA_b.csv")

### Down regulated pathways

In [9]:
# Down-regulated TRF genes (sig == "FDR<0.05", log2FoldChange < 0): convert the
# Ensembl row names to Entrez IDs and run KEGG enrichment.
# The enrichment objects are computed regardless of bool_plot because the
# export cells further down read kk1@result / ego*@result unconditionally;
# only the bar plots depend on the flag.
sig.gene <- bitr(rownames(resultsTRF[which(resultsTRF$sig == "FDR<0.05" & resultsTRF$log2FoldChange < 0),]),
                 fromType="ENSEMBL",
                 toType="ENTREZID",
                 OrgDb=org.Mm.eg.db
)
kk1 <- enrichKEGG(gene=sig.gene[,2],
                  organism='mmu',
                  pvalueCutoff=0.1
)
if(bool_plot){
    barplot(kk1,
            showCategory=20,
            title="KEGG pathways"
    )
}

In [10]:
# GO enrichment, Biological Process ontology (BH-adjusted, gene symbols in output).
ego1 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="BP",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego1,
            showCategory=25,
            title="GO pathways"
    )
}

In [11]:
# GO enrichment, Cellular Component ontology.
ego2 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="CC",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego2,
            showCategory=25,
            title="GO pathways"
    )
}

In [12]:
# GO enrichment, Molecular Function ontology.
ego3 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="MF",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego3,
            showCategory=25,
            title="GO pathways"
    )
}

In [48]:
bp1 = ego1@result

In [49]:
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/results/TRF')
write.csv(bp1, "TRF_GO_BP_down_RNA.csv")

In [50]:
bp1 = bp1[bp1$p.adjust < 0.05,]

lk = as.numeric(sub("\\/.*", "", bp1[,4]))
bp1 = bp1[(lk > 15) & (lk < 500),]
bp1 = bp1[bp1$Count > 4,]
bp1 = bp1[!bp1$ID %in% noliv[,1],]
bp1 = bp1[!bp1$ID %in% noliv1[,1],]
bp1 = bp1[!bp1$ID %in% obsolete[,1],]
bp1 = bp1[order(as.numeric(sub("\\/.*", "", bp1[,4])), decreasing=TRUE),]

In [51]:
write.csv(bp1, "TRF_GO_BP_trimed_down_RNA_a.csv")

In [52]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp1[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp1)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk,"brisi")
bp1 = bp1[rk1 == "ostavi",]

In [53]:
write.csv(bp1, "TRF_GO_BP_trimed_down_RNA_b.csv")

In [54]:
# Cellular Component results for the down-regulated genes. The original
# assigned to `bp`, so the following cells wrote and filtered the stale `bp2`
# left over from the up-regulated analysis.
bp2 = ego2@result

In [55]:
write.csv(bp2, "TRF_GO_CC_down_RNA.csv")

In [56]:
bp2 = bp2[bp2$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp2[,4]))
bp2 = bp2[(lk > 15) & (lk < 500),]
bp2 = bp2[bp2$Count > 4,]
bp2 = bp2[!bp2$ID%in%noliv[,1],]
bp2 = bp2[!bp2$ID%in%noliv1[,1],]
bp2 = bp2[!bp2$ID%in%obsolete[,1],]
bp2 = bp2[order(as.numeric(sub("\\/.*", "", bp2[,4])), decreasing=TRUE),]

In [57]:
write.csv(bp2,"TRF_GO_CC_trimed_down_RNA_a.csv")

In [58]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp2[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp2)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp2 = bp2[rk1 == "ostavi",]

In [59]:
write.csv(bp2, "TRF_GO_CC_trimed_down_RNA_b.csv")

In [60]:
bp3 = ego3@result

In [61]:
write.csv(bp3, "TRF_GO_MF_down_RNA.csv")

In [62]:
bp3 = bp3[bp3$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp3[,4]))
bp3 = bp3[(lk > 15) & (lk < 500),]
bp3 = bp3[bp3$Count > 4,]
bp3 = bp3[!bp3$ID %in% noliv[,1],]
bp3 = bp3[!bp3$ID %in% noliv1[,1],]
bp3 = bp3[!bp3$ID %in% obsolete[,1],]
bp3 = bp3[order(as.numeric(sub("\\/.*", "", bp3[,4])), decreasing=TRUE),]

In [63]:
write.csv(bp3, "TRF_GO_MF_trimed_down_RNA_a.csv")

In [64]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp3[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp3)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp3 = bp3[rk1 == "ostavi",]

In [65]:
write.csv(bp3, "TRF_GO_MF_trimed_down_RNA_b.csv")

In [66]:
bp4 = kk1@result

In [67]:
write.csv(bp4, "TRF_KEGG_down_RNA.csv")

In [68]:
bp4 = bp4[bp4$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp4[,4]))
bp4 = bp4[(lk > 15) & (lk < 500),]
bp4 = bp4[bp4$Count > 4,]
bp4 = bp4[!bp4$ID %in% noliv[,1],]
bp4 = bp4[!bp4$ID %in% noliv1[,1],]
bp4 = bp4[!bp4$ID %in% obsolete[,1],]
bp4 = bp4[order(as.numeric(sub("\\/.*", "", bp4[,4])), decreasing=TRUE),]

In [69]:
write.csv(bp4,"TRF_KEGG_trimed_down_RNA_a.csv")

In [70]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp4[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp4)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp4 = bp4[rk1 == "ostavi",]

In [71]:
write.csv(bp4, "TRF_KEGG_trimed_down_RNA_b.csv")

## VSG Study

In [72]:
vsgdat = expression.matrix[,c(75,77:81,83:89)]

In [73]:
id = colnames(vsgdat)
condition = c(rep("VSGHFD",6), rep("PFHFD",7))
metaData = data.frame(id,
                      condition
)
dds <- DESeqDataSetFromMatrix(countData=vsgdat, 
                              colData=metaData,
                              design=~ condition
)
featureData = data.frame(gene=rownames(dds))
mcols(dds) = DataFrame(mcols(dds), 
                       featureData
)

“some variables in design formula are characters, converting to factors”


In [74]:
design(dds)= ~ condition  
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds,
                        fit='parametric'
)

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates



In [75]:
keep = rowSums(counts(dds)) >= 10
dds = dds[keep,]

In [13]:
if(bool_plot){
    #Variance stabilizing transformation
    vsd <- vst(dds)
    # PCA plot of the samples from different locations
    plotPCA(vsd, "condition") +
            geom_text(aes(label=id), vjust=2) +
            xlim(-30, 13) +
            ylim(-15, 13)
}

In [77]:
dds <- DESeq(dds)
res_vsghfd_vs_pfhfd <- results(dds)

using pre-existing size factors

estimating dispersions

found already estimated dispersions, replacing these

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

-- replacing outliers and refitting for 37 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



In [78]:
table(res_vsghfd_vs_pfhfd$padj < 0.05)


FALSE  TRUE 
13848   311 

In [79]:
# order results table by the smallest adjusted p value:
res = res_vsghfd_vs_pfhfd[order(res_vsghfd_vs_pfhfd$padj, res_vsghfd_vs_pfhfd$pvalue),]

resultsVSG = as.data.frame(dplyr::mutate(as.data.frame(res),
                                      sig=ifelse(res$padj < 0.05, "FDR<0.05", "Not Sig")),
                            row.names=rownames(res))
resultsVSG = resultsVSG[(!is.na(resultsVSG$padj)),]

In [14]:
if(bool_plot){
    # Heatmap of the significant genes: rows of the variance-stabilized
    # matrix whose gene IDs carry the "FDR<0.05" label in resultsVSG,
    # scaled per row, with row and column reordering switched off.
    p=assay(vsd)[rownames(assay(vsd))%in%rownames(resultsVSG[which(resultsVSG$sig == "FDR<0.05"),]),]
    heatmap.2(p, 
              col=colors,
              scale="row",
              trace="none",
              main="VSGHFD_vs_PFHFD",
              Rowv=FALSE,
              Colv=FALSE
    ) 
}

In [81]:
#setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/results')
#write.csv(results,"VSGstudy_VSGHFDvsPFHFD.csv")

### Up regulated pathways

In [15]:
# Up-regulated VSG genes (sig == "FDR<0.05", log2FoldChange > 0): convert the
# Ensembl row names to Entrez IDs and run KEGG enrichment.
# The enrichment objects are computed regardless of bool_plot because the
# export cells further down read kk1@result / ego*@result unconditionally;
# only the bar plots depend on the flag.
sig.gene <- bitr(rownames(resultsVSG[which(resultsVSG$sig == "FDR<0.05" & resultsVSG$log2FoldChange > 0),]),
                 fromType="ENSEMBL",
                 toType="ENTREZID",
                 OrgDb=org.Mm.eg.db
)
kk1 <- enrichKEGG(gene=sig.gene[,2],
                  organism='mmu',
                  pvalueCutoff=0.05
)
if(bool_plot){
    barplot(kk1,
            showCategory=25,
            title="KEGG pathways"
    )
}

In [16]:
# GO enrichment, Biological Process ontology (BH-adjusted, gene symbols in output).
ego1 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="BP",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego1,
            showCategory=25,
            title="GO pathways"
    )
}

In [17]:
# GO enrichment, Cellular Component ontology.
ego2 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="CC",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego2,
            showCategory=25,
            title="GO pathways"
    )
}

In [18]:
# GO enrichment, Molecular Function ontology.
ego3 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="MF",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego3,
            showCategory=25,
            title="GO pathways"
    )
}

In [86]:
bp1 = ego1@result

In [87]:
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/results/VSG')
write.csv(bp1, "VSG_GO_BP_up_RNA.csv")

In [88]:
bp1 = bp1[bp1$p.adjust < 0.05,]

lk = as.numeric(sub("\\/.*", "", bp1[,4]))
bp1 = bp1[(lk > 15) & (lk < 500),]
bp1 = bp1[bp1$Count >4,]
bp1 = bp1[!bp1$ID %in% noliv[,1],]
bp1 = bp1[!bp1$ID %in% noliv1[,1],]
bp1 = bp1[!bp1$ID %in% obsolete[,1],]
bp1 = bp1[order(as.numeric(sub("\\/.*", "", bp1[,4])), decreasing = TRUE),]

In [89]:
write.csv(bp1,"VSG_GO_BP_trimed_up_RNA_a.csv")

In [90]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp1[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp1)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp1 = bp1[rk1 == "ostavi",]

In [91]:
write.csv(bp1, "VSG_GO_BP_trimed_up_RNA_b.csv")

In [92]:
bp2 = ego2@result

In [93]:
write.csv(bp2, "VSG_GO_CC_up_RNA.csv")

In [94]:
bp2 = bp2[bp2$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp2[,4]))
bp2 = bp2[(lk > 15) & (lk < 500),]
bp2 = bp2[bp2$Count > 4,]
bp2 = bp2[!bp2$ID %in% noliv[,1],]
bp2 = bp2[!bp2$ID %in% noliv1[,1],]
bp2 = bp2[!bp2$ID %in% obsolete[,1],]
bp2 = bp2[order(as.numeric(sub("\\/.*", "", bp2[,4])),decreasing=TRUE),]

In [95]:
write.csv(bp2, "VSG_GO_CC_trimed_up_RNA_a.csv")

In [96]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp2[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp2)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp2 = bp2[rk1 == "ostavi",]

In [97]:
write.csv(bp2, "VSG_GO_CC_trimed_up_RNA_b.csv")

In [98]:
bp3 = ego3@result

In [99]:
write.csv(bp3, "VSG_GO_MF_up_RNA.csv")

In [100]:
bp3 = bp3[bp3$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp3[,4]))
bp3 = bp3[(lk > 15) & (lk < 500),]
bp3 = bp3[bp3$Count > 4,]
bp3 = bp3[!bp3$ID %in% noliv[,1],]
bp3 = bp3[!bp3$ID %in% noliv1[,1],]
bp3 = bp3[!bp3$ID %in% obsolete[,1],]
bp3 = bp3[order(as.numeric(sub("\\/.*", "", bp3[,4])), decreasing=TRUE),]

In [101]:
write.csv(bp3, "VSG_GO_MF_trimed_up_RNA_a.csv")

In [102]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp3[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp3)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp3 = bp3[rk1 == "ostavi",]

In [103]:
write.csv(bp3, "VSG_GO_MF_trimed_up_RNA_b.csv")

In [104]:
# KEGG results for the up-regulated VSG genes. The enrichment object created
# in this section is `kk1`; `k1` is never defined.
bp4 = kk1@result

In [105]:
write.csv(bp4, "VSG_KEGG_up_RNA.csv")

In [106]:
bp4 = bp4[bp4$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp4[,4]))
bp4 = bp4[(lk > 15) & (lk < 500),]
bp4 = bp4[bp4$Count > 4,]
bp4 = bp4[!bp4$ID %in% noliv[,1],]
bp4 = bp4[!bp4$ID %in% noliv1[,1],]
bp4 = bp4[!bp4$ID %in% obsolete[,1],]
bp4 = bp4[order(as.numeric(sub("\\/.*", "", bp4[,4])), decreasing=TRUE),]

In [107]:
write.csv(bp4, "VSG_KEGG_trimed_up_RNA_a.csv")

In [108]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp4[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp4)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp4 = bp4[rk1 == "ostavi",]

In [109]:
write.csv(bp4, "VSG_KEGG_trimed_up_RNA_b.csv")

### Down regulated pathways

In [19]:
# Down-regulated VSG genes (sig == "FDR<0.05", log2FoldChange < 0): convert the
# Ensembl row names to Entrez IDs and run KEGG enrichment.
# The fold-change filter now reads resultsVSG; the original used
# resultsTRF$log2FoldChange, i.e. a different table with a different number of
# rows, so the logical mask did not line up with resultsVSG.
# The enrichment objects are computed regardless of bool_plot because the
# export cells further down read kk1@result / ego*@result unconditionally;
# only the bar plots depend on the flag.
sig.gene <- bitr(rownames(resultsVSG[which(resultsVSG$sig == "FDR<0.05" & resultsVSG$log2FoldChange < 0),]),
                 fromType="ENSEMBL",
                 toType="ENTREZID",
                 OrgDb=org.Mm.eg.db
)
kk1 <- enrichKEGG(gene=sig.gene[,2],
                  organism='mmu',
                  pvalueCutoff=0.05
)
if(bool_plot){
    barplot(kk1,
            showCategory=25,
            title="KEGG pathways"
    )
}

In [20]:
# GO enrichment, Biological Process ontology (BH-adjusted, gene symbols in output).
ego1 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="BP",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego1,
            showCategory=25,
            title="GO pathways"
    )
}

In [21]:
# GO enrichment, Cellular Component ontology.
ego2 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="CC",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego2,
            showCategory=25,
            title="GO pathways"
    )
}

In [22]:
# GO enrichment, Molecular Function ontology.
ego3 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="MF",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego3,
            showCategory=25,
            title="GO pathways"
    )
}

In [114]:
bp1 = ego1@result

In [115]:
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/results/VSG')
write.csv(bp1, "VSG_GO_BP_down_RNA.csv")

In [116]:
bp1 = bp1[bp1$p.adjust<0.05,]

lk = as.numeric(sub("\\/.*", "", bp1[,4]))
bp1 = bp1[(lk > 15) & (lk < 500),]
bp1 = bp1[bp1$Count > 4,]
bp1 = bp1[!bp1$ID %in% noliv[,1],]
bp1 = bp1[!bp1$ID %in% noliv1[,1],]
bp1 = bp1[!bp1$ID %in% obsolete[,1],]
bp1 = bp1[order(as.numeric(sub("\\/.*", "", bp1[,4])), decreasing = TRUE),]

In [117]:
write.csv(bp1, "VSG_GO_BP_trimed_down_RNA_a.csv")

In [118]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp1[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp1)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp1 = bp1[rk1 == "ostavi",]

In [119]:
write.csv(bp1, "VSG_GO_BP_trimed_down_RNA_b.csv")

In [120]:
bp2 = ego2@result

In [121]:
write.csv(bp2, "VSG_GO_CC_down_RNA.csv")

In [122]:
bp2 = bp2[bp2$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp2[,4]))
bp2 = bp2[(lk > 15) & (lk < 500),]
bp2 = bp2[bp2$Count > 4,]
bp2 = bp2[!bp2$ID %in% noliv[,1],]
bp2 = bp2[!bp2$ID %in% noliv1[,1],]
bp2 = bp2[!bp2$ID %in% obsolete[,1],]
bp2 = bp2[order(as.numeric(sub("\\/.*", "", bp2[,4])), decreasing=TRUE),]

In [123]:
write.csv(bp2,"VSG_GO_CC_trimed_down_RNA_a.csv")

In [124]:
#rk = character()
#for(i in 1:(nrow(bp2)-1)){
#    pk = numeric()
#    for(j in (i+1):nrow(bp2)){
#        pk = c(pk,sum(unlist(strsplit(bp2[i,8], "/")) %in% unlist(strsplit(bp2[j,8], "/")))/length(unlist(strsplit(bp2[j,8], "/"))))
#    }
#    if (sum(pk > 0.4) > 0) {
#        rk[i] = "brisi"
#    } else rk[i] = "ostavi"
#}
#rk1 = c(rk, "brisi")
#bp2 = bp2[rk1 == "ostavi",]

In [125]:
#write.csv(bp2,"VSG_GO_CC_trimed_down_RNA_b.csv")

In [126]:
bp3 = ego3@result

In [127]:
write.csv(bp3, "VSG_GO_MF_down_RNA.csv")

In [128]:
bp3 = bp3[bp3$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp3[,4]))
bp3 = bp3[(lk > 15) & (lk < 500),]
bp3 = bp3[bp3$Count > 4,]
bp3 = bp3[!bp3$ID %in% noliv[,1],]
bp3 = bp3[!bp3$ID %in% noliv1[,1],]
bp3 = bp3[!bp3$ID %in% obsolete[,1],]
bp3 = bp3[order(as.numeric(sub("\\/.*", "", bp3[,4])), decreasing=TRUE),]

In [129]:
write.csv(bp3, "VSG_GO_MF_trimed_down_RNA_a.csv")

In [130]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp3[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp3)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp3 = bp3[rk1 == "ostavi",]

In [131]:
write.csv(bp3, "VSG_GO_MF_trimed_down_RNA_b.csv")

In [132]:
bp4 = kk1@result

In [133]:
write.csv(bp4, "VSG_KEGG_down_RNA.csv")

In [134]:
bp4 = bp4[bp4$p.adjust < 0.05,]
lk = as.numeric(sub("\\/.*", "", bp4[,4]))
bp4 = bp4[(lk > 15) & (lk < 500),]
bp4 = bp4[bp4$Count > 4,]
bp4 = bp4[!bp4$ID %in% noliv[,1],]
bp4 = bp4[!bp4$ID %in% noliv1[,1],]
bp4 = bp4[!bp4$ID %in% obsolete[,1],]
bp4 = bp4[order(as.numeric(sub("\\/.*", "", bp4[,4])), decreasing=TRUE),]

In [135]:
write.csv(bp4, "VSG_KEGG_trimed_down_RNA_a.csv")

In [136]:
# Redundancy trimming. Term i is flagged "brisi" (delete) when any later term j
# has more than 40% of its genes (column 8, "/"-separated) in term i's gene
# list; otherwise it is flagged "ostavi" (keep).
gene_sets = strsplit(bp4[,8], "/")
n_terms = length(gene_sets)
rk = character()
# seq_len() gives an empty loop for fewer than two rows; 1:(nrow(bp4)-1) would
# count down (1, 0, ...) and index rows that do not exist.
for(i in seq_len(max(n_terms - 1, 0))){
    pk = vapply(gene_sets[(i+1):n_terms],
                function(later) sum(gene_sets[[i]] %in% later)/length(later),
                numeric(1))
    rk[i] = if (any(pk > 0.4)) "brisi" else "ostavi"
}
# NOTE(review): the last row is always flagged "brisi" even though it has no
# later term to be compared with — confirm this is intended.
rk1 = c(rk, "brisi")
bp4 = bp4[rk1 == "ostavi",]

In [137]:
write.csv(bp4, "VSG_KEGG_trimed_down_RNA_b.csv")

## IF and PF Study

In [138]:
ifdat = expression.matrix[,c(24,26:31,32:39,16:23)]

In [139]:
# Sample sheet for the IF/PF study: one row per column of ifdat, with the diet
# group listed in the same order as the selected columns (7 IFHFD, 8 PFHFD,
# 8 ALHFD).
id = colnames(ifdat)
condition = c(rep("IFHFD",7), rep("PFHFD",8), rep("ALHFD",8))
metaData = data.frame(id,
                      condition
)
dds <- DESeqDataSetFromMatrix(countData=ifdat,
                              colData=metaData,
                              design=~ condition
)
# Attach the gene identifiers (row names) as feature-level metadata.
# The assignment operator was missing here, which made R look for a function
# called `featureDatadata.frame`.
featureData = data.frame(gene=rownames(dds))
mcols(dds) = DataFrame(mcols(dds),
                       featureData
)

“some variables in design formula are characters, converting to factors”


In [140]:
design(dds)= ~ condition  
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds, 
                           fit='parametric'
)

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates



In [141]:
keep = rowSums(counts(dds)) >= 50
dds = dds[keep,]

In [23]:
if(bool_plot){
    #Variance stabilizing transformation
    vsd=vst(dds)
    # PCA plot of the samples from different locations
    plotPCA(vsd, "condition") +
            geom_text(aes(label=id), vjust=2) +
            xlim(-20, 20) +
            ylim(-15, 15)
}

In [24]:
dds <- estimateSizeFactors(dds)
idx = rowMeans(counts(dds, normalized=TRUE)) >= 10
dds = dds[idx,]
vsd <- vst(dds,
           fitType="local"
)
pcaData <- plotPCA(vsd, 
                   intgroup=c("condition"), 
                   returnData=TRUE
)
percentVar <- round(100 * attr(pcaData, "percentVar"))
if(bool_plot){
    p2 <- ggplot(pcaData, aes(PC1, PC2, color=condition)) +
                 geom_point(size=3) +
                 geom_label(aes(label=colnames(dds))) +
                 xlab(paste0("PC1: ",percentVar[1],"% variance")) + 
                 ylab(paste0("PC2: ",percentVar[2],"% variance")) +
                 ggtitle("PCA Plot - PFhfd-ALhfd-IFhfd") +
                 theme(legend.position="right", legend.text=element_text(size=10)) + 
                 guides(col=guide_legend(nrow=4))
    p2 + ylim(-15,15) +
         xlim(-30,30)
}

In [144]:
dds <- DESeq(dds)

using pre-existing size factors

estimating dispersions

found already estimated dispersions, replacing these

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

-- replacing outliers and refitting for 1 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



In [145]:
res_ifhfd_vs_alhfd <- results(dds,
                              contrast=c("condition","IFHFD","ALHFD")
)
res_pfhfd_vs_alhfd <- results(dds, 
                              contrast = c("condition","PFHFD","ALHFD")
)

In [146]:
table(res_ifhfd_vs_alhfd$padj < 0.05)


FALSE  TRUE 
 9631   501 

In [147]:
table(res_pfhfd_vs_alhfd$padj < 0.05)


FALSE  TRUE 
 9587   545 

In [148]:
res = res_ifhfd_vs_alhfd[order(res_ifhfd_vs_alhfd$padj),]

In [149]:
results1 = as.data.frame(dplyr::mutate(as.data.frame(res), 
                               sig=ifelse(res$padj<0.05, "FDR<0.05", "Not Sig")),
                         row.names=rownames(res))
results1 = results1[(!is.na(results1$padj)),]

In [25]:
if(bool_plot){
    p <- ggplot2::ggplot(results1, ggplot2::aes(log2FoldChange, -log10(pvalue))) +
         ggplot2::geom_point(ggplot2::aes(col = sig)) + 
         ggplot2::scale_color_manual(values = c("red", "black")) +
         guides(col = guide_legend(nrow=2)) + 
         ggplot2::ggtitle("ifhfd_vs_alhfd") + xlim(-5,5) + ylim(0,20)

    p #+ ggrepel::geom_text_repel(data=results1[1:3, ], ggplot2::aes(label=rownames(results1[1:3, ])))
}

In [151]:
res = res_pfhfd_vs_alhfd[order(res_pfhfd_vs_alhfd$padj),]

In [152]:
results2 = as.data.frame(dplyr::mutate(as.data.frame(res), sig=ifelse(res$padj<0.05, "FDR<0.05", "Not Sig")),
                         row.names=rownames(res))
results2 = results2[(!is.na(results2$padj)),]

In [26]:
if(bool_plot){
    # Volcano-style plot for PFHFD vs ALHFD: log2 fold change against
    # -log10(padj), colored by the significance label.
    # `sig` is referenced as a bare column inside aes(), as in the IF plot
    # above; `results2$sig` bypasses the plot's data argument and triggers a
    # ggplot2 warning.
    p <- ggplot2::ggplot(results2, ggplot2::aes(log2FoldChange, -log10(padj))) +
         ggplot2::geom_point(ggplot2::aes(col = sig)) +
         ggplot2::scale_color_manual(values = c("red", "black")) +
         guides(col = guide_legend(nrow=2)) +
         ggplot2::ggtitle("pfhfd_vs_alhfd")+xlim(-5,5)+ylim(0,20)

    p #+ ggrepel::geom_text_repel(data=results2[1:20,], ggplot2::aes(label=rownames(results2[1:20,])))
}

In [154]:
#write.csv(as.data.frame(results2), file="DifferentialExpressionAnalysis_PFhfd_vs_ALhfd.csv")

In [27]:
# Gene-by-gene comparison of the log2 fold changes (column 2) of the PF and IF
# contrasts, with the identity line in red.
# The y-axis limit is set on plot(): abline() has no ylim argument, so the
# original call only raised a "not a graphical parameter" warning.
if (bool_plot) {
    plot(res_pfhfd_vs_alhfd[, 2],
         res_ifhfd_vs_alhfd[, 2],
         ylim = c(-4, 4)
    )
    abline(0, 1, col = "red")
}

In [156]:
par(mfrow=c(1,2))

In [30]:
# Collect column 5 of each contrast's results table (the column later named
# "pvalue" when the results are exported) into one-column data frames.
PF = data.frame(values=res_pfhfd_vs_alhfd[,5])
IF = data.frame(values=res_ifhfd_vs_alhfd[,5])

# Tag every row with the diet it came from so the two sets can still be told
# apart once they are stacked.
PF$diet = 'PF'
IF$diet = 'IF'

# Stack both sets into the long-format data frame `fasting` and, if plotting is
# enabled, overlay the two densities.
fasting = rbind(PF, IF)
if(bool_plot){
    ggplot(fasting, aes(values, fill=diet)) +
           geom_density(alpha=0.2)
}

In [31]:
if(bool_plot){
    hist(res_ifhfd_vs_alhfd[,2], 
         xlim = c(-4,4)
    )
}

In [159]:
# Sort the IF-vs-AL results by adjusted p-value (ties broken by raw p-value),
# add the FDR significance flag, keep gene IDs as row names, and remove genes
# without an adjusted p-value.
padj_order <- order(res_ifhfd_vs_alhfd$padj, res_ifhfd_vs_alhfd$pvalue)
res <- res_ifhfd_vs_alhfd[padj_order, ]

labelled <- dplyr::mutate(as.data.frame(res),
                          sig = ifelse(res$padj < 0.05, "FDR<0.05", "Not Sig"))
resultsIF <- as.data.frame(labelled, row.names = rownames(res))
resultsIF <- resultsIF[!is.na(resultsIF$padj), ]

In [32]:
# Row-scaled heatmap of the variance-stabilised expression of all genes that
# are FDR-significant in the IF-vs-AL contrast; rows and columns keep their
# input order (no clustering).
if (bool_plot) {
    sig_ids <- rownames(resultsIF[which(resultsIF$sig == "FDR<0.05"), ])
    p <- assay(vsd)[rownames(assay(vsd)) %in% sig_ids, ]
    heatmap.2(p, col = colors, scale = "row", trace = "none",
              main = "IFHFD_vs_ALHFD", Rowv = FALSE, Colv = FALSE)
}

In [161]:
#setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/results')
#write.csv(results,"IFstudy_IFHFDvsALHFD.csv")

### Up regulated pathways

In [33]:
# Up-regulated IF genes: FDR-significant with a positive log2 fold change.
# Both conditions must come from resultsIF; the original took the fold change
# from resultsTRF, a different table with different rows, so the mask did not
# line up with the rows being selected.
# The ID conversion and enrichment run regardless of bool_plot because
# sig.gene and kk1 are used by later cells; only the bar plot is optional.
sig.gene <- bitr(rownames(resultsIF[which(resultsIF$sig == "FDR<0.05" &
                                          resultsIF$log2FoldChange > 0), ]),
                 fromType = "ENSEMBL",
                 toType = "ENTREZID",
                 OrgDb = org.Mm.eg.db)
kk1 <- enrichKEGG(gene = sig.gene[, 2],
                  organism = "mmu",
                  pvalueCutoff = 0.1)
if (bool_plot) {
    barplot(kk1, showCategory = 25, title = "KEGG pathways")
}

In [34]:
# GO biological-process enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego1@result is exported below even
# when plotting is off; only the bar plot is optional.
ego1 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego1, showCategory = 25, title = "GO pathways")
}

In [35]:
# GO cellular-component enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego2@result is exported below even
# when plotting is off; only the bar plot is optional.
ego2 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "CC",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego2, showCategory = 25, title = "GO pathways")
}

In [36]:
# GO molecular-function enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego3@result is exported below even
# when plotting is off; only the bar plot is optional.
ego3 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "MF",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego3, showCategory = 25, title = "GO pathways")
}

In [166]:
bp1 = ego1@result

In [167]:
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/results/IF')
write.csv(bp1, "IF_GO_BP_up_RNA.csv")

In [168]:
# Trim the GO-BP table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp1 <- bp1[bp1$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp1$BgRatio))
bp1 <- bp1[(lk > 15) & (lk < 500), ]
bp1 <- bp1[bp1$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp1 <- bp1[!bp1$ID %in% excluded, ]
bp1 <- bp1[order(as.numeric(sub("\\/.*", "", bp1$BgRatio)), decreasing = TRUE), ]

In [169]:
write.csv(bp1,"IF_GO_BP_trimed_up_RNA_a.csv")

In [170]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp1$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp1 <- bp1[rk1 == "ostavi", ]

In [171]:
write.csv(bp1, "IF_GO_BP_trimed_up_RNA_b.csv")

In [172]:
bp2 = ego2@result

In [173]:
write.csv(bp2, "IF_GO_CC_up_RNA.csv")

In [174]:
# Trim the GO-CC table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp2 <- bp2[bp2$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp2$BgRatio))
bp2 <- bp2[(lk > 15) & (lk < 500), ]
bp2 <- bp2[bp2$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp2 <- bp2[!bp2$ID %in% excluded, ]
bp2 <- bp2[order(as.numeric(sub("\\/.*", "", bp2$BgRatio)), decreasing = TRUE), ]

In [175]:
write.csv(bp2,"IF_GO_CC_trimed_up_RNA_a.csv")

In [176]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp2$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp2 <- bp2[rk1 == "ostavi", ]

In [177]:
write.csv(bp2, "IF_GO_CC_trimed_up_RNA_b.csv")

In [178]:
bp3 = ego3@result

In [179]:
write.csv(bp3, "IF_GO_MF_up_RNA.csv")

In [180]:
# Trim the GO-MF table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp3 <- bp3[bp3$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp3$BgRatio))
bp3 <- bp3[(lk > 15) & (lk < 500), ]
bp3 <- bp3[bp3$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp3 <- bp3[!bp3$ID %in% excluded, ]
bp3 <- bp3[order(as.numeric(sub("\\/.*", "", bp3$BgRatio)), decreasing = TRUE), ]

In [181]:
write.csv(bp3, "IF_GO_MF_trimed_up_RNA_a.csv")

In [182]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp3$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp3 <- bp3[rk1 == "ostavi", ]

In [183]:
write.csv(bp3, "IF_GO_MF_trimed_up_RNA_b.csv")

In [184]:
bp4 = kk1@result

In [185]:
write.csv(bp4, "IF_KEGG_up_RNA.csv")

In [186]:
# Trim the KEGG table: adjusted p < 0.05, background pathway size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp4 <- bp4[bp4$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp4$BgRatio))
bp4 <- bp4[(lk > 15) & (lk < 500), ]
bp4 <- bp4[bp4$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp4 <- bp4[!bp4$ID %in% excluded, ]
bp4 <- bp4[order(as.numeric(sub("\\/.*", "", bp4$BgRatio)), decreasing = TRUE), ]

In [187]:
write.csv(bp4, "IF_KEGG_trimed_up_RNA_a.csv")

In [188]:
# Redundancy trimming. With rows sorted by decreasing pathway size (done in
# the filtering cell), pathway i is flagged "brisi" (delete) when it contains
# more than 40% of the genes of any later pathway, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest pathway unconditionally is intended.
genes <- strsplit(as.character(bp4$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp4 <- bp4[rk1 == "ostavi", ]

In [189]:
write.csv(bp4, "IF_KEGG_trimed_up_RNA_b.csv")

### Down regulated pathways

In [37]:
# Down-regulated IF genes: FDR-significant with a negative log2 fold change.
# Both conditions must come from resultsIF; the original took the fold change
# from resultsTRF, a different table with different rows, so the mask did not
# line up with the rows being selected.
# The ID conversion and enrichment run regardless of bool_plot because
# sig.gene and kk1 are used by later cells; only the bar plot is optional.
sig.gene <- bitr(rownames(resultsIF[which(resultsIF$sig == "FDR<0.05" &
                                          resultsIF$log2FoldChange < 0), ]),
                 fromType = "ENSEMBL",
                 toType = "ENTREZID",
                 OrgDb = org.Mm.eg.db)
kk1 <- enrichKEGG(gene = sig.gene[, 2],
                  organism = "mmu",
                  pvalueCutoff = 0.1)
if (bool_plot) {
    barplot(kk1, showCategory = 25, title = "KEGG pathways")
}

In [38]:
# GO biological-process enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego1@result is exported below even
# when plotting is off; only the bar plot is optional.
ego1 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego1, showCategory = 25, title = "GO pathways")
}

In [39]:
# GO cellular-component enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego2@result is exported below even
# when plotting is off; only the bar plot is optional.
ego2 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "CC",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego2, showCategory = 25, title = "GO pathways")
}

In [40]:
# GO molecular-function enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego3@result is exported below even
# when plotting is off; only the bar plot is optional.
ego3 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "MF",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego3, showCategory = 25, title = "GO pathways")
}

In [194]:
bp1 = ego1@result

In [195]:
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data/results/IF')
write.csv(bp1, "IF_GO_BP_down_RNA.csv")

In [196]:
# Trim the GO-BP table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp1 <- bp1[bp1$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp1$BgRatio))
bp1 <- bp1[(lk > 15) & (lk < 500), ]
bp1 <- bp1[bp1$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp1 <- bp1[!bp1$ID %in% excluded, ]
bp1 <- bp1[order(as.numeric(sub("\\/.*", "", bp1$BgRatio)), decreasing = TRUE), ]

In [197]:
write.csv(bp1, "IF_GO_BP_trimed_down_RNA_a.csv")

In [198]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp1$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp1 <- bp1[rk1 == "ostavi", ]

In [199]:
write.csv(bp1, "IF_GO_BP_trimed_down_RNA_b.csv")

In [200]:
bp2 = ego2@result

In [201]:
write.csv(bp2, "IF_GO_CC_down_RNA.csv")

In [202]:
# Trim the GO-CC table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp2 <- bp2[bp2$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp2$BgRatio))
bp2 <- bp2[(lk > 15) & (lk < 500), ]
bp2 <- bp2[bp2$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp2 <- bp2[!bp2$ID %in% excluded, ]
bp2 <- bp2[order(as.numeric(sub("\\/.*", "", bp2$BgRatio)), decreasing = TRUE), ]

In [203]:
write.csv(bp2, "IF_GO_CC_trimed_down_RNA_a.csv")

In [204]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp2$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp2 <- bp2[rk1 == "ostavi", ]

In [205]:
write.csv(bp2, "IF_GO_CC_trimed_down_RNA_b.csv")

In [206]:
bp3 = ego3@result

In [207]:
write.csv(bp3, "IF_GO_MF_down_RNA.csv")

In [208]:
# Trim the GO-MF table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp3 <- bp3[bp3$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp3$BgRatio))
bp3 <- bp3[(lk > 15) & (lk < 500), ]
bp3 <- bp3[bp3$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp3 <- bp3[!bp3$ID %in% excluded, ]
bp3 <- bp3[order(as.numeric(sub("\\/.*", "", bp3$BgRatio)), decreasing = TRUE), ]

In [209]:
write.csv(bp3,"IF_GO_MF_trimed_down_RNA_a.csv")

In [210]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp3$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp3 <- bp3[rk1 == "ostavi", ]

In [211]:
write.csv(bp3, "IF_GO_MF_trimed_down_RNA_b.csv")

In [212]:
bp4 = kk1@result

In [213]:
write.csv(bp4, "IF_KEGG_down_RNA.csv")

In [214]:
# Trim the KEGG table: adjusted p < 0.05, background pathway size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp4 <- bp4[bp4$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp4$BgRatio))
bp4 <- bp4[(lk > 15) & (lk < 500), ]
bp4 <- bp4[bp4$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp4 <- bp4[!bp4$ID %in% excluded, ]
bp4 <- bp4[order(as.numeric(sub("\\/.*", "", bp4$BgRatio)), decreasing = TRUE), ]

In [215]:
write.csv(bp4, "IF_KEGG_trimed_down_RNA_a.csv")

In [216]:
# Redundancy trimming. With rows sorted by decreasing pathway size (done in
# the filtering cell), pathway i is flagged "brisi" (delete) when it contains
# more than 40% of the genes of any later pathway, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest pathway unconditionally is intended.
genes <- strsplit(as.character(bp4$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp4 <- bp4[rk1 == "ostavi", ]

In [217]:
write.csv(bp4, "IF_KEGG_trimed_down_RNA_b.csv")

# Integrative Analysis Transcriptomics and Proteomics Data

In [218]:
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/RNA_seq_data')
load("ProteomOutput.RData")

In [219]:
protVSG = ProteomOutput$diffVSG
protTRF = ProteomOutput$diffTRF
protIF = ProteomOutput$diffIF

In [220]:
# Build the transcriptomic IF table keyed by gene symbol: take log2 fold
# change, p-value and adjusted p-value (columns 2, 5, 6), map the Ensembl row
# names to symbols, and return symbol + statistics.
IF <- setNames(cbind(rownames(resultsIF), resultsIF[, c(2, 5, 6)]),
               c("ENSEMBL", "log2FoldChange", "pvalue", "padj"))
IF$ENSEMBL <- as.character(IF$ENSEMBL)

sig.gene <- setNames(bitr(IF$ENSEMBL,
                          fromType = "ENSEMBL",
                          toType = "SYMBOL",
                          OrgDb = org.Mm.eg.db),
                     c("ENSEMBL", "GeneName"))

# Inner join on the Ensembl ID, then reorder so that the symbol comes first.
rnaIF <- merge(IF, sig.gene, by = "ENSEMBL")
rnaIF <- rnaIF[, c("GeneName", "log2FoldChange", "pvalue", "padj")]

'select()' returned 1:many mapping between keys and columns

“10.38% of input gene IDs are fail to map...”


In [221]:
# Build the transcriptomic VSG table keyed by gene symbol: take log2 fold
# change, p-value and adjusted p-value (columns 2, 5, 6), map the Ensembl row
# names to symbols, and return symbol + statistics.
VSG <- setNames(cbind(rownames(resultsVSG), resultsVSG[, c(2, 5, 6)]),
                c("ENSEMBL", "log2FoldChange", "pvalue", "padj"))
VSG$ENSEMBL <- as.character(VSG$ENSEMBL)

sig.gene <- setNames(bitr(VSG$ENSEMBL,
                          fromType = "ENSEMBL",
                          toType = "SYMBOL",
                          OrgDb = org.Mm.eg.db),
                     c("ENSEMBL", "GeneName"))

# Inner join on the Ensembl ID, then reorder so that the symbol comes first.
rnaVSG <- merge(VSG, sig.gene, by = "ENSEMBL")
rnaVSG <- rnaVSG[, c("GeneName", "log2FoldChange", "pvalue", "padj")]

'select()' returned 1:many mapping between keys and columns

“16.54% of input gene IDs are fail to map...”


In [222]:
# Build the transcriptomic TRF table keyed by gene symbol: take log2 fold
# change, p-value and adjusted p-value (columns 2, 5, 6), map the Ensembl row
# names to symbols, and return symbol + statistics.
TRF <- setNames(cbind(rownames(resultsTRF), resultsTRF[, c(2, 5, 6)]),
                c("ENSEMBL", "log2FoldChange", "pvalue", "padj"))
TRF$ENSEMBL <- as.character(TRF$ENSEMBL)

sig.gene <- setNames(bitr(TRF$ENSEMBL,
                          fromType = "ENSEMBL",
                          toType = "SYMBOL",
                          OrgDb = org.Mm.eg.db),
                     c("ENSEMBL", "GeneName"))

# Inner join on the Ensembl ID, then reorder so that the symbol comes first.
rnaTRF <- merge(TRF, sig.gene, by = "ENSEMBL")
rnaTRF <- rnaTRF[, c("GeneName", "log2FoldChange", "pvalue", "padj")]

'select()' returned 1:many mapping between keys and columns

“22.64% of input gene IDs are fail to map...”


In [223]:
VSGfin <- merge(protVSG,
                rnaVSG, 
                by="GeneName"
)
TRFfin <- merge(protTRF,
                rnaTRF,
                by="GeneName"
)
IFfin <- merge(protIF,
               rnaIF,
               by="GeneName"
)

In [224]:
# Signed significance score per gene and omics layer: -log10(p) carrying the
# sign of the fold change. After the merge, ".x" columns come from the
# proteome table and ".y" columns from the transcriptome table.
VSGfin[["log10Pval_Trans"]] <- sign(VSGfin$log2FoldChange.y) * -log10(VSGfin$pvalue.y)
VSGfin[["log10Pval_Prote"]] <- sign(VSGfin$log2FoldChange.x) * -log10(VSGfin$pvalue.x)

TRFfin[["log10Pval_Trans"]] <- sign(TRFfin$log2FoldChange.y) * -log10(TRFfin$pvalue.y)
TRFfin[["log10Pval_Prote"]] <- sign(TRFfin$log2FoldChange.x) * -log10(TRFfin$pvalue.x)

IFfin[["log10Pval_Trans"]] <- sign(IFfin$log2FoldChange.y) * -log10(IFfin$pvalue.y)
IFfin[["log10Pval_Prote"]] <- sign(IFfin$log2FoldChange.x) * -log10(IFfin$pvalue.x)

In [225]:
cor(VSGfin$log10Pval_Trans, 
    VSGfin$log10Pval_Prote, 
    method="spearman"
)
cor(TRFfin$log10Pval_Trans,
    TRFfin$log10Pval_Prote,
    method="spearman"
)
cor(IFfin$log10Pval_Trans,
    IFfin$log10Pval_Prote,
    method="spearman"
)

In [226]:
# Nominal p-value threshold shared by both omics layers.
cutoff <- 0.05

# Proteins below the threshold (rows with a missing p-value are dropped).
protVSG1 <- protVSG[!is.na(protVSG$pvalue) & protVSG$pvalue < cutoff, ]
protTRF1 <- protTRF[!is.na(protTRF$pvalue) & protTRF$pvalue < cutoff, ]
protIF1 <- protIF[!is.na(protIF$pvalue) & protIF$pvalue < cutoff, ]

# Transcripts below the threshold (rows with a missing p-value are dropped).
rnaVSG1 <- rnaVSG[!is.na(rnaVSG$pvalue) & rnaVSG$pvalue < cutoff, ]
rnaTRF1 <- rnaTRF[!is.na(rnaTRF$pvalue) & rnaTRF$pvalue < cutoff, ]
rnaIF1 <- rnaIF[!is.na(rnaIF$pvalue) & rnaIF$pvalue < cutoff, ]

In [227]:
VSGfin1 <- merge(protVSG1, 
                 rnaVSG1,
                 by="GeneName"
)
TRFfin1 <- merge(protTRF1,
                 rnaTRF1,
                 by="GeneName"
)
IFfin1 <- merge(protIF1, 
                rnaIF1, 
                by="GeneName"
)

In [228]:
# Signed significance scores for the genes passing the cutoff in both layers:
# -log10(p) carrying the sign of the fold change (".x" = proteome,
# ".y" = transcriptome after the merge).
VSGfin1[["log10Pval_Trans"]] <- sign(VSGfin1$log2FoldChange.y) * -log10(VSGfin1$pvalue.y)
VSGfin1[["log10Pval_Prote"]] <- sign(VSGfin1$log2FoldChange.x) * -log10(VSGfin1$pvalue.x)

TRFfin1[["log10Pval_Trans"]] <- sign(TRFfin1$log2FoldChange.y) * -log10(TRFfin1$pvalue.y)
TRFfin1[["log10Pval_Prote"]] <- sign(TRFfin1$log2FoldChange.x) * -log10(TRFfin1$pvalue.x)

IFfin1[["log10Pval_Trans"]] <- sign(IFfin1$log2FoldChange.y) * -log10(IFfin1$pvalue.y)
IFfin1[["log10Pval_Prote"]] <- sign(IFfin1$log2FoldChange.x) * -log10(IFfin1$pvalue.x)

In [229]:
options(repr.plot.width=20, repr.plot.height=16)

## VSG Study

In [41]:
# VSG study: transcript (x) against protein (y) log2 fold change for all
# matched genes; genes below the cutoff in both layers are overlaid in green.
if (bool_plot) {
    plot(x = VSGfin$log2FoldChange.y, y = VSGfin$log2FoldChange.x,
         xlab = "Fold change - transciptome", ylab = "Fold change - proteome",
         main = "VSG", pch = 20, xlim = c(-6, 7.5))
    points(x = VSGfin1$log2FoldChange.y, y = VSGfin1$log2FoldChange.x,
           col = "green", pch = 19)
    # Optional gene labels for the highlighted points:
    # text(VSGfin1$log2FoldChange.y, VSGfin1$log2FoldChange.x,
    #      labels=VSGfin1$GeneName, cex=0.9, pos=3, col="red", font=2)
    # The correlation annotation is a fixed value typed into the label.
    text(x = -4.5, y = 1.5, labels = expression(correlaton:rho == 0.44),
         cex = 1.5, col = "blue")
    # Dashed axes through the origin.
    abline(v = 0, h = 0, lty = 2)
}

In [42]:
# VSG study: signed -log10(p) of the transcriptome (x) against the proteome
# (y); genes below the cutoff in both layers are overlaid in green and the
# dashed lines mark +/- -log10(cutoff) on both axes.
if(bool_plot){
    #pdf('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/VSG_integrated.pdf',width=6,height=6,paper='special')
    plot(VSGfin$log10Pval_Trans,
         VSGfin$log10Pval_Prote,
         xlab="-Log10Pval(transcriptome)*sign(FC)",
         ylab="-Log10Pval(proteome)*sign(FC)",
         main="VSG",
         pch=20,
         xlim=c(-6,7.5)
    )
    points(VSGfin1$log10Pval_Trans,
           VSGfin1$log10Pval_Prote,
           col="green",
           pch=19
    )
    # Optional gene labels. The whole call stays commented out, including its
    # closing parenthesis: the original left that ")" active, which made the
    # cell a syntax error.
    #text(VSGfin1$log10Pval_Trans,
    #     VSGfin1$log10Pval_Prote,
    #     labels=VSGfin1$GeneName,
    #     cex= 0.9,
    #     pos=3,
    #     col="red",
    #     font=2
    #)
    text(-4.5,
         6,
         expression(correlaton:rho == 0.44),
         cex=0.8,
         col="blue"
    )
    abline(v=c(-log10(cutoff),
               log10(cutoff)),
           h=c(-log10(cutoff),log10(cutoff)),
           lty=2
    )
    #dev.off()
}

### Pathways enrichment

In [43]:
# Convert the gene symbols that pass the cutoff in both omics layers
# (VSGfin1) to Entrez IDs and test them for KEGG pathway enrichment.
# Both steps run regardless of bool_plot because sig.gene and kk1 are used by
# later cells; only the bar plot is optional.
sig.gene <- bitr(VSGfin1$GeneName,
                 fromType = "SYMBOL",
                 toType = "ENTREZID",
                 OrgDb = org.Mm.eg.db)
kk1 <- enrichKEGG(gene = sig.gene[, 2],
                  organism = "mmu",
                  pvalueCutoff = 0.1)
if (bool_plot) {
    barplot(kk1, showCategory = 25, title = "KEGG pathways")
}

In [44]:
# GO biological-process enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego1@result is exported below even
# when plotting is off; only the bar plot is optional.
ego1 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego1, showCategory = 25, title = "GO pathways")
}

In [45]:
# GO cellular-component enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego2@result is exported below even
# when plotting is off; only the bar plot is optional.
ego2 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "CC",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego2, showCategory = 25, title = "GO pathways")
}

In [46]:
# GO molecular-function enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego3@result is exported below even
# when plotting is off; only the bar plot is optional.
ego3 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "MF",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego3, showCategory = 25, title = "GO pathways")
}

In [252]:
bp1 = ego1@result

In [253]:
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/Integration/results/VSG')
write.csv(bp1, "VSG_GO_BP_integrated.csv")

In [254]:
# Trim the GO-BP table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp1 <- bp1[bp1$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp1$BgRatio))
bp1 <- bp1[(lk > 15) & (lk < 500), ]
bp1 <- bp1[bp1$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp1 <- bp1[!bp1$ID %in% excluded, ]
bp1 <- bp1[order(as.numeric(sub("\\/.*", "", bp1$BgRatio)), decreasing = TRUE), ]

In [255]:
write.csv(bp1, "VSG_GO_BP_integrated_a.csv")

In [256]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp1$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp1 <- bp1[rk1 == "ostavi", ]

In [257]:
write.csv(bp1, "VSG_GO_BP_integrated_b.csv")

In [258]:
bp2 = ego2@result

In [259]:
write.csv(bp2, "VSG_GO_CC_integrated.csv")

In [260]:
# Trim the GO-CC table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp2 <- bp2[bp2$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp2$BgRatio))
bp2 <- bp2[(lk > 15) & (lk < 500), ]
bp2 <- bp2[bp2$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp2 <- bp2[!bp2$ID %in% excluded, ]
bp2 <- bp2[order(as.numeric(sub("\\/.*", "", bp2$BgRatio)), decreasing = TRUE), ]

In [261]:
write.csv(bp2,"VSG_GO_CC_integrated_a.csv")

In [262]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp2$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp2 <- bp2[rk1 == "ostavi", ]

In [263]:
write.csv(bp2, "VSG_GO_CC_integrated_b.csv")

In [264]:
bp3 = ego3@result 

In [265]:
write.csv(bp3, "VSG_GO_MF_integrated.csv")

In [266]:
# Trim the GO-MF table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp3 <- bp3[bp3$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp3$BgRatio))
bp3 <- bp3[(lk > 15) & (lk < 500), ]
bp3 <- bp3[bp3$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp3 <- bp3[!bp3$ID %in% excluded, ]
bp3 <- bp3[order(as.numeric(sub("\\/.*", "", bp3$BgRatio)), decreasing = TRUE), ]

In [267]:
write.csv(bp3, "VSG_GO_MF_integrated_a.csv")

In [268]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp3$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp3 <- bp3[rk1 == "ostavi", ]

In [269]:
# Export the overlap-trimmed GO-MF table.
# NOTE(review): "integrateed" in the file name looks like a typo (the untrimmed
# export is "VSG_GO_MF_integrated.csv"); left unchanged in case other code
# reads this file by name.
write.csv(bp3, "VSG_GO_MF_integrateed_b.csv")

In [270]:
bp4 = kk1@result

In [271]:
# Export the unfiltered KEGG enrichment table.
# NOTE(review): "intgerated" in the file name looks like a typo for
# "integrated"; left unchanged in case other code reads this file by name.
write.csv(bp4, "VSG_KEGG_intgerated.csv")

In [272]:
# Trim the KEGG table: adjusted p < 0.05, background pathway size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
# TRUE is spelled out because T is an ordinary, reassignable variable.
bp4 <- bp4[bp4$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp4$BgRatio))
bp4 <- bp4[(lk > 15) & (lk < 500), ]
bp4 <- bp4[bp4$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp4 <- bp4[!bp4$ID %in% excluded, ]
bp4 <- bp4[order(as.numeric(sub("\\/.*", "", bp4$BgRatio)), decreasing = TRUE), ]

In [273]:
write.csv(bp4, "VSG_KEGG_trimed_integrated_a.csv")

In [274]:
# Redundancy trimming. With rows sorted by decreasing pathway size (done in
# the filtering cell), pathway i is flagged "brisi" (delete) when it contains
# more than 40% of the genes of any later pathway, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest pathway unconditionally is intended.
genes <- strsplit(as.character(bp4$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp4 <- bp4[rk1 == "ostavi", ]

In [275]:
write.csv(bp4, "VSG_KEGG_integrated_b.csv")

## TRF Study

In [47]:
# TRF study: signed -log10(p) of the transcriptome (x) against the proteome
# (y); genes below the cutoff in both layers are overlaid in blue and the
# dashed lines mark +/- -log10(cutoff) on both axes.
if (bool_plot) {
    #pdf('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/TRF_integrated.pdf',width=6,height=6,paper='special')
    plot(x = TRFfin$log10Pval_Trans, y = TRFfin$log10Pval_Prote,
         xlab = "-Log10Pval(transcriptome)*sign(FC)",
         ylab = "-Log10Pval(proteome)*sign(FC)",
         main = "TRF", pch = 20, xlim = c(-6, 7.5))
    points(x = TRFfin1$log10Pval_Trans, y = TRFfin1$log10Pval_Prote,
           col = "blue", pch = 19)
    # Optional gene labels for the highlighted points:
    # text(TRFfin1$log10Pval_Trans, TRFfin1$log10Pval_Prote,
    #      labels=TRFfin1$GeneName, cex= 0.9, pos=3, col="red", font=2)
    # The correlation annotation is a fixed value typed into the label.
    text(x = -4, y = 4, labels = expression(correlaton:rho == 0.24),
         cex = 0.8, col = "blue")
    threshold_lines <- c(-log10(cutoff), log10(cutoff))
    abline(v = threshold_lines, h = threshold_lines, lty = 2)
    #dev.off()
}

### Pathways enrichment

In [48]:
# Convert the gene symbols that pass the cutoff in both omics layers
# (TRFfin1) to Entrez IDs and test them for KEGG pathway enrichment.
# Both steps run regardless of bool_plot because sig.gene and kk1 are used by
# later cells; only the bar plot is optional.
sig.gene <- bitr(TRFfin1$GeneName,
                 fromType = "SYMBOL",
                 toType = "ENTREZID",
                 OrgDb = org.Mm.eg.db)
kk1 <- enrichKEGG(gene = sig.gene[, 2],
                  organism = "mmu",
                  pvalueCutoff = 0.1)
if (bool_plot) {
    barplot(kk1, showCategory = 25, title = "KEGG pathways")
}

In [49]:
# GO biological-process enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego1@result is exported below even
# when plotting is off; only the bar plot is optional.
ego1 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego1, showCategory = 25, title = "GO pathways")
}

In [50]:
# GO cellular-component enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego2@result is exported below even
# when plotting is off; only the bar plot is optional.
ego2 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "CC",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego2, showCategory = 25, title = "GO pathways")
}

In [51]:
# GO molecular-function enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego3@result is exported below even
# when plotting is off; only the bar plot is optional.
ego3 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "MF",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego3, showCategory = 25, title = "GO pathways")
}

In [281]:
bp1 = ego1@result

In [282]:
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/Integration/results/TRF')
write.csv(bp1, "TRF_GO_BP_integrated.csv")

In [283]:
# Trim the GO-BP table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
# TRUE is spelled out because T is an ordinary, reassignable variable.
bp1 <- bp1[bp1$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp1$BgRatio))
bp1 <- bp1[(lk > 15) & (lk < 500), ]
bp1 <- bp1[bp1$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp1 <- bp1[!bp1$ID %in% excluded, ]
bp1 <- bp1[order(as.numeric(sub("\\/.*", "", bp1$BgRatio)), decreasing = TRUE), ]

In [284]:
write.csv(bp1, "TRF_GO_BP_integrated_a.csv")

In [285]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp1$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp1 <- bp1[rk1 == "ostavi", ]

In [286]:
write.csv(bp1, "TRF_GO_BP_integrated_b.csv")

In [287]:
bp2 = ego2@result

In [288]:
write.csv(bp2, "TRF_GO_CC_integrated.csv")

In [289]:
# Trim the GO-CC table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp2 <- bp2[bp2$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp2$BgRatio))
bp2 <- bp2[(lk > 15) & (lk < 500), ]
bp2 <- bp2[bp2$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp2 <- bp2[!bp2$ID %in% excluded, ]
bp2 <- bp2[order(as.numeric(sub("\\/.*", "", bp2$BgRatio)), decreasing = TRUE), ]

In [290]:
write.csv(bp2, "TRF_GO_CC_integrated_a.csv")

In [291]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp2$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp2 <- bp2[rk1 == "ostavi", ]

In [292]:
write.csv(bp2, "TRF_GO_CC_integrated_b.csv")

In [293]:
bp3 = ego3@result

In [294]:
write.csv(bp3, "TRF_GO_MF_integrated.csv")

In [295]:
# Trim the GO-MF table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp3 <- bp3[bp3$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp3$BgRatio))
bp3 <- bp3[(lk > 15) & (lk < 500), ]
bp3 <- bp3[bp3$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp3 <- bp3[!bp3$ID %in% excluded, ]
bp3 <- bp3[order(as.numeric(sub("\\/.*", "", bp3$BgRatio)), decreasing = TRUE), ]

In [296]:
write.csv(bp3, "TRF_GO_MF_integrated_a.csv")

In [297]:
# Redundancy trimming. With rows sorted by decreasing term size (done in the
# filtering cell), term i is flagged "brisi" (delete) when it contains more
# than 40% of the genes of any later term, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest term unconditionally is intended.
genes <- strsplit(as.character(bp3$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp3 <- bp3[rk1 == "ostavi", ]

In [298]:
write.csv(bp3, "TRF_GO_MF_integrated_b.csv")

In [299]:
bp4 = kk1@result

In [300]:
write.csv(bp4, "TRF_KEGG_integrated.csv")

In [301]:
# Trim the KEGG table: adjusted p < 0.05, background pathway size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp4 <- bp4[bp4$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp4$BgRatio))
bp4 <- bp4[(lk > 15) & (lk < 500), ]
bp4 <- bp4[bp4$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp4 <- bp4[!bp4$ID %in% excluded, ]
bp4 <- bp4[order(as.numeric(sub("\\/.*", "", bp4$BgRatio)), decreasing = TRUE), ]

In [302]:
write.csv(bp4, "TRF_KEGG_integrated_a.csv")

In [303]:
# Redundancy trimming. With rows sorted by decreasing pathway size (done in
# the filtering cell), pathway i is flagged "brisi" (delete) when it contains
# more than 40% of the genes of any later pathway, and "ostavi" (keep) otherwise.
# seq_len() replaces 1:(nrow - 1), which counted backwards and failed on an
# empty table; geneID is read by name rather than by column position.
# NOTE(review): the last row is always flagged "brisi", as before - confirm
# that dropping the smallest pathway unconditionally is intended.
genes <- strsplit(as.character(bp4$geneID), "/")
n_terms <- length(genes)
rk1 <- rep("brisi", n_terms)
for (i in seq_len(max(n_terms - 1, 0))) {
    pk <- vapply(genes[(i + 1):n_terms],
                 function(later) sum(genes[[i]] %in% later) / length(later),
                 numeric(1))
    rk1[i] <- if (any(pk > 0.4, na.rm = TRUE)) "brisi" else "ostavi"
}
bp4 <- bp4[rk1 == "ostavi", ]

In [304]:
write.csv(bp4, "TRF_KEGG_integrated_b.csv")

## IF Study

In [52]:
# IF study: signed -log10(p) of the transcriptome (x) against the proteome
# (y); genes below the cutoff in both layers are overlaid in red and the
# dashed lines mark +/- -log10(cutoff) on both axes.
if (bool_plot) {
    #pdf('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/IF_integrated.pdf',width=6,height=6,paper='special')
    plot(x = IFfin$log10Pval_Trans, y = IFfin$log10Pval_Prote,
         xlab = "-Log10Pval(transcriptome)*sign(FC)",
         ylab = "-Log10Pval(proteome)*sign(FC)",
         main = "IF", pch = 20, xlim = c(-6, 7.5))
    points(x = IFfin1$log10Pval_Trans, y = IFfin1$log10Pval_Prote,
           col = "red", pch = 19)
    # Optional gene labels for the highlighted points:
    # text(IFfin1$log10Pval_Trans, IFfin1$log10Pval_Prote,
    #      labels=IFfin1$GeneName, cex= 0.9, pos=3, col="red", font=2)
    # The correlation annotation is a fixed value typed into the label.
    text(x = -4, y = 4.5, labels = expression(correlaton:rho == 0.19),
         cex = 0.8, col = "blue")
    threshold_lines <- c(-log10(cutoff), log10(cutoff))
    abline(v = threshold_lines, h = threshold_lines, lty = 2)
    #dev.off()
}

### Pathways enrichment

In [53]:
# Convert the gene symbols that pass the cutoff in both omics layers
# (IFfin1) to Entrez IDs and test them for KEGG pathway enrichment.
# Both steps run regardless of bool_plot because sig.gene and kk1 are used by
# later cells; only the bar plot is optional.
sig.gene <- bitr(IFfin1$GeneName,
                 fromType = "SYMBOL",
                 toType = "ENTREZID",
                 OrgDb = org.Mm.eg.db)
kk1 <- enrichKEGG(gene = sig.gene[, 2],
                  organism = "mmu",
                  pvalueCutoff = 0.1)
if (bool_plot) {
    barplot(kk1, showCategory = 25, title = "KEGG pathways")
}

In [54]:
# GO biological-process enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego1@result is exported below even
# when plotting is off; only the bar plot is optional.
ego1 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego1, showCategory = 25, title = "GO pathways")
}

In [55]:
# GO cellular-component enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego2@result is exported below even
# when plotting is off; only the bar plot is optional.
ego2 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "CC",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego2, showCategory = 25, title = "GO pathways")
}

In [56]:
# GO molecular-function enrichment of the Entrez IDs in sig.gene.
# Computed regardless of bool_plot because ego3@result is exported below even
# when plotting is off; only the bar plot is optional.
ego3 <- enrichGO(gene = sig.gene[, 2],
                 OrgDb = org.Mm.eg.db,
                 ont = "MF",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 readable = TRUE)
if (bool_plot) {
    barplot(ego3, showCategory = 25, title = "GO pathways")
}

In [310]:
bp1 = ego1@result

In [311]:
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/Integration/results/IF')
write.csv(bp1, "IF_GO_BP_integrated.csv")

In [312]:
# Trim the GO-BP table: adjusted p < 0.05, background term size strictly
# between 15 and 500, more than 4 hit genes, not on the non-liver / obsolete
# lists; finally sort by decreasing background size.
# BgRatio ("size/universe") is read by name: the positional lookup [, 4]
# silently picks another column if the package adds or reorders columns.
bp1 <- bp1[bp1$p.adjust < 0.05, ]
lk <- as.numeric(sub("\\/.*", "", bp1$BgRatio))
bp1 <- bp1[(lk > 15) & (lk < 500), ]
bp1 <- bp1[bp1$Count > 4, ]
excluded <- c(as.character(noliv[, 1]),
              as.character(noliv1[, 1]),
              as.character(obsolete[, 1]))
bp1 <- bp1[!bp1$ID %in% excluded, ]
bp1 <- bp1[order(as.numeric(sub("\\/.*", "", bp1$BgRatio)), decreasing = TRUE), ]

In [313]:
# Export the current bp1 table (path is relative to the working directory).
write.csv(x = bp1, file = "IF_GO_BP_integrated_a.csv")

In [314]:
# Prune redundant GO BP terms.
# For every row i, compute for each later row j the fraction of j's genes that
# also occur in row i's gene list. Row i is labelled "brisi" (dropped) when any
# fraction exceeds 0.4, otherwise "ostavi" (kept).
# The gene lists come from the geneID column, split on "/"; it is addressed by
# name so the code does not depend on the column order of the table.

# Split every gene list once instead of re-splitting inside the nested loop.
gene_sets <- strsplit(as.character(bp1$geneID), "/", fixed = TRUE)
n_terms <- length(gene_sets)

rk <- character()
# seq_len() yields an empty sequence for tables with 0 or 1 rows, where
# 1:(n - 1) would count downwards and index rows that do not exist.
for (i in seq_len(max(n_terms - 1L, 0L))) {
    pk <- vapply(
        gene_sets[(i + 1L):n_terms],
        function(other) sum(gene_sets[[i]] %in% other) / length(other),
        numeric(1)
    )
    rk[i] <- if (any(pk > 0.4)) "brisi" else "ostavi"
}

# NOTE(review): the last row has no later rows to compare against and is
# always labelled "brisi", i.e. always dropped. Kept as in the original logic;
# confirm this is intended.
rk1 <- c(rk, "brisi")
bp1 <- bp1[rk1 == "ostavi", ]

In [315]:
# Export the current bp1 table (path is relative to the working directory).
write.csv(x = bp1, file = "IF_GO_BP_integrated_b.csv")

In [316]:
# Pull the result table out of the GO CC enrichment object.
bp2 <- ego2@result

In [317]:
# Export the unfiltered GO CC table (path is relative to the working directory).
write.csv(x = bp2, file = "IF_GO_CC_integrated.csv")

In [318]:
# Filter and sort the GO CC table.
# Columns are addressed by name (BgRatio) rather than by position so the
# filter does not depend on the column order of the result table.

# 1. Keep terms with adjusted p-value below 0.05.
bp2 <- bp2[bp2$p.adjust < 0.05, ]

# 2. Keep terms whose BgRatio numerator (the text before the first "/") is
#    strictly between 15 and 500, and whose Count is above 4.
lk <- as.numeric(sub("\\/.*", "", bp2$BgRatio))
bp2 <- bp2[(lk > 15) & (lk < 500) & (bp2$Count > 4), ]

# 3. Drop IDs found in the first column of noliv, noliv1 or obsolete.
#    as.character() makes sure factor columns are combined by label, not code.
excluded_ids <- c(
    as.character(noliv[, 1]),
    as.character(noliv1[, 1]),
    as.character(obsolete[, 1])
)
bp2 <- bp2[!(bp2$ID %in% excluded_ids), ]

# 4. Sort by BgRatio numerator, largest first.
bp2 <- bp2[order(as.numeric(sub("\\/.*", "", bp2$BgRatio)), decreasing = TRUE), ]

In [319]:
# Export the current bp2 table (path is relative to the working directory).
write.csv(x = bp2, file = "IF_GO_CC_integrated_a.csv")

In [320]:
# Prune redundant GO CC terms.
# For every row i, compute for each later row j the fraction of j's genes that
# also occur in row i's gene list. Row i is labelled "brisi" (dropped) when any
# fraction exceeds 0.4, otherwise "ostavi" (kept).
# The gene lists come from the geneID column, split on "/"; it is addressed by
# name so the code does not depend on the column order of the table.

# Split every gene list once instead of re-splitting inside the nested loop.
gene_sets <- strsplit(as.character(bp2$geneID), "/", fixed = TRUE)
n_terms <- length(gene_sets)

rk <- character()
# seq_len() yields an empty sequence for tables with 0 or 1 rows, where
# 1:(n - 1) would count downwards and index rows that do not exist.
for (i in seq_len(max(n_terms - 1L, 0L))) {
    pk <- vapply(
        gene_sets[(i + 1L):n_terms],
        function(other) sum(gene_sets[[i]] %in% other) / length(other),
        numeric(1)
    )
    rk[i] <- if (any(pk > 0.4)) "brisi" else "ostavi"
}

# NOTE(review): the last row has no later rows to compare against and is
# always labelled "brisi", i.e. always dropped. Kept as in the original logic;
# confirm this is intended.
rk1 <- c(rk, "brisi")
bp2 <- bp2[rk1 == "ostavi", ]

In [321]:
# Export the current bp2 table (path is relative to the working directory).
write.csv(x = bp2, file = "IF_GO_CC_integrated_b.csv")

In [322]:
# Pull the result table out of the GO MF enrichment object.
bp3 <- ego3@result

In [323]:
# Export the unfiltered GO MF table (path is relative to the working directory).
write.csv(x = bp3, file = "IF_GO_MF_integrated.csv")

In [324]:
# Filter and sort the GO MF table.
# Columns are addressed by name (BgRatio) rather than by position so the
# filter does not depend on the column order of the result table.

# 1. Keep terms with adjusted p-value below 0.05.
bp3 <- bp3[bp3$p.adjust < 0.05, ]

# 2. Keep terms whose BgRatio numerator (the text before the first "/") is
#    strictly between 15 and 500, and whose Count is above 4.
lk <- as.numeric(sub("\\/.*", "", bp3$BgRatio))
bp3 <- bp3[(lk > 15) & (lk < 500) & (bp3$Count > 4), ]

# 3. Drop IDs found in the first column of noliv, noliv1 or obsolete.
#    as.character() makes sure factor columns are combined by label, not code.
excluded_ids <- c(
    as.character(noliv[, 1]),
    as.character(noliv1[, 1]),
    as.character(obsolete[, 1])
)
bp3 <- bp3[!(bp3$ID %in% excluded_ids), ]

# 4. Sort by BgRatio numerator, largest first.
bp3 <- bp3[order(as.numeric(sub("\\/.*", "", bp3$BgRatio)), decreasing = TRUE), ]

In [325]:
# Export the current bp3 table (path is relative to the working directory).
write.csv(x = bp3, file = "IF_GO_MF_integrated_a.csv")

In [326]:
# Prune redundant GO MF terms.
# For every row i, compute for each later row j the fraction of j's genes that
# also occur in row i's gene list. Row i is labelled "brisi" (dropped) when any
# fraction exceeds 0.4, otherwise "ostavi" (kept).
# The gene lists come from the geneID column, split on "/"; it is addressed by
# name so the code does not depend on the column order of the table.

# Split every gene list once instead of re-splitting inside the nested loop.
gene_sets <- strsplit(as.character(bp3$geneID), "/", fixed = TRUE)
n_terms <- length(gene_sets)

rk <- character()
# seq_len() yields an empty sequence for tables with 0 or 1 rows, where
# 1:(n - 1) would count downwards and index rows that do not exist.
for (i in seq_len(max(n_terms - 1L, 0L))) {
    pk <- vapply(
        gene_sets[(i + 1L):n_terms],
        function(other) sum(gene_sets[[i]] %in% other) / length(other),
        numeric(1)
    )
    rk[i] <- if (any(pk > 0.4)) "brisi" else "ostavi"
}

# NOTE(review): the last row has no later rows to compare against and is
# always labelled "brisi", i.e. always dropped. Kept as in the original logic;
# confirm this is intended.
rk1 <- c(rk, "brisi")
bp3 <- bp3[rk1 == "ostavi", ]

In [327]:
# Export the current bp3 table (path is relative to the working directory).
write.csv(x = bp3, file = "IF_GO_MF_integrated_b.csv")

In [328]:
# Pull the result table out of the KEGG enrichment object.
bp4 <- kk1@result

In [329]:
# Export the unfiltered KEGG table (path is relative to the working directory).
write.csv(x = bp4, file = "IF_KEGG_integrated.csv")

In [330]:
# Filter and sort the KEGG table.
# Columns are addressed by name (BgRatio) rather than by position so the
# filter does not depend on the column order of the result table.

# 1. Keep pathways with adjusted p-value below 0.05.
bp4 <- bp4[bp4$p.adjust < 0.05, ]

# 2. Keep pathways whose BgRatio numerator (the text before the first "/") is
#    strictly between 15 and 500, and whose Count is above 4.
lk <- as.numeric(sub("\\/.*", "", bp4$BgRatio))
bp4 <- bp4[(lk > 15) & (lk < 500) & (bp4$Count > 4), ]

# 3. Drop IDs found in the first column of noliv, noliv1 or obsolete.
#    as.character() makes sure factor columns are combined by label, not code.
#    NOTE(review): these are the same lists applied to the GO tables - confirm
#    they contain KEGG IDs, otherwise this step removes nothing.
excluded_ids <- c(
    as.character(noliv[, 1]),
    as.character(noliv1[, 1]),
    as.character(obsolete[, 1])
)
bp4 <- bp4[!(bp4$ID %in% excluded_ids), ]

# 4. Sort by BgRatio numerator, largest first (TRUE spelled out: T can be
#    reassigned).
bp4 <- bp4[order(as.numeric(sub("\\/.*", "", bp4$BgRatio)), decreasing = TRUE), ]

In [331]:
# Export the current bp4 table (path is relative to the working directory).
write.csv(x = bp4, file = "IF_KEGG_integrated_a.csv")

In [332]:
# Prune redundant KEGG pathways.
# For every row i, compute for each later row j the fraction of j's genes that
# also occur in row i's gene list. Row i is labelled "brisi" (dropped) when any
# fraction exceeds 0.4, otherwise "ostavi" (kept).
# The gene lists come from the geneID column, split on "/"; it is addressed by
# name so the code does not depend on the column order of the table.

# Split every gene list once instead of re-splitting inside the nested loop.
gene_sets <- strsplit(as.character(bp4$geneID), "/", fixed = TRUE)
n_terms <- length(gene_sets)

rk <- character()
# seq_len() yields an empty sequence for tables with 0 or 1 rows, where
# 1:(n - 1) would count downwards and index rows that do not exist.
for (i in seq_len(max(n_terms - 1L, 0L))) {
    pk <- vapply(
        gene_sets[(i + 1L):n_terms],
        function(other) sum(gene_sets[[i]] %in% other) / length(other),
        numeric(1)
    )
    rk[i] <- if (any(pk > 0.4)) "brisi" else "ostavi"
}

# NOTE(review): the last row has no later rows to compare against and is
# always labelled "brisi", i.e. always dropped. Kept as in the original logic;
# confirm this is intended.
rk1 <- c(rk, "brisi")
bp4 <- bp4[rk1 == "ostavi", ]

In [333]:
# Export the current bp4 table (path is relative to the working directory).
write.csv(x = bp4, file = "IF_KEGG_integrated_b.csv")