# Proteomics data analysis

### Loading packages
Required packages for downstream analysis

In [None]:
# Attach every package the notebook relies on. `library()` is applied to each
# name (character.only = TRUE lets it take strings), and suppressMessages()
# hides the start-up chatter. `inst` holds the list returned by lapply().
# Fix: the vector was missing commas after "clusterProfiler", "VennDiagram"
# and "RColorBrewer", which made the whole cell a syntax error.
inst <- suppressMessages(lapply(c("DEP",
                                  "GenomicRanges",
                                  "SummarizedExperiment",
                                  "gplots",
                                  "dplyr",
                                  "EnhancedVolcano",
                                  "gdata",
                                  "org.Mm.eg.db",
                                  "clusterProfiler",
                                  "VennDiagram",
                                  "RColorBrewer",
                                  "readxl"),
                                library,
                                character.only = TRUE)
)

### Set the global variables

Set whether anndata objects are recomputed or loaded from cache.

In [1]:
# Global switch: recompute objects (TRUE) or rely on cached ones (FALSE).
bool_recomp <- FALSE

Set whether to produce plots; set to `FALSE` for test runs.

In [2]:
# Global switch: every plotting cell below is wrapped in `if (bool_plot)`.
bool_plot <- FALSE

## IF Study

In [2]:
# Read the third sheet of the IF-study workbook.
dat <- read_excel(
    '~/Documents/consultation/Katarina/Integration_Data/Proteomics_data/IF_study.xlsx',
    sheet = 3
)
# 255-step colour ramp interpolated from the reversed 9-class "Spectral"
# palette; used by the heatmaps further down.
colors <- colorRampPalette(rev(brewer.pal(9, "Spectral")))(255)

“Expecting numeric in X1310 / R1310C24: got 'Filtered'”
“Expecting numeric in W1880 / R1880C23: got 'Filtered'”
“Expecting numeric in Y1935 / R1935C25: got 'Filtered'”
“Coercing text to numeric in N1939 / R1939C14: 'NaN'”
“Expecting numeric in Y1958 / R1958C25: got 'Filtered'”
“Coercing text to numeric in R2006 / R2006C18: 'NaN'”
“Expecting numeric in F2083 / R2083C6: got 'Filtered'”
“Expecting numeric in R2105 / R2105C18: got 'Filtered'”
“Expecting numeric in R2151 / R2151C18: got 'Filtered'”
“Expecting numeric in R2176 / R2176C18: got 'Filtered'”
“Expecting numeric in Y2176 / R2176C25: got 'Filtered'”
“Expecting numeric in F2181 / R2181C6: got 'Filtered'”
“Expecting numeric in R2199 / R2199C18: got 'Filtered'”
“Expecting numeric in R2203 / R2203C18: got 'Filtered'”
“Expecting numeric in R2204 / R2204C18: got 'Filtered'”
“Expecting numeric in N2206 / R2206C14: got 'Filtered'”
“Expecting numeric in P2206 / R2206C16: got 'Filtered'”
“Expecting numeric in R2206 / R2206C18: got 'Filtered'

In [3]:
# Keep only the first 2215 rows of the sheet.
dat <- dat[seq_len(2215), ]

In [4]:
# Build unique protein identifiers from the gene-name and description columns.
# NOTE(review): delim is the literal two-character string "/t", not a tab
# ("\t") -- confirm this is what was intended.
data_unique <- make_unique(dat, "PG.Genes", "PG.ProteinDescriptions", delim = "/t")

# Indices of the intensity columns: all IFHFD_ columns, then all ALHFD_ columns.
a_columns <- c(
    grep("IFHFD_", colnames(data_unique)),
    grep("ALHFD_", colnames(data_unique))
)
# removed sample (the 4th selected column)
a_columns <- a_columns[-4]

In [5]:
# Experimental design table for make_se(): one row per selected sample column,
# with label / condition / replicate all stored as character.
label <- colnames(data_unique)[a_columns]
condition <- rep(c("IFHFD", "ALHFD"), each = 7)
replicate <- rep(seq_len(7), times = 2)

experimental_design <- data.frame(
    label = as.character(label),
    condition = as.character(condition),
    replicate = as.character(replicate),
    stringsAsFactors = FALSE
)

In [6]:
# Assemble the SummarizedExperiment from the protein table, the selected
# intensity columns and the design table.
data_se <- make_se(data_unique, a_columns, experimental_design)

In [7]:
# Recode NaN entries of the assay matrix as NA.
assay(data_se)[is.nan(assay(data_se))] <- NA

In [3]:
if(bool_plot){
    # Plot a barplot of the protein identification overlap between samples
    plot_frequency(data_se)
}

In [4]:
# Filter proteins by missing values: thr=1 allows at most one missing value
# in at least one condition.
# Fix: this step used to sit inside `if(bool_plot)`, so `data_filt` did not
# exist when plotting was switched off, although the steps below need it.
data_filt <- filter_missval(data_se,
                            thr=1
)

In [5]:
if(bool_plot){
    # Plot a barplot of the number of identified proteins per samples
    plot_numbers(data_filt)
}

In [6]:
if(bool_plot){
    # Plot a barplot of the protein identification overlap between samples
    plot_coverage(data_filt)
}

In [7]:
# Normalize the data.
# Fix: computed regardless of `bool_plot`, because the imputation step that
# follows reads `data_norm` unconditionally.
data_norm <- normalize_vsn(data_filt)

In [8]:
if(bool_plot){
    # Visualize normalization by boxplots for all samples before and after normalization
    plot_normalization(data_filt,
                       data_norm
    )
}

In [9]:
if(bool_plot){
    # Plot a heatmap of proteins with missing values
    plot_missval(data_filt)
}

In [10]:
if(bool_plot){
    # Plot intensity distributions and cumulative fraction of proteins with and without missing values
    plot_detect(data_filt)
}

In [16]:
# Impute missing data using random draws from a Gaussian distribution centered around a minimal value (for MNAR)
# Fill in missing values with the "MinProb" method (q = 0.01), intended for
# data that are missing not at random (MNAR).
data_imp <- impute(data_norm, fun = "MinProb", q = 0.01)

Loading required package: imputeLCMD

Loading required package: tmvtnorm

Loading required package: mvtnorm

Loading required package: Matrix


Attaching package: ‘Matrix’


The following object is masked from ‘package:S4Vectors’:

    expand


Loading required package: gmm

Loading required package: sandwich

Loading required package: norm

Loading required package: pcaMethods


Attaching package: ‘pcaMethods’


The following object is masked from ‘package:stats’:

    loadings


Loading required package: impute



[1] 0.1722085


In [11]:
if (bool_plot) {
    # Compare the intensity distributions before and after imputation.
    plot_imputation(data_norm, data_imp)
}

In [18]:
# Differential test for the single, manually specified contrast.
data_diff_manual <- test_diff(data_imp, type = "manual", test = "IFHFD_vs_ALHFD")

Tested contrasts: IFHFD_vs_ALHFD



In [19]:
# Denote significant proteins based on user defined cutoffs
# Flag significant proteins using alpha = 0.05 and an lfc cutoff of
# log2(2), then pull out the results table.
# NOTE(review): the table name says ALHFD_vs_IFHFD, but the tested contrast
# (and the column names inside it) is IFHFD_vs_ALHFD.
depIF <- add_rejections(data_diff_manual, alpha = 0.05, lfc = log2(2))
res_ALHFD_vs_IFHFD <- get_results(depIF)

In [21]:
#write.table(res_ALHFD_vs_IFHFD, file = "IFstudy_IFHFDvsALHFD.csv", sep = ",", col.names = NA)

In [12]:
if (bool_plot) {
    # Histogram of the IFHFD_vs_ALHFD ratios; `nclass` is the argument the
    # abbreviated `n` resolves to.
    hist(res_ALHFD_vs_IFHFD$IFHFD_vs_ALHFD_ratio, nclass = 30)
}

In [23]:
# Default size of the plots rendered in the notebook.
options(repr.plot.width = 20, repr.plot.height = 10)

### All regulated pathways

In [24]:
#sig.gene <- bitr(res_ALHFD_vs_IFHFD[abs(res_ALHFD_vs_IFHFD$IFHFD_vs_ALHFD_ratio)>0.2,1],
#                 fromType="SYMBOL",toType="ENTREZID",OrgDb=org.Mm.eg.db)
#kk1 <- enrichKEGG(gene = sig.gene[,2], organism = 'mmu', pvalueCutoff = 0.05)
#barplot(kk1, showCategory=25, title="KEGG pathways")

In [25]:
#ego1 <- enrichGO(gene=sig.gene[,2], OrgDb=org.Mm.eg.db, ont="BP", pAdjustMethod="BH", pvalueCutoff=0.05, readable=TRUE)
#barplot(ego1, showCategory=25, title="GO pathways")

In [26]:
#ego2 <- enrichGO(gene=sig.gene[,2], OrgDb=org.Mm.eg.db, ont="CC", pAdjustMethod="BH", pvalueCutoff=0.05, readable=TRUE)
#barplot(ego2, showCategory=25, title="GO pathways")

In [27]:
#ego3 <- enrichGO(gene=sig.gene[,2], OrgDb=org.Mm.eg.db, ont="MF", pAdjustMethod="BH", pvalueCutoff=0.05, readable=TRUE)
#barplot(ego3, showCategory=25, title="GO pathways")

### Up regulated pathways

In [13]:
# Enrichment for proteins with IFHFD_vs_ALHFD ratio > 0.2.
# Fix: the ID mapping and the enrichment calls used to be wrapped in
# `if(bool_plot)`, but the simplify() and write.csv() cells further down read
# kk1/ego1/ego2/ego3 unconditionally. They are now always computed; only the
# barplots stay behind the plotting flag.
sig.gene <- bitr(res_ALHFD_vs_IFHFD[res_ALHFD_vs_IFHFD$IFHFD_vs_ALHFD_ratio > 0.2,1],
                 fromType="SYMBOL",
                 toType="ENTREZID",
                 OrgDb=org.Mm.eg.db
)
# KEGG enrichment on the mapped Entrez IDs (second column of sig.gene).
kk1 <- enrichKEGG(gene=sig.gene[,2],
                  organism='mmu',
                  pvalueCutoff=0.05
)
if(bool_plot){
    barplot(kk1,
            showCategory=25,
            title="KEGG pathways"
    )
}

In [14]:
# GO enrichment, ontology "BP".
ego1 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="BP",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego1,
            showCategory=25,
            title="GO pathways"
    )
}

In [15]:
# GO enrichment, ontology "CC".
ego2 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="CC",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego2,
            showCategory=25,
            title="GO pathways"
    )
}

In [16]:
# GO enrichment, ontology "MF".
ego3 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="MF",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego3,
            showCategory=25,
            title="GO pathways"
    )
}

In [32]:
# Reduce each GO result with simplify() (cutoff 0.7, choosing by the minimum
# p.adjust).
bp1 <- simplify(ego1, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp2 <- simplify(ego2, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp3 <- simplify(ego3, cutoff = 0.7, by = "p.adjust", select_fun = min)

# For each table, take the number in front of the "/" in column 4
# (presumably BgRatio -- confirm) and keep rows where it lies strictly
# between 15 and 500. After this step bp1..bp3 are plain result tables.
lk <- as.numeric(sub("\\/.*", "", bp1@result[, 4]))
bp1 <- bp1@result[lk > 15 & lk < 500, ]

l1 <- as.numeric(sub("\\/.*", "", bp2@result[, 4]))
bp2 <- bp2@result[l1 > 15 & l1 < 500, ]

l2 <- as.numeric(sub("\\/.*", "", bp3@result[, 4]))
bp3 <- bp3@result[l2 > 15 & l2 < 500, ]

In [33]:
# Switch to the IF results folder; the files below are written relative to it.
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/Proteomics_data/results/IF')

# Full enrichment tables (up-regulated proteins).
write.csv(kk1@result, file = "IF_KEGG_full_up_prot.csv")
write.csv(ego1@result, file = "IF_GO_BP_full_up_prot.csv")
write.csv(ego2@result, file = "IF_GO_CC_full_up_prot.csv")
write.csv(ego3@result, file = "IF_GO_MF_full_up_prot.csv")

# Simplified and size-filtered GO tables.
write.csv(bp1, file = "IF_GO_BP_trimed_up_prot.csv")
write.csv(bp2, file = "IF_GO_CC_trimed_up_prot.csv")
write.csv(bp3, file = "IF_GO_MF_trimed_up_prot.csv")

### Down regulated pathways

In [17]:
# Enrichment for proteins with IFHFD_vs_ALHFD ratio < -0.2.
# Fix: the ID mapping and the enrichment calls used to be wrapped in
# `if(bool_plot)`, but the simplify() and write.csv() cells further down read
# kk1/ego1/ego2/ego3 unconditionally (with plotting off they were missing, or
# still held the values from the up-regulated section). They are now always
# computed; only the barplots stay behind the plotting flag.
sig.gene <- bitr(res_ALHFD_vs_IFHFD[res_ALHFD_vs_IFHFD$IFHFD_vs_ALHFD_ratio < -0.2,1],
                 fromType="SYMBOL",
                 toType="ENTREZID",
                 OrgDb=org.Mm.eg.db
)
# KEGG enrichment on the mapped Entrez IDs (second column of sig.gene).
kk1 <- enrichKEGG(gene=sig.gene[,2],
                  organism='mmu',
                  pvalueCutoff=0.05
)
if(bool_plot){
    barplot(kk1,
            showCategory=25,
            title="KEGG pathways"
    )
}

In [18]:
# GO enrichment, ontology "BP".
ego1 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="BP",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego1,
            showCategory=25,
            title="GO pathways"
    )
}

In [19]:
# GO enrichment, ontology "CC".
ego2 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="CC",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego2,
            showCategory=25,
            title="GO pathways"
    )
}

In [20]:
# GO enrichment, ontology "MF".
ego3 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="MF",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego3,
            showCategory=25,
            title="GO pathways"
    )
}

In [38]:
# Reduce each GO result with simplify() (cutoff 0.7, choosing by the minimum
# p.adjust).
bp1 <- simplify(ego1, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp2 <- simplify(ego2, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp3 <- simplify(ego3, cutoff = 0.7, by = "p.adjust", select_fun = min)

# For each table, take the number in front of the "/" in column 4
# (presumably BgRatio -- confirm) and keep rows where it lies strictly
# between 15 and 500. After this step bp1..bp3 are plain result tables.
lk <- as.numeric(sub("\\/.*", "", bp1@result[, 4]))
bp1 <- bp1@result[lk > 15 & lk < 500, ]

l1 <- as.numeric(sub("\\/.*", "", bp2@result[, 4]))
bp2 <- bp2@result[l1 > 15 & l1 < 500, ]

l2 <- as.numeric(sub("\\/.*", "", bp3@result[, 4]))
bp3 <- bp3@result[l2 > 15 & l2 < 500, ]

In [39]:
# Switch to the IF results folder; the files below are written relative to it.
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/Proteomics_data/results/IF')

# Full enrichment tables (down-regulated proteins).
write.csv(kk1@result, file = "IF_KEGG_full_down_prot.csv")
write.csv(ego1@result, file = "IF_GO_BP_full_down_prot.csv")
write.csv(ego2@result, file = "IF_GO_CC_full_down_prot.csv")
write.csv(ego3@result, file = "IF_GO_MF_full_down_prot.csv")

# Simplified and size-filtered GO tables.
write.csv(bp1, file = "IF_GO_BP_trimed_down_prot.csv")
write.csv(bp2, file = "IF_GO_CC_trimed_down_prot.csv")
write.csv(bp3, file = "IF_GO_MF_trimed_down_prot.csv")

In [21]:
if (bool_plot) {
    # PCA on components 1 and 2 using every row of depIF; points are
    # labelled and the condition is indicated.
    plot_pca(depIF, x = 1, y = 2, n = nrow(depIF), label = TRUE,
             point_size = 4, indicate = "condition")
}

In [22]:
if (bool_plot) {
    # Row-scaled heatmap of the assay matrix, with no row/column reordering.
    p <- assay(depIF)
    heatmap.2(p, col = colors, scale = "row", trace = "none",
              main = "IFHFD_vs_ALHFD", Rowv = FALSE, Colv = FALSE)
}

## TRF Study

In [42]:
# Read the third sheet of the TRF-study workbook.
dat <- read_excel(
    '~/Documents/consultation/Katarina/Integration_Data/Proteomics_data/TRF_study.xlsx',
    sheet = 3
)

# 255-step colour ramp interpolated from the reversed 9-class "Spectral"
# palette; used by the heatmaps further down.
colors <- colorRampPalette(rev(brewer.pal(9, "Spectral")))(255)

In [43]:
# Build unique protein identifiers from the gene-name and description columns.
# NOTE(review): delim is the literal two-character string "/t", not a tab
# ("\t") -- confirm this is what was intended.
data_unique <- make_unique(dat, "PG.Genes", "PG.ProteinDescriptions", delim = "/t")

# Indices of the intensity columns: all TRFHFD_ columns, then all ALHFD_ columns.
a_columns <- c(
    grep("TRFHFD_", colnames(data_unique)),
    grep("ALHFD_", colnames(data_unique))
)
# removed sample (the 10th selected column)
a_columns <- a_columns[-10]

In [44]:
# Experimental design table for make_se(): six TRFHFD samples followed by
# five ALHFD samples, all columns stored as character.
label <- colnames(data_unique)[a_columns]
condition <- rep(c("TRFHFD", "ALHFD"), times = c(6, 5))
replicate <- c(seq_len(6), seq_len(5))

experimental_design <- data.frame(
    label = as.character(label),
    condition = as.character(condition),
    replicate = as.character(replicate),
    stringsAsFactors = FALSE
)

In [45]:
# Assemble the SummarizedExperiment from the protein table, the selected
# intensity columns and the design table.
data_se <- make_se(data_unique, a_columns, experimental_design)

In [46]:
# Recode NaN entries of the assay matrix as NA.
assay(data_se)[is.nan(assay(data_se))] <- NA

In [23]:
if (bool_plot) {
    # Barplot of the protein identification overlap between samples.
    plot_frequency(data_se)
}

In [48]:
# Filter proteins by missing values: thr = 1 allows at most one missing
# value in at least one condition.
data_filt <- filter_missval(data_se, thr = 1)

In [24]:
if (bool_plot) {
    # Barplot of the number of identified proteins per sample.
    plot_numbers(data_filt)
}

In [25]:
if (bool_plot) {
    # Barplot of the protein identification overlap between samples.
    plot_coverage(data_filt)
}

In [51]:
# Normalize the filtered data.
data_norm <- normalize_vsn(data_filt)

In [26]:
if (bool_plot) {
    # Boxplots for all samples before and after normalization.
    plot_normalization(data_filt, data_norm)
}

In [53]:
# Impute missing data using random draws from a Gaussian distribution centered around a minimal value (for MNAR)
# Fill in missing values with the "MinProb" method (q = 0.01), intended for
# data that are missing not at random (MNAR).
data_imp <- impute(data_norm, fun = "MinProb", q = 0.01)

“No missing values in 'data_norm'. Returning the unchanged object.”


In [27]:
if (bool_plot) {
    # Compare the intensity distributions before and after imputation.
    plot_imputation(data_norm, data_imp)
}

In [55]:
# Differential test for the single, manually specified contrast.
data_diff_manual <- test_diff(data_imp, type = "manual", test = "TRFHFD_vs_ALHFD")

Tested contrasts: TRFHFD_vs_ALHFD



In [56]:
# Denote significant proteins based on user defined cutoffs
# Flag significant proteins using alpha = 0.05 and an lfc cutoff of
# log2(2), then pull out the results table.
depTRF <- add_rejections(data_diff_manual, alpha = 0.05, lfc = log2(2))
res_TRFHFD_vs_ALHFD <- get_results(depTRF)

In [28]:
if (bool_plot) {
    # PCA on components 1 and 2 with n = 500; points are labelled and the
    # condition is indicated.
    plot_pca(depTRF, x = 1, y = 2, n = 500, point_size = 4, label = TRUE,
             indicate = "condition")
}

In [29]:
if (bool_plot) {
    # Row-scaled heatmap of the assay matrix, with no row/column reordering.
    p <- assay(depTRF)
    heatmap.2(p, col = colors, scale = "row", trace = "none",
              main = "TRFHFD_vs_ALHFD", Rowv = FALSE, Colv = FALSE)
}

In [60]:
#write.table(res_TRFHFD_vs_ALHFD, file = "TRFstudy_TRFHFDvsALHDF.csv", sep = ",", col.names = NA)

### All regulated pathways

In [61]:
#sig.gene <- bitr(res_TRFHFD_vs_ALHFD[abs(res_TRFHFD_vs_ALHFD$TRFHFD_vs_ALHFD_ratio)>0.2,1],
#                 fromType="SYMBOL",toType="ENTREZID",OrgDb=org.Mm.eg.db)

In [62]:
#kk1 <- enrichKEGG(gene = sig.gene[,2], organism = 'mmu', pvalueCutoff = 0.05)
#barplot(kk1, showCategory=25, title="KEGG pathways")

In [63]:
#ego1 <- enrichGO(gene=sig.gene[,2], OrgDb=org.Mm.eg.db, ont="BP", pAdjustMethod="BH", pvalueCutoff=0.05, readable=TRUE)
#barplot(ego1, showCategory=25, title="GO pathways")

In [64]:
#ego2 <- enrichGO(gene=sig.gene[,2], OrgDb=org.Mm.eg.db, ont="CC", pAdjustMethod="BH", pvalueCutoff=0.05, readable=TRUE)
#barplot(ego2, showCategory=25, title="GO pathways")

In [65]:
#ego3 <- enrichGO(gene=sig.gene[,2], OrgDb=org.Mm.eg.db, ont="MF", pAdjustMethod="BH", pvalueCutoff=0.05, readable=TRUE)
#barplot(ego3, showCategory=25, title="GO pathways")

### Up regulated pathways

In [66]:
# Map the symbols of proteins with TRFHFD_vs_ALHFD ratio > 0.2 (first column
# of the results table) to Entrez IDs.
# Fix: removed a stray closing brace after the call, which was a syntax error.
sig.gene <- bitr(res_TRFHFD_vs_ALHFD[res_TRFHFD_vs_ALHFD$TRFHFD_vs_ALHFD_ratio > 0.2, 1],
                 fromType="SYMBOL",
                 toType="ENTREZID",
                 OrgDb=org.Mm.eg.db
)

'select()' returned 1:1 mapping between keys and columns

“8.56% of input gene IDs are fail to map...”


In [30]:
# Enrichment for the up-regulated TRF proteins held in `sig.gene`.
# Fix: the enrichment calls used to be wrapped in `if(bool_plot)`, but the
# simplify() and write.csv() cells further down read kk1/ego1/ego2/ego3
# unconditionally. They are now always computed; only the barplots stay
# behind the plotting flag.
# KEGG enrichment on the mapped Entrez IDs (second column of sig.gene).
kk1 <- enrichKEGG(gene = sig.gene[,2],
                  organism = 'mmu',
                  pvalueCutoff = 0.05
)
if(bool_plot){
    barplot(kk1,
            showCategory=25,
            title="KEGG pathways"
    )
}

In [31]:
# GO enrichment, ontology "BP".
ego1 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="BP",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego1,
            showCategory=25,
            title="GO pathways"
    )
}

In [34]:
# GO enrichment, ontology "CC".
ego2 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="CC",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego2,
            showCategory=25,
            title="GO pathways"
    )
}

In [35]:
# GO enrichment, ontology "MF".
ego3 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="MF",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego3,
            showCategory=25,
            title="GO pathways"
    )
}

In [71]:
# Reduce each GO result with simplify() (cutoff 0.7, choosing by the minimum
# p.adjust).
bp1 <- simplify(ego1, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp2 <- simplify(ego2, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp3 <- simplify(ego3, cutoff = 0.7, by = "p.adjust", select_fun = min)

# For each table, take the number in front of the "/" in column 4
# (presumably BgRatio -- confirm) and keep rows where it lies strictly
# between 15 and 500. After this step bp1..bp3 are plain result tables.
lk <- as.numeric(sub("\\/.*", "", bp1@result[, 4]))
bp1 <- bp1@result[lk > 15 & lk < 500, ]

l1 <- as.numeric(sub("\\/.*", "", bp2@result[, 4]))
bp2 <- bp2@result[l1 > 15 & l1 < 500, ]

l2 <- as.numeric(sub("\\/.*", "", bp3@result[, 4]))
bp3 <- bp3@result[l2 > 15 & l2 < 500, ]

In [72]:
# Switch to the TRF results folder; the files below are written relative to it.
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/Proteomics_data/results/TRF')

# Full enrichment tables (up-regulated proteins).
write.csv(kk1@result, file = "TRF_KEGG_full_up_prot.csv")
write.csv(ego1@result, file = "TRF_GO_BP_full_up_prot.csv")
write.csv(ego2@result, file = "TRF_GO_CC_full_up_prot.csv")
write.csv(ego3@result, file = "TRF_GO_MF_full_up_prot.csv")

# Simplified and size-filtered GO tables.
write.csv(bp1, file = "TRF_GO_BP_trimed_up_prot.csv")
write.csv(bp2, file = "TRF_GO_CC_trimed_up_prot.csv")
write.csv(bp3, file = "TRF_GO_MF_trimed_up_prot.csv")

### Down regulated pathways

In [73]:
# Map the symbols of proteins with TRFHFD_vs_ALHFD ratio < -0.2 (first column
# of the results table) to Entrez IDs.
down_symbols <- res_TRFHFD_vs_ALHFD[res_TRFHFD_vs_ALHFD$TRFHFD_vs_ALHFD_ratio < -0.2, 1]
sig.gene <- bitr(down_symbols,
                 fromType = "SYMBOL",
                 toType = "ENTREZID",
                 OrgDb = org.Mm.eg.db)

'select()' returned 1:1 mapping between keys and columns

“5.45% of input gene IDs are fail to map...”


In [74]:
# Show the dimensions (rows, columns) of the symbol-to-Entrez mapping table.
dim(sig.gene)

In [36]:
# Enrichment for the down-regulated TRF proteins held in `sig.gene`.
# Fix: the enrichment calls used to be wrapped in `if(bool_plot)`, but the
# simplify() and write.csv() cells further down read kk1/ego1/ego2/ego3
# unconditionally (with plotting off they were missing, or still held the
# values from the up-regulated section). They are now always computed; only
# the barplots stay behind the plotting flag.
# KEGG enrichment on the mapped Entrez IDs (second column of sig.gene).
kk1 <- enrichKEGG(gene=sig.gene[,2],
                  organism='mmu',
                  pvalueCutoff=0.05
)
if(bool_plot){
    barplot(kk1,
            showCategory=25,
            title="KEGG pathways"
    )
}

In [37]:
# GO enrichment, ontology "BP".
ego1 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="BP",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego1,
            showCategory=25,
            title="GO pathways"
    )
}

In [38]:
# GO enrichment, ontology "CC".
ego2 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="CC",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego2,
            showCategory=25,
            title="GO pathways"
    )
}

In [39]:
# GO enrichment, ontology "MF".
ego3 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="MF",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego3,
            showCategory=25,
            title="GO pathways"
    )
}

In [79]:
# Reduce each GO result with simplify() (cutoff 0.7, choosing by the minimum
# p.adjust).
bp1 <- simplify(ego1, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp2 <- simplify(ego2, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp3 <- simplify(ego3, cutoff = 0.7, by = "p.adjust", select_fun = min)

# For each table, take the number in front of the "/" in column 4
# (presumably BgRatio -- confirm) and keep rows where it lies strictly
# between 15 and 500. After this step bp1..bp3 are plain result tables.
lk <- as.numeric(sub("\\/.*", "", bp1@result[, 4]))
bp1 <- bp1@result[lk > 15 & lk < 500, ]

l1 <- as.numeric(sub("\\/.*", "", bp2@result[, 4]))
bp2 <- bp2@result[l1 > 15 & l1 < 500, ]

l2 <- as.numeric(sub("\\/.*", "", bp3@result[, 4]))
bp3 <- bp3@result[l2 > 15 & l2 < 500, ]

In [80]:
# Switch to the TRF results folder; the files below are written relative to it.
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/Proteomics_data/results/TRF')

# Full enrichment tables (down-regulated proteins).
write.csv(kk1@result, file = "TRF_KEGG_full_down_prot.csv")
write.csv(ego1@result, file = "TRF_GO_BP_full_down_prot.csv")
write.csv(ego2@result, file = "TRF_GO_CC_full_down_prot.csv")
write.csv(ego3@result, file = "TRF_GO_MF_full_down_prot.csv")

# Simplified and size-filtered GO tables.
write.csv(bp1, file = "TRF_GO_BP_trimed_down_prot.csv")
write.csv(bp2, file = "TRF_GO_CC_trimed_down_prot.csv")
write.csv(bp3, file = "TRF_GO_MF_trimed_down_prot.csv")

## VSG Study

In [None]:
# Read the second sheet of the VSG-study workbook.
dat <- read_excel(
    '~/Documents/consultation/Katarina/Integration_Data/Proteomics_data/191119 VSG study KLK13007_DirectDIA_nodecimal.xlsx',
    sheet = 2
)

In [82]:
# Build unique protein identifiers from the gene-name and description columns.
# NOTE(review): delim is the literal two-character string "/t", not a tab
# ("\t") -- confirm this is what was intended.
data_unique <- make_unique(dat, "PG.Genes", "PG.ProteinDescriptions", delim = "/t")

# Indices of the intensity columns: all "VSG HFD_" columns, then all
# "PF HFD_" columns.
a_columns <- c(
    grep("VSG HFD_", colnames(data_unique)),
    grep("PF HFD_", colnames(data_unique))
)

In [83]:
# removed sample (the 2nd and 9th selected columns)
a_columns <- a_columns[-c(2, 9)]

In [84]:
# Experimental design table for make_se(): seven VSGHFD samples followed by
# seven PFHFD samples, all columns stored as character.
label <- colnames(data_unique)[a_columns]
condition <- rep(c("VSGHFD", "PFHFD"), each = 7)
replicate <- rep(seq_len(7), times = 2)

experimental_design <- data.frame(
    label = as.character(label),
    condition = as.character(condition),
    replicate = as.character(replicate),
    stringsAsFactors = FALSE
)

In [85]:
# Coerce the listed columns of data_unique to integer, one at a time.
# NOTE(review): these hard-coded positions presumably match `a_columns`
# (the selected sample columns) -- confirm against the sheet layout.
for (col_idx in c(7, 9:14, 42:48)) {
    data_unique[, col_idx] <- as.integer(data_unique[, col_idx])
}

“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”


In [86]:
# Assemble the SummarizedExperiment from the protein table, the selected
# intensity columns and the design table.
data_se <- make_se(data_unique, a_columns, experimental_design)

In [87]:
# Recode NaN entries of the assay matrix as NA.
assay(data_se)[is.nan(assay(data_se))] <- NA

In [40]:
if (bool_plot) {
    # Barplot of the protein identification overlap between samples.
    plot_frequency(data_se)
}

In [89]:
# Filter proteins by missing values: thr = 1 allows at most one missing
# value in at least one condition.
data_filt <- filter_missval(data_se, thr = 1)

In [41]:
if (bool_plot) {
    # Barplot of the number of identified proteins per sample.
    plot_numbers(data_filt)
}

In [42]:
if (bool_plot) {
    # Barplot of the protein identification overlap between samples.
    plot_coverage(data_filt)
}

In [92]:
# Normalize the filtered data.
data_norm <- normalize_vsn(data_filt)

In [43]:
if (bool_plot) {
    # Boxplots for all samples before and after normalization.
    plot_normalization(data_filt, data_norm)
}

In [94]:
# Impute missing data using random draws from a Gaussian distribution centered around a minimal value (for MNAR)
# Fill in missing values with the "MinProb" method (q = 0.01), intended for
# data that are missing not at random (MNAR).
data_imp <- impute(data_norm, fun = "MinProb", q = 0.01)

[1] 0.1232543


In [44]:
if (bool_plot) {
    # Compare the intensity distributions before and after imputation.
    plot_imputation(data_norm, data_imp)
}

In [96]:
# Differential test for the single, manually specified contrast.
data_diff_manual <- test_diff(data_imp, type = "manual", test = "VSGHFD_vs_PFHFD")

Tested contrasts: VSGHFD_vs_PFHFD



In [97]:
# Denote significant proteins based on user defined cutoffs
# Flag significant proteins using alpha = 0.05 and an lfc cutoff of
# log2(2), then pull out the results table.
# NOTE(review): the object is spelled "depVGS" although the study is "VSG";
# the name is kept because later cells use it.
depVGS <- add_rejections(data_diff_manual, alpha = 0.05, lfc = log2(2))
res_VSGHFD_vs_PFHFD <- get_results(depVGS)

In [45]:
if (bool_plot) {
    # 255-step colour ramp from the reversed 9-class "Spectral" palette.
    colors <- colorRampPalette(rev(brewer.pal(9, "Spectral")))(255)

    # Row-scaled heatmap of the assay matrix, with no row/column reordering.
    p <- assay(depVGS)
    heatmap.2(p, col = colors, scale = "row", trace = "none",
              main = "VSGHFD_vs_PFHFD", Rowv = FALSE, Colv = FALSE)
}

In [46]:
if (bool_plot) {
    # PCA on components 1 and 2 with n = 1000; points are labelled and the
    # condition is indicated.
    plot_pca(depVGS, x = 1, y = 2, n = 1000, label = TRUE,
             point_size = 4, indicate = "condition")
}

In [101]:
#write.table(res_VSGHFD_vs_PFHFD, file = "VSGstudy_VSGHFDvsPFHFD.csv", sep = ",", col.names = NA)

### All regulated pathways

In [102]:
#sig.gene <- bitr(res_VSGHFD_vs_PFHFD[abs(res_VSGHFD_vs_PFHFD$VSGHFD_vs_PFHFD_ratio)>0.2,1],
#                 fromType="SYMBOL",toType="ENTREZID",OrgDb=org.Mm.eg.db)

In [103]:
#kk1 <- enrichKEGG(gene = sig.gene[,2], organism = 'mmu', pvalueCutoff = 0.05)
#barplot(kk1, showCategory=25, title="KEGG pathways")

In [104]:
#ego1 <- enrichGO(gene=sig.gene[,2], OrgDb=org.Mm.eg.db, ont="BP", pAdjustMethod="BH", pvalueCutoff=0.05, readable=TRUE)
#barplot(ego1, showCategory=25, title="GO pathways")

In [105]:
#ego2 <- enrichGO(gene=sig.gene[,2], OrgDb=org.Mm.eg.db, ont="CC", pAdjustMethod="BH", pvalueCutoff=0.05, readable=TRUE)
#barplot(ego2, showCategory=25, title="GO pathways")

In [106]:
#ego3 <- enrichGO(gene=sig.gene[,2], OrgDb=org.Mm.eg.db, ont="MF", pAdjustMethod="BH", pvalueCutoff=0.05, readable=TRUE)
#barplot(ego3, showCategory=25, title="GO pathways")

### Up regulated pathways

In [47]:
# Enrichment for proteins with VSGHFD_vs_PFHFD ratio > 0.2.
# Fix: the ID mapping and the enrichment calls used to be wrapped in
# `if(bool_plot)`, but the simplify() and write.csv() cells further down read
# kk1/ego1/ego2/ego3 unconditionally. They are now always computed; only the
# barplots stay behind the plotting flag.
sig.gene <- bitr(res_VSGHFD_vs_PFHFD[res_VSGHFD_vs_PFHFD$VSGHFD_vs_PFHFD_ratio>0.2,1],
                 fromType="SYMBOL",
                 toType="ENTREZID",
                 OrgDb=org.Mm.eg.db
)
# KEGG enrichment on the mapped Entrez IDs (second column of sig.gene).
kk1 <- enrichKEGG(gene=sig.gene[,2],
                  organism='mmu',
                  pvalueCutoff=0.05
)
if(bool_plot){
    barplot(kk1,
            showCategory=25,
            title="KEGG pathways"
    )
}

In [48]:
# GO enrichment, ontology "BP".
ego1 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="BP",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego1,
            showCategory=25,
            title="GO pathways"
    )
}

In [49]:
# GO enrichment, ontology "CC".
ego2 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="CC",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego2,
            showCategory=25,
            title="GO pathways"
    )
}

In [50]:
# GO enrichment, ontology "MF".
ego3 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="MF",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego3,
            showCategory=25,
            title="GO pathways"
    )
}

In [111]:
# Reduce each GO result with simplify() (cutoff 0.7, choosing by the minimum
# p.adjust).
bp1 <- simplify(ego1, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp2 <- simplify(ego2, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp3 <- simplify(ego3, cutoff = 0.7, by = "p.adjust", select_fun = min)

# For each table, take the number in front of the "/" in column 4
# (presumably BgRatio -- confirm) and keep rows where it lies strictly
# between 15 and 500. After this step bp1..bp3 are plain result tables.
lk <- as.numeric(sub("\\/.*", "", bp1@result[, 4]))
bp1 <- bp1@result[lk > 15 & lk < 500, ]

l1 <- as.numeric(sub("\\/.*", "", bp2@result[, 4]))
bp2 <- bp2@result[l1 > 15 & l1 < 500, ]

l2 <- as.numeric(sub("\\/.*", "", bp3@result[, 4]))
bp3 <- bp3@result[l2 > 15 & l2 < 500, ]

In [112]:
# Switch to the VSG results folder; the files below are written relative to it.
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/Proteomics_data/results/VSG')

# Full enrichment tables (up-regulated proteins).
write.csv(kk1@result, "VSG_KEGG_full_up_prot.csv")
# Fix: the full GO BP table was the only one not exported in this section;
# the file name follows the pattern of the sibling exports.
write.csv(ego1@result, "VSG_GO_BP_full_up_prot.csv")
write.csv(ego2@result, "VSG_GO_CC_full_up_prot.csv")
write.csv(ego3@result, "VSG_GO_MF_full_up_prot.csv")

# Simplified and size-filtered GO tables.
write.csv(bp1, "VSG_GO_BP_trimed_up_prot.csv")
write.csv(bp2, "VSG_GO_CC_trimed_up_prot.csv")
write.csv(bp3, "VSG_GO_MF_trimed_up_prot.csv")

### Down regulated pathways

In [114]:
# Map the symbols of proteins with VSGHFD_vs_PFHFD ratio < -0.2 (first column
# of the results table) to Entrez IDs, then show the table's dimensions.
down_symbols <- res_VSGHFD_vs_PFHFD[res_VSGHFD_vs_PFHFD$VSGHFD_vs_PFHFD_ratio < -0.2, 1]
sig.gene <- bitr(down_symbols,
                 fromType = "SYMBOL",
                 toType = "ENTREZID",
                 OrgDb = org.Mm.eg.db)
dim(sig.gene)

'select()' returned 1:1 mapping between keys and columns

“2.59% of input gene IDs are fail to map...”


In [51]:
# Enrichment for the down-regulated VSG proteins held in `sig.gene`.
# Fix: the enrichment calls used to be wrapped in `if(bool_plot)`, but the
# simplify() and write.csv() cells further down read kk1/ego1/ego2/ego3
# unconditionally (with plotting off they were missing, or still held the
# values from the up-regulated section). They are now always computed; only
# the barplots stay behind the plotting flag.
# KEGG enrichment on the mapped Entrez IDs (second column of sig.gene).
kk1 <- enrichKEGG(gene=sig.gene[,2],
                  organism='mmu',
                  pvalueCutoff=0.05
)
if(bool_plot){
    barplot(kk1,
            showCategory=25,
            title="KEGG pathways"
    )
}

In [52]:
# GO enrichment, ontology "BP".
ego1 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="BP",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego1,
            showCategory=25,
            title="GO pathways"
    )
}

In [53]:
# GO enrichment, ontology "CC".
ego2 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="CC",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego2,
            showCategory=25,
            title="GO pathways"
    )
}

In [54]:
# GO enrichment, ontology "MF".
ego3 <- enrichGO(gene=sig.gene[,2],
                 OrgDb=org.Mm.eg.db,
                 ont="MF",
                 pAdjustMethod="BH",
                 pvalueCutoff=0.05,
                 readable=TRUE
)
if(bool_plot){
    barplot(ego3,
            showCategory=25,
            title="GO pathways"
    )
}

In [119]:
# Reduce each GO result with simplify() (cutoff 0.7, choosing by the minimum
# p.adjust).
bp1 <- simplify(ego1, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp2 <- simplify(ego2, cutoff = 0.7, by = "p.adjust", select_fun = min)
bp3 <- simplify(ego3, cutoff = 0.7, by = "p.adjust", select_fun = min)

# For each table, take the number in front of the "/" in column 4
# (presumably BgRatio -- confirm) and keep rows where it lies strictly
# between 15 and 500. After this step bp1..bp3 are plain result tables.
lk <- as.numeric(sub("\\/.*", "", bp1@result[, 4]))
bp1 <- bp1@result[lk > 15 & lk < 500, ]

l1 <- as.numeric(sub("\\/.*", "", bp2@result[, 4]))
bp2 <- bp2@result[l1 > 15 & l1 < 500, ]

l2 <- as.numeric(sub("\\/.*", "", bp3@result[, 4]))
bp3 <- bp3@result[l2 > 15 & l2 < 500, ]

In [120]:
# Switch to the VSG results folder; the files below are written relative to it.
setwd('/Users/viktorian.miok/Documents/consultation/Katarina/Integration_Data/Proteomics_data/results/VSG')

# Full enrichment tables (down-regulated proteins).
write.csv(kk1@result, file = "VSG_KEGG_full_down_prot.csv")
write.csv(ego1@result, file = "VSG_GO_BP_full_down_prot.csv")
write.csv(ego2@result, file = "VSG_GO_CC_full_down_prot.csv")
write.csv(ego3@result, file = "VSG_GO_MF_full_down_prot.csv")

# Simplified and size-filtered GO tables.
write.csv(bp1, file = "VSG_GO_BP_trimed_down_prot.csv")
write.csv(bp2, file = "VSG_GO_CC_trimed_down_prot.csv")
write.csv(bp3, file = "VSG_GO_MF_trimed_down_prot.csv")

In [121]:
# From each results table take columns 1, 7 and 3 (in that order) and label
# them GeneName / log2FoldChange / pvalue.
# NOTE(review): assumes get_results() puts the name, ratio and p-value in
# those positions -- confirm against the installed DEP version.
diffVSG <- res_VSGHFD_vs_PFHFD[, c(1, 7, 3)]
colnames(diffVSG) <- c("GeneName", "log2FoldChange", "pvalue")

In [122]:
diffTRF <- res_TRFHFD_vs_ALHFD[, c(1, 7, 3)]
colnames(diffTRF) <- c("GeneName", "log2FoldChange", "pvalue")

In [123]:
diffIF <- res_ALHFD_vs_IFHFD[, c(1, 7, 3)]
colnames(diffIF) <- c("GeneName", "log2FoldChange", "pvalue")

In [124]:
#ProteomOutput <- list(diffVSG=diffVSG, diffTRF=diffTRF, diffIF=diffIF)

In [125]:
#save(ProteomOutput, file="ProteomOutput.RData")