-
Notifications
You must be signed in to change notification settings - Fork 4
/
RI.R
205 lines (178 loc) · 11.4 KB
/
RI.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#' @name computeRIerror
#' @aliases computeRIerror
#' @title computeRIerror
#' @description This function uses RI of mslib database and RT of the identified compounds to discrimine proper compound identification.
#' @param Experiment S4 object with experiment Data, Metadata and Results. Results of experiment are used to extract RT and Compound DB Id.
#' @param id.database Name of the preloaded database, in this case the regular db used by erah mslib
#' @param reference.list List with the compounds and their attributes (AlignId...)
#' @param ri.error.type Specify wether absolute or relative RI error is to be computed.
#' @param plot.results Shows the RI/RT graphic (True by default)
#' @details See eRah vignette for more details. To open the vignette, execute the following code in R:
#' vignette("eRahManual", package="erah")
#' @references [1] Xavier Domingo-Almenara, et al., eRah: A Computational Tool Integrating Spectral Deconvolution and Alignment with Quantification and Identification of Metabolites in GC-MS-Based Metabolomics. Analytical Chemistry (2016). DOI: 10.1021/acs.analchem.6b02927
#' @author Xavier Domingo-Almenara. xavier.domingo@urv.cat
#' @seealso \code{\link{showRTRICurve}}
#' @examples \dontrun{
#' ex <- computeRIerror(
#' ex,
#' mslib,
#' reference.list=list(AlignID = c(45,67,92,120)),
#' ri.error.type = "relative"
#' )
#' }
#' @export
#' @importFrom stats smooth.spline predict
#' @importFrom graphics points
computeRIerror <- function(Experiment, id.database=mslib, reference.list, ri.error.type=c('relative','absolute'), plot.results=TRUE){
if(!inherits(reference.list,'list')) stop('The parameter reference.list must be a list')
if(is.null(reference.list)) stop('A reference list must be provided')
if(!is.null(reference.list$AlignID) & !(is.null(reference.list$RT) | is.null(reference.list$RI))) stop('reference.list must contain a) user-defined RT and RI values, or b) the AlignID of the compounds to be used as a reference')
if(is.null(reference.list$AlignID) & (is.null(reference.list$RT) | is.null(reference.list$RI))) stop('reference.list must contain a) user-defined RT and RI values, or b) the AlignID of the compounds to be used as a reference')
if(!is.null(reference.list$RT)) if(length(reference.list$RT)!=length(reference.list$RI)) stop('Both RI and RT vectors in reference list must have the same length! (You provided a different number of RT and RI values, they must have the same length)')
ri.error.type <- match.arg(ri.error.type, c('relative','absolute'))
if(is.null(id.database)) stop("A database is needed for spectra comparison. Select a database or set 'compare' parameter to 'False'")
colnames(Experiment@Results@Identification)
if(nrow(Experiment@Results@Identification) == 0) stop("Factors must be identified first")
if(Experiment@Results@Parameters@Identification$database.name != id.database@name) {
error.msg <- paste("This experiment was not processed with the database selected. Please use ",
Experiment@Results@Parameters@Identification$database.name,
sep = "")
stop(error.msg)
}
colRamp <- c("#67001F", "#B2182B", "#D6604D", "#F4A582", "#FDDBC7", "#F7F7F7",
"#D1E5F0", "#92C5DE", "#4393C3", "#2166AC", "#053061")
colSeq <- seq(100,0,length.out=8)
n.putative <- Experiment@Results@Parameters@Identification$n.putative
idlist <- idList(object=Experiment, id.database=id.database)
genID <- idlist$AlignID
rtVect.gen <- idlist$tmean
genInd <- which(Experiment@Results@Identification$AlignID %in% genID)
riVect.gen <- sapply(as.numeric(as.vector(Experiment@Results@Identification$DB.Id.1[genInd])), function(i) id.database@database[[i]]$RI.VAR5.ALK)
mf <- as.numeric(as.vector(idlist$MatchFactor.1))
colVect <- sapply(mf, function(x) colRamp[which.min(abs(x - colSeq))])
if(!is.null(reference.list$AlignID)){
refInd <- which(Experiment@Results@Identification$AlignID %in% reference.list$AlignID)
riVect.std <- sapply(as.numeric(as.vector(Experiment@Results@Identification$DB.Id.1[refInd])), function(i) id.database@database[[i]]$RI.VAR5.ALK)
rtVect.std <- as.numeric(as.vector(Experiment@Results@Identification$tmean[refInd]))
#plot(riVect.std, rtVect.std, type='n')
#text(riVect.std, rtVect.std, text=1:length(riVect.std))
}else{
riVect.std <- reference.list$RI
rtVect.std <- reference.list$RT
}
chrom.method <- list(name = 'RI/RT Calibration Curve', method = 'alk.var5', ref.rt = rtVect.std, ref.ri = riVect.std)
RtIS <- smooth.spline(chrom.method$ref.rt , chrom.method$ref.ri, df=(length(chrom.method$ref.ri)-1))
riMatColnames <- sapply(1:n.putative, function(x) paste('DB.Id.', x, sep=''))
riMatNewColnames <- sapply(1:n.putative, function(x) paste('RI.error.', x, sep=''))
refRImatrix <- apply(Experiment@Results@Identification[,riMatColnames], 2, function(x){
sapply(as.numeric(as.vector(x)), function(i) id.database@database[[i]]$RI.VAR5.ALK)
})
rtVect <- Experiment@Results@Identification$tmean
empRI <- predict(RtIS, rtVect)$y
if(ri.error.type=='absolute') RI.error <- apply(refRImatrix, 2, function(x) abs(x-empRI))
if(ri.error.type=='relative') RI.error <- apply(refRImatrix, 2, function(x) round(100*abs(x-empRI)/empRI,2))
colnames(RI.error) <- riMatNewColnames
Experiment@Results@Identification <- cbind(Experiment@Results@Identification, RI.error, stringsAsFactors=FALSE)
if(plot.results){
#colRamp <- RColorBrewer::brewer.pal(n = 11, name = "RdBu")
#nColors <- 10
#colRamp <- rainbow(n=nColors, s = 1, v = 1, start = 0, end = max(1, nColors - 1)/nColors, alpha = 0.85)
#colSeq <- seq(100,0,length.out=8)
#plot(1:length(colRamp), col=colRamp, pch=20, cex=5)
colRamp <- c("red2", "#B2182B" ,"#D6604D", "#F4A582", "#FDDBC7", "#92C5DE")
colSeq <- c(100,90,80,70,60,50)
legend_label <- c(colSeq[-length(colSeq)], paste('<', colSeq[length(colSeq)], sep=''))
par(mfrow=c(2,1))
plot(rtVect.gen, riVect.gen, pch=20, col=colVect, main='Observed RI vs RT w/ Match Factors', xlab='RT (min)', ylab='RI (experimental)')
legend('topleft', legend=legend_label,col=colRamp, y.intersp=0.8, cex=1, box.lwd=0.5, pt.cex=2, pch=15, title='MF value:', bg='white')
plot(rtVect.gen, riVect.gen, pch=20, col='grey', main='Observed RI vs RT w/ calibration curve', xlab='RT (min)', ylab='RI (experimental)')
lines(RtIS, lwd=3, col='blue3', lty=2)
points(RtIS, pch=8, col='red2')
}
Experiment
}
#' @name showRTRICurve
#' @aliases showRTRICurve
#' @title Show RT-RI curve
#' @description This function uses RI of mslib database and RT of the identified compounds to discrimine proper compound identification.
#' @param Experiment S4 object with experiment Data, Metadata and Results. Results of experiment are used to extract RT and Compound DB Id.
#' @param reference.list List with the compounds and their attributes (AlignId...)
#' @param nAnchors The desired equivalent number of degrees of freedom for the smooth.spline function
#' @param ri.thrs Retention Index treshold given by the user to discrimine bewteen identification results
#' @param id.database Name of the preloaded database (mslib by default, the regular db used by erah)
#' @details See eRah vignette for more details. To open the vignette, execute the following code in R:
#' vignette("eRahManual", package="erah")
#' @references [1] Xavier Domingo-Almenara, et al., eRah: A Computational Tool Integrating Spectral Deconvolution and Alignment with Quantification and Identification of Metabolites in GC-MS-Based Metabolomics. Analytical Chemistry (2016). DOI: 10.1021/acs.analchem.6b02927
#' @author Xavier Domingo-Almenara. xavier.domingo@urv.cat
#' @seealso \code{\link{computeRIerror}}
#' @examples \dontrun{
#' # The following set erah to determine which indetified compounds are in RI treshold
#' RTRICurve <- showRTRICurve(ex, list, nAnchors=4, ri.thrs='1R')
#' }
#' @export
showRTRICurve <- function(Experiment, reference.list, nAnchors=4, ri.thrs='1R', id.database = mslib){
if (!inherits(reference.list,"list"))
stop("The parameter reference.list must be a list")
if (is.null(reference.list))
stop("A reference list must be provided")
if (!is.null(reference.list$AlignID) & !(is.null(reference.list$RT) |
is.null(reference.list$RI)))
stop("reference.list must contain a) user-defined RT and RI values, or b) the AlignID of the compounds to be used as a reference")
if (is.null(reference.list$AlignID) & (is.null(reference.list$RT) |
is.null(reference.list$RI)))
stop("reference.list must contain a) user-defined RT and RI values, or b) the AlignID of the compounds to be used as a reference")
if (!is.null(reference.list$RT))
if (length(reference.list$RT) != length(reference.list$RI))
stop("Both RI and RT vectors in reference list must have the same length! (You provided a different number of RT and RI values, they must have the same length)")
if (is.null(id.database))
stop("A database is needed for spectra comparison. Select a database or set 'compare' parameter to 'False'")
if (nrow(Experiment@Results@Identification) == 0)
stop("Factors must be identified first")
if (Experiment@Results@Parameters@Identification$database.name !=
id.database@name) {
error.msg <- paste("This experiment was not processed with the database selected. Please use ",
Experiment@Results@Parameters@Identification$database.name,
sep = "")
stop(error.msg)
}
idlist <- idList(object = Experiment, id.database = id.database)
if (!is.null(reference.list$AlignID)) {
refInd <- which(Experiment@Results@Identification$AlignID %in%
reference.list$AlignID)
riVect.std <- sapply(as.numeric(as.vector(Experiment@Results@Identification$DB.Id.1[refInd])),
function(i) id.database@database[[i]]$RI.VAR5.ALK)
rtVect.std <- as.numeric(as.vector(Experiment@Results@Identification$tmean[refInd]))
}
else {
riVect.std <- reference.list$RI
rtVect.std <- reference.list$RT
}
relativeThr <- NULL
if(length(grep('r', tolower(ri.thrs)))==1) {
relativeThr <- TRUE
riThrs <- as.numeric(as.vector(gsub('r','', tolower(ri.thrs))))
}
if(length(grep('a', tolower(ri.thrs)))==1) {
relativeThr <- FALSE
riThrs <- as.numeric(as.vector(gsub('a','', tolower(ri.thrs))))
}
if(is.null(relativeThr)) stop('Incorrect "ri.thrs" value, please check the documentation for assinging "ri.thrs" values.')
sSp <- smooth.spline(rtVect.std , riVect.std,df=nAnchors)
riError <- abs(sSp$y - riVect.std)
if(relativeThr) riError <- 100*(riError/sSp$y)
plot(rtVect.std, riVect.std, type='n', main='RT/RI curve', xlab='RT (min)', ylab='RI')
points(rtVect.std[which(riError<riThrs)], riVect.std[which(riError<riThrs)], pch=19)
if(length(which(riError>riThrs))!=0) points(rtVect.std[which(riError>riThrs)], riVect.std[which(riError>riThrs)], pch=19, col='red2')
lines(sSp)
if(length(which(riError>riThrs))!=0){
if (!is.null(reference.list$AlignID)) {
cat('The points outside the RI threshold are: ')
cat(paste0(Experiment@Results@Identification$AlignID[refInd][which(riError>riThrs)], collapse=', '))
}else{
cat('The points outside the RI threshold are: \n')
#cbind(reference.list$RI[which(riError>riThrs)],reference.list$RT[which(riError>riThrs)])
}
}else{
cat('No points outside the selected RI threshold')
}
}