-
Notifications
You must be signed in to change notification settings - Fork 44
/
DelimitedText.scala
219 lines (174 loc) · 7.89 KB
/
DelimitedText.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
/*
* Copyright 2012-2020 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package laika.parse.text
import laika.parse._
import laika.parse.text.DelimiterResult._
import scala.annotation.tailrec
/** Parser for a span of text that is terminated by a configurable delimiter condition.
  * The delimiter either marks the end of the text span or the start of an embedded inner span.
  *
  * Instances are immutable: every configuration method returns a new parser that is based
  * on a modified copy of the current delimiter.
  *
  * @author Jens Halm
  */
class DelimitedText private[text] (private[laika] val delimiter: TextDelimiter)
    extends Parser[String] {

  // Created on first use; all parsing is delegated to this instance.
  private lazy val parser: DelimitedParser[String] = new DelimitedParser[String](delimiter)

  /** Wraps a modified copy of the delimiter in a new parser instance. */
  private def reconfigured(updated: TextDelimiter): DelimitedText = new DelimitedText(updated)

  /** Returns a parser that also succeeds when the end of the input is reached
    * before the delimiter has been encountered.
    * By default a delimiter based parser fails in that case.
    */
  def acceptEOF: DelimitedText = reconfigured(delimiter.copy(acceptEOF = true))

  /** Returns a parser that requires a non-empty result,
    * meaning it fails when the delimiter is reached before any non-delimiter characters
    * have been parsed.
    * By default an empty result is accepted.
    */
  def nonEmpty: DelimitedText = reconfigured(delimiter.copy(nonEmpty = true))

  /** Returns a parser that keeps the delimiter itself on the remaining input.
    * By default all delimiter characters are consumed.
    */
  def keepDelimiter: DelimitedText = reconfigured(delimiter.copy(keepDelimiter = true))

  /** Returns a parser that fails when any of the specified characters is encountered
    * before a delimiter.
    * The specified characters replace those of any previous call to this method.
    */
  def failOn(chars: Char*): DelimitedText = reconfigured(delimiter.copy(failOn = chars.toSet))

  /** Parses the delimited text starting at the position of the specified cursor. */
  def parse(source: SourceCursor): Parsed[String] = parser.parse(source)

}
object DelimitedText {

  /** A parser that reads to the end of the input,
    * unless further conditions are set on the returned `DelimitedText`.
    *
    * It has no delimiter parser and accepts EOF as the end of the text.
    */
  lazy val Undelimited: DelimitedText =
    new DelimitedText(new TextDelimiter(parser = None, acceptEOF = true))

}
/** Internal parser implementation that both the public `DelimitedText` parser
  * and the internal `InlineParser` delegate to.
  *
  * Scans the input one character at a time, starting at the offset of the cursor.
  * Whenever one of the delimiter's start characters is encountered, the delimiter decides
  * whether parsing is complete at that position; when the end of the input is reached,
  * the delimiter provides the final result.
  *
  * @param delimiter the delimiter logic consulted at start characters and at EOF
  * @tparam T the type of result produced by the delimiter
  */
private[laika] class DelimitedParser[T](val delimiter: Delimiter[T]) extends Parser[T] {

  // Highest start character, used as a cheap guard in front of the table lookup.
  // Falls back to -1 (below every Char) for a delimiter without start characters:
  // a fallback of 0 would let '\u0000' pass the guard and reach the table lookup
  // even though there is no start character at all.
  private val maxChar: Int =
    if (delimiter.startChars.nonEmpty) delimiter.startChars.max.toInt else -1

  // Lookup table indexed by character code; an entry of 1 marks a start character.
  private lazy val optimizedDelimiters: Array[Byte] =
    Characters.optimizedLookup(delimiter.startChars)

  /** Reads from the offset of the specified cursor until the delimiter completes
    * or the end of the input is reached.
    *
    * @param source the cursor marking the start of the delimited text
    * @return the result provided by the delimiter
    */
  def parse(source: SourceCursor): Parsed[T] = {
    val sourceString = source.input
    val end          = sourceString.length
    val lookup       = optimizedDelimiters

    @tailrec
    def parse(offset: Int): Parsed[T] = {
      // Number of characters between the start of the text and the current position.
      def charsConsumed = offset - source.offset

      if (offset == end) delimiter.atEOF(charsConsumed, source)
      else {
        val char = sourceString.charAt(offset)
        if (char <= maxChar && lookup(char.toInt) == 1)
          delimiter.atStartChar(char, charsConsumed, source) match {
            case Complete(result) => result
            case Continue         => parse(offset + 1)
          }
        else parse(offset + 1)
      }
    }

    parse(source.offset)
  }

}
/** Represents the logic of a specific kind of text delimiter.
*
* An implementation declares the characters that may start a delimiter and decides,
* each time one of them is encountered, whether parsing ends at that position.
* It also determines the outcome when the end of the input is reached.
*
* @tparam T the type of result produced by this delimiter
*/
private[laika] trait Delimiter[T] {
/** The start characters that mark the (potential) end of the delimited text
* in case the conditions implemented in `atStartChar` are met.
*/
def startChars: Set[Char]
/** Method invoked every time the parser encounters any of the `startChars`.
* The result is either `Continue` in case the additional conditions for the
* delimiter are not met at this position, or a `Complete` instance containing
* the result.
*
* @param startChar the start character that was encountered on the input string (matches one of the characters
* in the `startChars` set)
* @param charsConsumed the number of characters consumed before the start character has been reached
* @param source the cursor at the position where the delimited text starts;
* the start character is located `charsConsumed` characters after its offset
* @return either `Continue` in case the additional conditions for the
* delimiter are not met at this position, or a `Complete` instance containing
* the result
*/
def atStartChar(startChar: Char, charsConsumed: Int, source: SourceCursor): DelimiterResult[T]
/** Method invoked when the end of the input is reached.
*
* @param charsConsumed the number of characters consumed before EOF has been reached
* @param source the cursor at the position where the delimited text starts;
* EOF is located `charsConsumed` characters after its offset
* @return the result of the parser
*/
def atEOF(charsConsumed: Int, source: SourceCursor): Parsed[T]
}
/** Delimiter implementation that allows for various kinds of customization.
  *
  * @param parser        optional parser for the delimiter itself; it is applied at each position
  *                      where one of its start characters is encountered
  * @param acceptEOF     whether reaching the end of the input produces a success instead of a failure
  * @param nonEmpty      whether a result without any characters before the delimiter (or EOF)
  *                      is turned into a failure
  * @param keepDelimiter whether the delimiter is left on the remaining input instead of being consumed
  * @param failOn        characters that cause a failure when they are encountered before a delimiter
  */
private[laika] case class TextDelimiter(
    parser: Option[PrefixedParser[String]],
    acceptEOF: Boolean = false,
    nonEmpty: Boolean = false,
    keepDelimiter: Boolean = false,
    failOn: Set[Char] = Set()
) extends Delimiter[String] {

  /** The start characters of the delimiter parser (if any) combined with the `failOn` characters. */
  val startChars: Set[Char] = parser match {
    case Some(delimiterParser) => delimiterParser.startChars.toSortedSet ++ failOn
    case None                  => failOn
  }

  private val emptyResult = Message.fixed("expected at least 1 character before end delimiter")

  private val unexpectedInput: Char => Message =
    Message.forRuntimeValue[Char](char => s"unexpected input in delimited text: `$char`")

  /** Decides whether the delimited text ends at the position of the specified start character.
    *
    * The checks are applied in this order: a `failOn` character completes with a failure;
    * a delimiter parser that does not succeed at this position yields `Continue`;
    * an empty result with `nonEmpty` set completes with a failure;
    * anything else completes with the captured text.
    *
    * @param startChar     the start character encountered in the input
    * @param charsConsumed the number of characters between the offset of `source` and the start character
    * @param source        the cursor at the position where the delimited text starts
    * @return `Continue` if the delimiter parser did not succeed at this position,
    *         otherwise a `Complete` instance holding either the captured text or a failure
    */
  def atStartChar(
      startChar: Char,
      charsConsumed: Int,
      source: SourceCursor
  ): DelimiterResult[String] = {
    val delimiterOffset = source.offset + charsConsumed

    if (failOn.contains(startChar))
      Complete(Failure(unexpectedInput(startChar), source, delimiterOffset))
    else {
      // Length of the delimiter at this position, or None if the delimiter parser did not succeed.
      // Without a delimiter parser the delimiter is treated as having a length of zero.
      val delimiterLength: Option[Int] = parser match {
        case None                  => Some(0)
        case Some(delimiterParser) =>
          delimiterParser.parse(source.consume(charsConsumed)) match {
            case Success(_, next) => Some(next.offset - delimiterOffset)
            case _                => None
          }
      }

      delimiterLength match {
        case None                                      => Continue
        case Some(_) if nonEmpty && charsConsumed == 0 =>
          Complete(Failure(emptyResult, source))
        case Some(length)                              =>
          val text     = source.capture(charsConsumed)
          // With `keepDelimiter` only the text is consumed, otherwise the delimiter as well.
          val consumed = if (keepDelimiter) charsConsumed else charsConsumed + length
          Complete(Success(text, source.consume(consumed)))
      }
    }
  }

  /** Produces the result for reaching the end of the input.
    *
    * @param charsConsumed the number of characters between the offset of `source` and EOF
    * @param source        the cursor at the position where the delimited text starts
    * @return a failure if `acceptEOF` is not set or if the result is empty while `nonEmpty` is set,
    *         otherwise a success with the captured text
    */
  def atEOF(charsConsumed: Int, source: SourceCursor): Parsed[String] =
    if (acceptEOF) {
      if (nonEmpty && charsConsumed == 0) Failure(emptyResult, source)
      else Success(source.capture(charsConsumed), source.consume(charsConsumed))
    }
    else Failure(Message.UnexpectedEOF, source, source.offset + charsConsumed)

}
private[laika] object TextDelimiter {

  /** Creates a delimiter based on the specified parser, with all other options at their defaults. */
  def apply(parser: PrefixedParser[String]): TextDelimiter =
    new TextDelimiter(Some(parser))

}
/** Represents the result of checking a delimiter at a potential end position.
*
* This trait is sealed; the cases `Continue` and `Complete` are defined in the companion object.
*
* @tparam T the type of result produced by this delimiter
*/
private[laika] sealed trait DelimiterResult[+T]
/** Holds the possible outcomes of a delimiter check. */
private[laika] object DelimiterResult {
/** Signals that the parsing should continue, meaning some conditions
* for the delimiter at this position have not been met.
*/
case object Continue extends DelimiterResult[Nothing]
/** Signals that the parsing of the delimited text ends at this position
* and provides the final result, which may be a success or a failure.
*
* @param result the result to return from the delimited parser
*/
case class Complete[T](result: Parsed[T]) extends DelimiterResult[T]
}