-
Notifications
You must be signed in to change notification settings - Fork 44
/
Identifier.scala
129 lines (109 loc) · 5.43 KB
/
Identifier.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*
* Copyright 2012-2020 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package laika.parse.code.common
import cats.data.NonEmptySet
import laika.ast.{ CategorizedCode, CodeSpan }
import laika.parse.code.{ CodeCategory, CodeSpanParser }
import laika.parse.builders._
import laika.parse.text.{ CharGroup, PrefixedParser }
/** Configurable base parsers for identifiers in code blocks.
  *
  * @author Jens Halm
  */
object Identifier {

  /* TODO - identifier characters outside the ASCII range are not supported yet,
     as that requires changes in the low-level optimizer for the span parser.
     The ASCII-only support will probably already cover a large range of common use cases.
   */

  /** Category chooser that assigns `TypeName` to identifiers whose first character
    * is an uppercase letter and `Identifier` to all other input.
    */
  val upperCaseTypeName: String => CodeCategory = { id =>
    val startsUpper = id.headOption.exists(_.isUpper)
    if (startsUpper) CodeCategory.TypeName else CodeCategory.Identifier
  }

  /** Configurable base parser for identifiers in code blocks.
    *
    * None of the configuration methods modify the instance they are called on;
    * each of them returns a new parser with the requested change applied.
    */
  class IdParser private[Identifier] (
      idStartChars: NonEmptySet[Char],
      nonStartChars: NonEmptySet[Char],
      category: String => CodeCategory = _ => CodeCategory.Identifier,
      prefixParser: Option[PrefixedParser[String]] = None,
      digitBeforeStart: Boolean = false
  ) extends PrefixedParser[CodeSpan] with CodeSpanParser {

    /* Creates a new parser that differs from this one only in the settings passed explicitly. */
    private def copyWith(
        start: NonEmptySet[Char] = idStartChars,
        part: NonEmptySet[Char] = nonStartChars,
        chooser: String => CodeCategory = category,
        prefix: Option[PrefixedParser[String]] = prefixParser,
        digitAllowed: Boolean = digitBeforeStart
    ): IdParser = new IdParser(start, part, chooser, prefix, digitAllowed)

    /** Determines the code category by applying the given function to the parsed identifier.
      */
    def withCategoryChooser(f: String => CodeCategory): IdParser = copyWith(chooser = f)

    /** Assigns the given code category to every parsed identifier.
      */
    def withCategory(category: CodeCategory): IdParser = {
      val constant: String => CodeCategory = _ => category
      withCategoryChooser(constant)
    }

    /** Extends the set of characters that may start an identifier.
      * These characters are also accepted by the parser for the rest of the identifier.
      */
    def withIdStartChars(char: Char, chars: Char*): IdParser =
      copyWith(start = idStartChars ++ NonEmptySet.of(char, chars: _*))

    /** Extends the set of characters that may appear inside an identifier,
      * but not as its first character.
      */
    def withIdPartChars(char: Char, chars: Char*): IdParser =
      copyWith(part = nonStartChars ++ NonEmptySet.of(char, chars: _*))

    /** Puts the specified prefix in front of this identifier.
      *
      * The resulting parser consumes the prefix first and then continues with
      * the base parser for this identifier, which still enforces the rules
      * for the characters allowed to start the identifier.
      *
      * A typical example is `#foo` in CSS: `#` is the prefix,
      * while `foo` is subject to the rules of this identifier parser.
      */
    def withPrefix(parser: PrefixedParser[String]): IdParser = copyWith(prefix = Some(parser))

    /** Lets an identifier be recognized when it directly follows a digit,
      * as in CSS length values like `1.2em`.
      */
    def allowDigitBeforeStart: IdParser = copyWith(digitAllowed = true)

    /** The parser assembled from the configuration of this instance.
      * It produces a `CodeSpan` holding the consumed source, categorized by the configured chooser.
      */
    lazy val underlying: PrefixedParser[CodeSpan] = {
      val leadingChar = oneOf(idStartChars)

      // With a prefix configured, the identifier starts with the prefix followed by one start character.
      val opening: PrefixedParser[String] = prefixParser match {
        case Some(prefix) => (prefix ~ leadingChar).source
        case None         => leadingChar
      }

      // A preceding letter always rules out an identifier start,
      // a preceding digit only as long as `digitBeforeStart` is not set.
      val rejectedPredecessor: Char => Boolean = prev =>
        Character.isLetter(prev) || (!digitBeforeStart && Character.isDigit(prev))

      val guardedOpening = delimiter(opening).prevNot(rejectedPredecessor)
      val remainder      = anyOf(idStartChars ++ nonStartChars)

      (guardedOpening ~ remainder).source.map { text =>
        CodeSpan(text, category(text))
      }
    }

    /** This parser as the sole element of the sequence. */
    override def parsers: Seq[PrefixedParser[CategorizedCode]] = Seq(this)

    /** The start characters of the `underlying` parser. */
    override def startChars: NonEmptySet[Char] = underlying.startChars

  }

  /** Parses an alphanumeric identifier that must not start with a digit.
    *
    * This base parser does not accept any other characters, such as the underscore,
    * but they can be added to the returned instance
    * with the `withIdStartChars` or `withIdPartChars` methods.
    */
  def alphaNum: IdParser = forCharacterSets(CharGroup.alpha, CharGroup.digit)

  /** Parses an identifier based on the specified character sets.
    *
    * @param startChars the characters allowed to appear in any position of the identifier, including the first character
    * @param partChars the characters not allowed to appear as the first character, but at any other position of the identifier
    * @return a new identifier parser for these character sets, with all other settings at their defaults
    */
  def forCharacterSets(startChars: NonEmptySet[Char], partChars: NonEmptySet[Char]): IdParser =
    new IdParser(idStartChars = startChars, nonStartChars = partChars)

}