-
Notifications
You must be signed in to change notification settings - Fork 44
/
MarkupParser.scala
168 lines (146 loc) · 6.11 KB
/
MarkupParser.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/*
* Copyright 2012-2020 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package laika.api
import cats.syntax.all.*
import laika.api.builder.{ OperationConfig, ParserBuilder }
import laika.api.config.{ Config, ConfigBuilder, ConfigError, ConfigValue, Origin }
import laika.api.errors.{ InvalidConfig, InvalidDocument, InvalidElements, ParserError }
import laika.api.format.MarkupFormat
import laika.ast.Path.Root
import laika.ast.{ Document, EmbeddedConfigValue, Path, RewritePhase, UnresolvedDocument }
import laika.api.config.Origin.DocumentScope
import laika.internal.parse.markup.DocumentParser
import DocumentParser.DocumentInput
/** Performs a parse operation from text markup to a
* document tree without a subsequent render operation.
*
* In cases where a render operation should follow immediately, it is more
* convenient to use a [[laika.api.Transformer]] instead which
* combines a parse and a render operation directly.
*
* Example for parsing Markdown:
*
* {{{
* val res: Either[ParserError, Document] = MarkupParser
* .of(Markdown)
* .using(GitHubFlavor)
* .build
* .parse("hello *there*")
* }}}
*
* This is a pure API that does not perform any side-effects.
* For additional options like File and Stream I/O, templating
* or parallel processing, use the corresponding builders in
* the laika-io module.
*
* @author Jens Halm
*/
class MarkupParser private[laika] (val format: MarkupFormat, val config: OperationConfig) {

  /** The file suffixes declared by the markup format of this parser.
    */
  val fileSuffixes: Set[String] = format.fileSuffixes

  // Virtual path used whenever the caller does not supply one.
  private val fallbackPath: Path = Root / "doc"

  // Low-level document parser built from the format together with the
  // markup extensions and config provider of the operation config.
  private val docParser =
    DocumentParser.forMarkup(format, config.markupExtensions, config.configProvider)

  /** Parses the given markup string into a document AST.
    *
    * The result is assigned the internal fallback path (`Root / "doc"`).
    *
    * @param input the markup source to parse
    * @return the rewritten document or the first error encountered
    */
  def parse(input: String): Either[ParserError, Document] =
    parse(DocumentInput(fallbackPath, input))

  /** Parses the given markup string into a document AST
    * and assigns the given (virtual) path to the result.
    *
    * @param input the markup source to parse
    * @param path  the virtual path to associate with the input
    * @return the rewritten document or the first error encountered
    */
  def parse(input: String, path: Path): Either[ParserError, Document] =
    parse(DocumentInput(path, input))

  /** Parses the given markup input into a document AST.
    *
    * The steps, in order: run the low-level parser, resolve the document's configuration
    * against the base configuration, attach the resolved configuration (plus any embedded
    * config values found in the content), apply the `Build` and `Resolve` rewrite phases
    * and finally turn the result into an error if `InvalidDocument.from` reports
    * a problem for the configured `failOn` message filter.
    */
  private def parse(input: DocumentInput): Either[ParserError, Document] = {

    // Builds a config that uses `base` as fallback and adds every entry as a value.
    def withExtraValues(base: Config, entries: Seq[(String, ConfigValue)]): Config = {
      val seed = ConfigBuilder.withFallback(base)
      entries
        .foldLeft(seed) { case (acc, (key, value)) => acc.withValue(key, value) }
        .build
    }

    // Collects the embedded config values of the parsed content
    // and attaches them, merged with `docConfig`, to the document.
    def attachConfig(parsed: UnresolvedDocument, docConfig: Config): Document = {
      val target   = parsed.document
      val embedded = target.content.collect { case entry: EmbeddedConfigValue =>
        (entry.key, entry.value)
      }
      target.withConfig(withExtraValues(docConfig, embedded))
    }

    // Obtains the rules for a single phase and applies them,
    // mapping config errors from either step to `InvalidConfig`.
    def applyPhase(doc: Document, phase: RewritePhase): Either[ParserError, Document] =
      config
        .rewriteRulesFor(doc, phase)
        .leftMap(InvalidConfig(_))
        .flatMap(rules => doc.rewrite(rules).leftMap(InvalidConfig(_)))

    // Translates the two flavours of invalid document into the matching parser error.
    def toParserError(invalid: InvalidDocument): ParserError =
      invalid.errors.fold[ParserError](
        configErrors => InvalidConfig(ConfigError.MultipleErrors(configErrors)),
        invalidNodes => InvalidElements(invalidNodes)
      )

    // Runs both rewrite phases in sequence, then validates the outcome.
    def runRewrites(resolvedDoc: Document): Either[ParserError, Document] =
      applyPhase(resolvedDoc, RewritePhase.Build)
        .flatMap(built => applyPhase(built, RewritePhase.Resolve))
        .flatMap { rewritten =>
          InvalidDocument
            .from(rewritten, config.messageFilters.failOn)
            .fold[Either[ParserError, Document]](Right(rewritten)) { invalid =>
              Left(toParserError(invalid))
            }
        }

    docParser(input).flatMap { parsed =>
      parsed.config
        .resolve(Origin(DocumentScope, input.path), config.baseConfig)
        .leftMap(InvalidConfig(_))
        .flatMap(docConfig => runRewrites(attachConfig(parsed, docConfig)))
    }
  }

  /** Parses the given markup string into an unresolved document,
    * using the internal fallback path (`Root / "doc"`).
    *
    * Only the low-level parser runs; neither configuration resolution
    * nor any rewrite phase is applied by this method.
    *
    * @param input the markup source to parse
    */
  def parseUnresolved(input: String): Either[ParserError, UnresolvedDocument] =
    parseUnresolved(DocumentInput(fallbackPath, input))

  /** Parses the given markup string into an unresolved document
    * associated with the given (virtual) path.
    *
    * Only the low-level parser runs; neither configuration resolution
    * nor any rewrite phase is applied by this method.
    *
    * @param input the markup source to parse
    * @param path  the virtual path to associate with the input
    */
  def parseUnresolved(input: String, path: Path): Either[ParserError, UnresolvedDocument] =
    parseUnresolved(DocumentInput(path, input))

  /** Produces an unresolved document: the default rewrite rules are not applied
    * and the configuration header (if present) is not resolved.
    *
    * The default rewrite rules are the ones that resolve link and image references
    * and arrange the tree into a hierarchy of sections derived from the sequence
    * of headers in the document.
    *
    * The default configuration resolver is what allows variable references in the
    * HOCON header of a document to be resolved against values of the base configuration.
    *
    * This is a low-level hook that application code rarely needs.
    */
  private[laika] def parseUnresolved(
      input: DocumentInput
  ): Either[ParserError, UnresolvedDocument] = docParser(input)

}
/** Entry point for building a MarkupParser instance.
*
* @author Jens Halm
*/
object MarkupParser {

  /** Creates a new builder for parsers of the given markup format.
    *
    * The builder starts from the default operation config,
    * extended with the bundles for the given format.
    *
    * A format is usually an object supplied by the library or by a plugin
    * that knows how to parse a specific markup language,
    * such as Markdown or reStructuredText.
    *
    * @param format the markup format to use for all parsing operations
    * @return a builder for further configuration of the parser
    */
  def of(format: MarkupFormat): ParserBuilder = {
    val initialConfig = OperationConfig.default.withBundlesFor(format)
    new ParserBuilder(format, initialConfig)
  }

}
}