The grammar of µDhall is a much simplified version of the [Dhall grammar](https://github.com/dhall-lang/dhall-lang/blob/master/standard/dhall.abnf).

```
end-of-line =
      %x0A     ; "\n"
    / %x0D.0A  ; "\r\n"

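; Non-ASCII characters of the Basic Multilingual Plane,
; excluding the surrogate range %xD800-DFFF.
valid-non-ascii = %x80-D7FF / %xE000-FFFD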

tab = %x09  ; "\t"

not-end-of-line = %x20-7F / valid-non-ascii / tab

line-comment = "--" *not-end-of-line end-of-line

whitespace-chunk =
      " "
    / tab
    / end-of-line
    / line-comment

whsp = *whitespace-chunk

; Nonempty whitespace.
whsp1 = 1*whitespace-chunk

; Uppercase or lowercase ASCII letter.
ALPHA = %x41-5A / %x61-7A

; ASCII digit.
DIGIT = %x30-39  ; 0-9

ALPHANUM = ALPHA / DIGIT

; A simple label cannot be one of the reserved keywords
; listed in the `keyword` rule.
; A PEG parser could use negative lookahead to
; enforce this, e.g. as follows:
; label =
;       keyword 1*label-next-char
;     / !keyword (label-first-char *label-next-char)
label-first-char = ALPHA / "_"

label-next-char = ALPHANUM / "-" / "/" / "_"

label = label-first-char *label-next-char

; A nonreserved-label cannot be any of the reserved identifiers for builtins.
; Their list can be found in the `builtin` rule.
; The only place where this restriction applies is bound variables.
; A PEG parser could use negative lookahead to avoid parsing those identifiers,
; e.g. as follows:
; nonreserved-label =
;      builtin 1*label-next-char
;    / !builtin label
nonreserved-label = label

; Keywords.
let                   = "let"
in                    = "in"
forall-keyword        = "forall"
forall-symbol         = %x2200 ; Unicode FOR ALL: ∀
forall                = forall-symbol / forall-keyword

keyword = let / in / forall-keyword

; Builtin constants.
Natural = "Natural"
Natural-fold = "Natural/fold"
Natural-subtract = "Natural/subtract"
Type = "Type"
Kind = "Kind"

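; Longer names are listed first: alternatives are tried in order,
; and "Natural" is a prefix of "Natural/fold" and "Natural/subtract".
builtin =
      Natural-fold
    / Natural-subtract
    / Natural
    / Type
    / Kind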

; Operators.
lambda        = %x3BB  / "\"
arrow         = %x2192 / "->"
plus          = "+"
times         = "*"

natural-literal =
    ; Decimal; leading 0 digits are not allowed
      ("1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9") *DIGIT
    ; ... except for 0 itself
    / "0"

identifier = variable / builtin

variable = nonreserved-label [ whsp "@" whsp natural-literal ]

expression =
    ; "\(x : a) -> b"
      lambda whsp "(" whsp nonreserved-label whsp ":" whsp1 expression whsp ")" whsp arrow whsp expression
    ;
    ; "let x = e1 in e2"
    ; We allow dropping the `in` between adjacent let-expressions; the following are equivalent:
    ; "let x = e1 let y = e2 in e3"
    ; "let x = e1 in let y = e2 in e3"
    / 1*let-binding in whsp1 expression
    ;
    ; "forall (x : a) -> b"
    / forall whsp "(" whsp nonreserved-label whsp ":" whsp1 expression whsp ")" whsp arrow whsp expression
    ;
    ; "a -> b" is shorthand syntax for "forall (_ : a) -> b"
    ;
    ; NOTE: Backtrack if parsing this alternative fails
    / operator-expression whsp arrow whsp expression
    ;
    ; "x : t"
    / annotated-expression

; Nonempty-whitespace to disambiguate `env:VARIABLE` from type annotations
annotated-expression = operator-expression [ whsp ":" whsp1 expression ]

; "let x = e1"
let-binding = let whsp1 nonreserved-label whsp "=" whsp expression whsp1

operator-expression = plus-expression

plus-expression = times-expression *(whsp plus whsp times-expression)

times-expression = application-expression *(whsp times whsp application-expression)

application-expression = primitive-expression *(whsp1 primitive-expression)

primitive-expression =
    ; 123
    natural-literal
    ; "x"
    ; "x@2"
    / identifier
    ;
    ; "( e )"
    / "(" full-expression ")"

full-expression = whsp expression whsp
```

When a grammar rule specifies `a / b` then `a` is preferred to `b`; if `a` and the rest are parsed successfully then the `b` variant is not considered.

We use the `fastparse` library to implement this grammar.

In [74]:
import fastparse._
import fastparse.NoWhitespace._

/** A fastparse implementation of the µDhall grammar.
  *
  * Each parser below corresponds to one rule of the ABNF grammar; the rule is quoted
  * in the comment above the parser. Whitespace is never skipped implicitly
  * (`fastparse.NoWhitespace._` is in scope), so every `whsp` / `whsp1` is explicit.
  *
  * Entry points:
  *  - `parse(input)` returns the parsed `Expr` or throws if parsing fails.
  *  - `debugParse(input)` prints diagnostics and returns `Some(expr)` or `None`.
  */
object Grammar {

  // end-of-line = %x0A / %x0D.0A
  def end_of_line[_: P] = P( "\n" | "\r\n" )

  // valid-non-ascii = %x80-FFFD
  // Implemented as two ranges so that the surrogate code units U+D800..U+DFFF are excluded.
  def valid_non_ascii[_: P]: P[Unit] = P(
      CharIn("\u0080-\uD7FF") | // U+0080 to U+D7FF (excludes surrogates)
      CharIn("\uE000-\uFFFD")   // U+E000 to U+FFFD
  )

  // tab = %x09
  def tab[_: P] = P( "\t" )

  // not-end-of-line = %x20-7F / valid-non-ascii / tab
  def not_end_of_line[_: P] = P( CharIn("\u0020-\u007F") | valid_non_ascii | tab )

  // line-comment = "--" *not-end-of-line end-of-line
  def line_comment[_: P] = P( "--" ~ not_end_of_line.rep ~ end_of_line )

  // whitespace-chunk = " " / tab / end-of-line / line-comment
  def whitespace_chunk[_: P] = P( " " | tab | end_of_line | line_comment )

  // whsp = *whitespace-chunk
  def whsp[_: P] = P( whitespace_chunk.rep )

  // whsp1 = 1*whitespace-chunk
  def whsp1[_: P] = P( whitespace_chunk.rep(1) )

  // ALPHA = %x41-5A / %x61-7A
  def ALPHA[_: P] = P( CharIn("A-Z", "a-z") )

  // DIGIT = %x30-39
  def DIGIT[_: P] = P( CharIn("0-9") )

  // ALPHANUM = ALPHA / DIGIT
  def ALPHANUM[_: P] = P( ALPHA | DIGIT )

  // label-first-char = ALPHA / "_"
  def label_first_char[_: P] = P( ALPHA | "_" )

  // label-next-char = ALPHANUM / "-" / "/" / "_"
  // NOTE: CharIn("-") cannot be used in fastparse!
  def label_next_char[_: P] = P( ALPHANUM | "-" | "/" | "_" )

  // label = label-first-char *label-next-char
  //
  // Implemented with negative lookahead so that a bare keyword is rejected,
  // while a keyword followed by more label characters (e.g. "letter") is accepted:
  // label =
  //   keyword 1*label-next-char
  //   / !keyword (label-first-char *label-next-char)
  def label[_: P] = P( ( keyword ~ label_next_char.rep(1))  | ( !keyword ~ label_first_char ~ label_next_char.rep ) )

  // nonreserved-label =
  //      builtin 1*label-next-char
  //    / !builtin label
  //
  // The matched text is captured with `.!`; the value produced by `builtin`
  // in the first alternative is discarded by that capture.
  def nonreserved_label[_: P] : P[String] = P( (builtin ~ label_next_char.rep(1)) | ( !builtin ~ label) ).!

  // let = "let"
  def let[_: P] = P( "let" )

  // in = "in"
  def in[_: P] = P( "in" )

  // forall-keyword = "forall"
  def forall_keyword[_: P] = P( "forall" )

  // forall-symbol = %x2200 ; Unicode FOR ALL: ∀
  def forall_symbol[_: P] = P( "∀" )

  // forall = forall-symbol / forall-keyword
  def forall[_: P] = P( forall_symbol | forall_keyword )

  // keyword = let / in / forall-keyword
  def keyword[_: P] = P( let | in | forall_keyword )

  // Builtin constants.
  def Natural[_: P] = P( "Natural" )
  def Natural_fold[_: P] = P( "Natural/fold" )
  def Natural_subtract[_: P] = P( "Natural/subtract" )
  def Type[_: P] = P( "Type" )
  def Kind[_: P] = P( "Kind" )

  // builtin = Natural / Natural-fold / Natural-subtract / Type / Kind
  // The longer names are tried first: `|` commits to the first alternative that matches,
  // and "Natural" is a prefix of "Natural/fold" and "Natural/subtract".
  def builtin[_: P] = P( Natural_fold | Natural_subtract | Natural | Type | Kind ).!.map(s => Expr.Builtin(Constant.withName(s)))

  // lambda = %x3BB / "\"
  def lambda[_: P] = P( "λ" | "\\" )

  // arrow = %x2192 / "->"
  def arrow[_: P] = P( "→" | "->" )

  // plus = "+"
  def plus[_: P] = P( "+" )

  // times = "*"
  def times[_: P] = P( "*" )

  // natural-literal = ("1"/..."9") *DIGIT / "0"
  // NOTE(review): `toInt` throws NumberFormatException for literals that do not fit into an Int,
  // so such input aborts with an exception instead of a parse failure — confirm whether
  // Expr.NaturalLiteral is meant to be limited to Int.
  def natural_literal[_: P]: P[Expr.NaturalLiteral] = P( CharIn("1-9") ~ DIGIT.rep | "0" ).!.map(n => Expr.NaturalLiteral(n.toInt))

  // variable = nonreserved-label [ whsp "@" whsp natural-literal ]
  // A variable without an explicit "@n" suffix gets index 0.
  def variable[_: P]: P[Expr.Variable] = P( nonreserved_label ~ ( whsp ~ "@" ~ whsp ~ natural_literal ).? )
    .map {
      case (name, Some(i)) => Expr.Variable(name, i.value)
      case (name, None) => Expr.Variable(name, 0)
    }

  // identifier = variable / builtin
  def identifier[_: P]: P[Expr] = P( variable | builtin )

  // The parenthesized alternative of primitive-expression: "(" whsp expression whsp ")"
  def parenthesized_expression[_: P] = P( "(" ~ whsp ~ expression ~ whsp ~ ")" )

  // primitive-expression = natural-literal / identifier / "(" whsp expression whsp ")"
  def primitive_expression[_: P]: P[Expr] = P( natural_literal | identifier | parenthesized_expression )

  // application-expression = primitive-expression *(whsp1 primitive-expression)
  // Application associates to the left: "f a b" becomes Applied(Applied(f, a), b).
  def application_expression[_: P]: P[Expr] =
    P( primitive_expression ~ ( whsp1 ~ primitive_expression ).rep )
        .map { case (a, tail) => tail.foldLeft(a) { case (prev, arg) => Expr.Applied(prev, arg) } }

  // times-expression = application-expression *(whsp times whsp application-expression)
  // Operands are combined left to right.
  def times_expression[_: P]: P[Expr] =
    P( application_expression ~ ( whsp ~ times ~ whsp ~ application_expression ).rep )
        .map { case (a, tail) => tail.foldLeft(a) { case (prev, arg) => Expr.BinaryOp(prev, Operator.Times, arg) } }

  // plus-expression = times-expression *(whsp plus whsp times-expression)
  // Operands are combined left to right.
  def plus_expression[_: P]: P[Expr] =
    P( times_expression ~ ( whsp ~ plus ~ whsp ~ times_expression ).rep )
        .map { case (a, tail) => tail.foldLeft(a) { case (prev, arg) => Expr.BinaryOp(prev, Operator.Plus, arg) } }

  // operator-expression = plus-expression
  def operator_expression[_: P]: P[Expr] = P( plus_expression )

  // annotated-expression = operator-expression [ whsp ":" whsp1 expression ]
  def annotated_expression[_: P]: P[Expr] =
    P( operator_expression ~ ( whsp ~ ":" ~ whsp1 ~ expression ).? )
        .map {
            case (a, Some(b)) => Expr.Annotated(a, b)
            case (a, None) => a
        }

  // let-binding = let whsp1 nonreserved-label whsp "=" whsp expression whsp1
  // Produces the pair (bound name, bound expression).
  def let_binding[_: P]: P[(String, Expr)] =
    P( let ~ whsp1 ~ nonreserved_label ~ whsp ~ "=" ~ whsp ~ expression ~ whsp1 )

  // expression = lambda / let / forall / function type / annotated-expression
  // The alternatives are tried in this order, backtracking when one fails.
  // NOTE(review): function_type and annotated_expression both begin with operator_expression,
  // so whenever no arrow follows, the operator expression is parsed a second time.
  // Each level of nested parentheses repeats this; consider merging the two alternatives
  // if deeply nested input turns out to be slow.
  def expression[_: P]: P[Expr] = P(
      lambda_abstraction | let_expression | forall_abstraction | function_type | annotated_expression
  )

  // "let x = e1 let y = e2 in e3" is folded from the right into
  // Let(x, e1, Let(y, e2, e3)).
  def let_expression[_: P]: P[Expr] = P(
    // 1*let-binding in whsp1 expression
    let_binding.rep(1) ~ in ~ whsp1 ~ expression
  ).map { case (lets, e) => lets.foldRight(e) { case ((varName, body), prev) => Expr.Let(varName, body, prev) } }

  def function_type[_: P]: P[Expr] = P(
    // a -> b (shorthand for forall (_ : a) -> b)
    operator_expression ~ whsp ~ arrow ~ whsp ~ expression
  ).map { case (a, b) => Expr.Forall("_", a, b) }

  def lambda_abstraction[_: P]: P[Expr] = P(
      // \(x : a) -> b
    lambda ~ whsp ~ "(" ~ whsp ~ nonreserved_label ~ whsp ~ ":" ~ whsp1 ~ expression ~ whsp ~ ")" ~ whsp ~ arrow ~ whsp ~ expression
  ).map { case (name, tipe, body) => Expr.Lambda(name, tipe, body) }

  def forall_abstraction[_: P]: P[Expr] = P(
      // forall (x : a) -> b
    forall ~ whsp ~ "(" ~ whsp ~ nonreserved_label ~ whsp ~ ":" ~ whsp1 ~ expression ~ whsp ~ ")" ~ whsp ~ arrow ~ whsp ~ expression
  ).map { case (name, tipe, body) => Expr.Forall(name, tipe, body) }

  // The main production of the grammar is "expression" that may be surrounded by whitespace.
  // Start/End force the whole input to be consumed.
  def full_expression[_: P]: P[Expr] = P( Start ~ whsp ~ expression ~ whsp ~ End )

  /** Parses `input` as a complete µDhall expression.
    *
    * Throws if the input cannot be parsed (the failure is raised by `.get`).
    */
  def parse(input: String): Expr = fastparse.parse(input, full_expression(_)).get.value

  /** Parses `input`, printing diagnostics to standard output.
    *
    * @return `Some(expr)` on success; `None` on failure, after printing the failure
    *         position, the expected label, the input split at the failure position,
    *         and the aggregated trace message.
    */
  def debugParse(input: String): Option[Expr] = {
    println(s"Parsing test string:\n---$input---\n")
    fastparse.parse(input, full_expression(_)) match {
      case Parsed.Success(e, index) =>
        println(s"Parsing succeeded! Reached index: $index (Full length)")
        Some(e)

      case Parsed.Failure(_, index, extra) =>
        // The label carried by a plain Failure was observed to be empty (it printed as '').
        // trace() re-runs the parse with detailed error tracking, so take both the label
        // and the aggregate message from the traced failure, computed once.
        val traced = extra.trace()
        println(s"Parsing failed at index: $index")
        println(s"Expected: '${traced.label}'")
        // Print context of the failure.
        val pre = input.substring(0, index)
        val post = input.substring(index)
        println(s"Context:\n'${pre}█${post}'")
        println(s"Trace: ${traced.longAggregateMsg}")
        None
    }
  }

}

// Sample input exercising adjacent `let` bindings without an intermediate `in`,
// a line comment, an indexed variable (y@0), a builtin, and the Unicode forms
// of lambda, forall and arrow.
val testString =
    """
    let x = 1 + y@0 * 2 -- A comment
    let z = Natural in
    λ(a : Type) → ∀(b : Type) → a → b → a
    """

// `.get` throws if parsing failed (debugParse returns None in that case).
prettyprint(Grammar.debugParse(testString).get)

48 deprecations (since 2.13.7); re-run enabling -deprecation for details, or try -help


Parsing test string:
---
    let x = 1 + y@0 * 2 -- A comment
    let z = Natural in
    λ(a : Type) → ∀(b : Type) → a → b → a
    ---

Parsing succeeded! Reached index: 107 (Full length)


[32mimport [39m[36mfastparse._[39m
[32mimport [39m[36mfastparse.NoWhitespace._[39m
defined [32mobject[39m [36mGrammar[39m
[36mtestString[39m: [32mString[39m = [32m"""
    let x = 1 + y@0 * 2 -- A comment
    let z = Natural in
    λ(a : Type) → ∀(b : Type) → a → b → a
    """[39m
[36mres74_4[39m: [32mString[39m = [32m"let x = 1 + y * 2 in let z = Natural in λ(a : Type) → ∀(b : Type) → ∀(_ : a) → ∀(_ : b) → a"[39m

In [75]:
Grammar.debugParse("1 + 1 )")  // This will fail because of the unbalanced closing parenthesis; the result is None.

Parsing test string:
---1 + 1 )---

Parsing failed at index: 6
Expected: ''
Context:
'1 + 1 █)'
Trace: Expected full_expression:1:1 / (whitespace_chunk | primitive_expression | times | plus | arrow | ":" | end-of-input):1:7, found ")"


[36mres75[39m: [32mOption[39m[[32mExpr[39m] = [32mNone[39m

In [76]:
// Extension syntax: "<source text>".dhall parses the string with Grammar.parse
// and yields the resulting Expr.
implicit class ParseDhall(input: String) {
  def dhall: Expr = Grammar.parse(input)
}

// Example: parse a simple sum.
"1 + 1".dhall

defined [32mclass[39m [36mParseDhall[39m
[36mres76_1[39m: [32mExpr[39m = [33mBinaryOp[39m(
  left = [33mNaturalLiteral[39m(value = [32m1[39m),
  op = Plus,
  right = [33mNaturalLiteral[39m(value = [32m1[39m)
)

In [80]:
// Extension syntax: expr.print renders an Expr through prettyprint().
implicit class PrintDhall(e: Expr) {
  def print: String = prettyprint(e)
}

// Each entry pairs a source text with the text expected after a parse / pretty-print round trip.
// NOTE(review): `validate` is defined elsewhere; presumably it applies the function to each
// left-hand side and compares the result with the right-hand side — confirm.
Seq(
    "(1 + 1) + 1" -> "1 + 1 + 1",
    "1 + (1 + (1))" -> "1 + 1 + 1",
).validate(source => source.dhall.print)

"Tests passed for prettyprint() associativity."

defined [32mclass[39m [36mPrintDhall[39m
[36mres80_2[39m: [32mString[39m = [32m"Tests passed for prettyprint() associativity."[39m