@@ -900,6 +900,119 @@ extension Parser {
900
900
}
901
901
}
902
902
903
extension TokenConsumer {
  /// Decides whether the word at the cursor should be treated as a contextual
  /// keyword introducing a keyword-prefixed syntax (e.g. `yield <expr>`), based
  /// on the token that follows the word.
  ///
  /// - Parameters:
  ///   - exprFlavor: The expression context. When using this function for a
  ///     statement, e.g. 'yield', use `.basic`. It is forwarded to
  ///     `atValidTrailingClosure(flavor:)` when the next token is '{' and
  ///     `acceptClosure` is `true`.
  ///   - acceptClosure: Only consulted when the next token is '{'. If `false`,
  ///     the result is `false`. If `true`, the result is whether a lookahead
  ///     positioned after the word is at a valid trailing closure.
  ///   - preferPostfixExpr: Only consulted when the next token is '.', '(' or
  ///     '[' and there is trivia between the word and that token. In that case
  ///     the result is `!preferPostfixExpr`.
  /// - Returns: `true` if the word should be parsed as the keyword of a
  ///   keyword-prefixed syntax, `false` if it should be left as an ordinary
  ///   word (e.g. an identifier in an expression).
  mutating func atContextualKeywordPrefixedSyntax(
    exprFlavor: Parser.ExprFlavor,
    acceptClosure: Bool = false,
    preferPostfixExpr: Bool = true
  ) -> Bool {
    let following = peek()

    // Whatever the keyword introduces has to begin on the same line as the word.
    guard !following.isAtStartOfLine else {
      return false
    }

    switch following.rawTokenKind {

    case .period, .leftParen, .leftSquare:
      // Genuinely ambiguous: each of these can either continue a postfix
      // expression or begin a new primary expression:
      //
      //  - '.'  member access vs. implicit member expression
      //  - '('  call          vs. tuple expression
      //  - '['  subscript     vs. collection literal
      //
      // Trivia between the word and the token is used as the tie-breaker.
      let triviaBytesBetween = currentToken.trailingTriviaByteLength + following.leadingTriviaByteLength
      guard triviaBytesBetween != 0 else {
        // Nothing separates them, e.g. <word>(...): the word is a decl-ref expression.
        return false
      }
      return !preferPostfixExpr

    case .leftBrace:
      // E.g. <word> { ... }
      // Ambiguous between a trailing closure on the word and an
      // immediately-invoked closure following the keyword. Whitespace cannot
      // settle this because a space before a trailing closure is the norm.
      // Even though it is source breaking, the keyword interpretation is
      // preferred whenever the syntax accepts immediately-invoked closure
      // patterns, e.g. 'unsafe { ... }()'.
      guard acceptClosure else {
        return false
      }
      return self.withLookahead { lookahead in
        // Step past the word so that the lookahead is positioned at the '{'.
        lookahead.consumeAnyToken()
        return lookahead.atValidTrailingClosure(flavor: exprFlavor)
      }

    case .keyword:
      switch Keyword(following.tokenText) {
      case .as, .is, .in:
        // E.g. <word> is <expr>: the word is an operand here.
        return false
      default:
        // Every other lexer-classified keyword is identifier-like.
        // E.g. <word> self
        return true
      }

    case .identifier, .dollarIdentifier, .wildcard,
      // E.g. <word> foo
      .integerLiteral, .floatLiteral,
      .stringQuote, .multilineStringQuote, .singleQuote, .rawStringPoundDelimiter,
      .regexSlash, .regexPoundDelimiter,
      // E.g. <word> 1
      .prefixAmpersand, .prefixOperator, .atSign, .backslash, .pound:
      // E.g. <word> !<expr>
      // All of these can only begin a new expression, so the word is the keyword.
      return true

    case .binaryOperator, .equal, .arrow, .infixQuestionMark,
      // E.g. <word> != <expr>
      .postfixOperator, .postfixQuestionMark, .exclamationMark, .ellipsis,
      // E.g. <word>++
      .rightBrace, .rightParen, .rightSquare,
      // E.g. <word>]
      .colon, .comma,
      // E.g. <word>,
      .semicolon, .endOfFile, .poundElse, .poundElseif, .poundEndif:
      // The word is used as an operand, or nothing follows it.
      return false

    case .leftAngle, .rightAngle:
      // The lexer never produces these token kinds.
      return false

    case .stringSegment, .regexLiteralPattern:
      // Only reachable if this is called from inside a string/regex literal.
      return false

    case .backtick, .poundAvailable, .poundUnavailable,
      .poundSourceLocation, .poundIf, .shebang, .unknown:
      // Invalid under either interpretation.
      // E.g. <word> #available
      return false
    }
  }
}
1015
+
903
1016
// MARK: Lookahead
904
1017
905
1018
extension Parser . Lookahead {
@@ -949,91 +1062,16 @@ extension Parser.Lookahead {
949
1062
// FIXME: 'repeat' followed by '{' could be a pack expansion
950
1063
// with a closure pattern.
951
1064
return self . peek ( ) . rawTokenKind == . leftBrace
952
- case . yield? :
953
- switch self . peek ( ) . rawTokenKind {
954
- case . prefixAmpersand:
955
- // "yield &" always denotes a yield statement.
956
- return true
957
- case . leftParen:
958
- // "yield (", by contrast, must be disambiguated with additional
959
- // context. We always consider it an apply expression of a function
960
- // called `yield` for the purposes of the parse.
961
- return false
962
- case . binaryOperator:
963
- // 'yield &= x' treats yield as an identifier.
964
- return false
965
- default :
966
- // "yield" followed immediately by any other token is likely a
967
- // yield statement of some singular expression.
968
- return !self . peek ( ) . isAtStartOfLine
969
- }
970
- case . discard? :
971
- let next = peek ( )
972
- // The thing to be discarded must be on the same line as `discard`.
973
- if next. isAtStartOfLine {
974
- return false
975
- }
976
- switch next. rawTokenKind {
977
- case . identifier, . keyword:
978
- // Since some identifiers like "self" are classified as keywords,
979
- // we want to recognize those too, to handle "discard self". We also
980
- // accept any identifier since we want to emit a nice error message
981
- // later on during type checking.
982
- return true
983
- default :
984
- // any other token following "discard" means it's not the statement.
985
- // For example, could be the function call "discard()".
986
- return false
987
- }
988
-
989
- case . then:
990
- return atStartOfThenStatement ( preferExpr: preferExpr)
1065
+ case . yield? , . discard? :
1066
+ return atContextualKeywordPrefixedSyntax ( exprFlavor: . basic, preferPostfixExpr: true )
1067
+ case . then? :
1068
+ return atContextualKeywordPrefixedSyntax ( exprFlavor: . basic, preferPostfixExpr: false )
991
1069
992
1070
case nil :
993
1071
return false
994
1072
}
995
1073
}
996
1074
997
/// Reports whether the current token is a `then` keyword that should be
/// parsed as the start of a `then` statement rather than as part of an
/// expression.
///
/// - Parameter preferExpr: When `true`, a `then` that is not at the start of
///   a line is never treated as a `then` statement.
/// - Returns: `true` if a `then` statement should be parsed here.
mutating func atStartOfThenStatement(preferExpr: Bool) -> Bool {
  // Must actually be positioned at the 'then' keyword.
  if !self.at(.keyword(.then)) {
    return false
  }

  // When an expression is preferred, only a 'then' that begins a line may
  // start a ThenStmt.
  if preferExpr, !self.atStartOfLine {
    return false
  }

  // 'then' followed by a binary or postfix operator reads as an expression.
  guard
    peek(isAtAnyIn: BinaryOperatorLike.self) == nil,
    peek(isAtAnyIn: PostfixOperatorLike.self) == nil
  else {
    return false
  }

  switch PrepareForKeywordMatch(peek()) {
  case TokenSpec(.is), TokenSpec(.as), .leftBrace:
    // 'is' and 'as' are handled like the binary operator case, and '{' is a
    // trailing closure. In both situations parse as an expr.
    return false

  case .leftParen, .leftSquare, .period:
    // Decided by trivia between 'then' and the token: with trivia it is a
    // 'then' statement, without it it is an expression such as `then(...)`,
    // `then[...]` or `then.foo`.
    return !(self.currentToken.trailingTriviaText.isEmpty && peek().leadingTriviaText.isEmpty)

  default:
    return true
  }
}
1036
-
1037
1075
/// Returns whether the parser's current position is the start of a switch case,
1038
1076
/// given that we're in the middle of a switch already.
1039
1077
mutating func atStartOfSwitchCase( allowRecovery: Bool = false ) -> Bool {
0 commit comments