Skip to content

Commit 295997a

Browse files
committed
[Parser] Add 'atContextualKeywordPrefixedSyntax' method
Unified disambiguation method for contextual-keyword prefixed syntax.
1 parent d9fabf7 commit 295997a

File tree

3 files changed

+130
-135
lines changed

3 files changed

+130
-135
lines changed

Sources/SwiftParser/Expressions.swift

Lines changed: 6 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -381,20 +381,6 @@ extension Parser {
381381
}
382382
}
383383

384-
/// Whether the current token is a valid contextual expression modifier like
385-
/// `copy`, `consume`.
386-
///
387-
/// `copy` etc. are only contextually a keyword if they are followed by an
388-
/// identifier or keyword on the same line. We do this to ensure that we do
389-
/// not break any copy functions defined by users.
390-
private mutating func atContextualExpressionModifier() -> Bool {
391-
return self.peek(
392-
isAt: TokenSpec(.identifier, allowAtStartOfLine: false),
393-
TokenSpec(.dollarIdentifier, allowAtStartOfLine: false),
394-
TokenSpec(.self, allowAtStartOfLine: false)
395-
)
396-
}
397-
398384
/// Parse an expression sequence element.
399385
mutating func parseSequenceExpressionElement(
400386
flavor: ExprFlavor,
@@ -445,27 +431,7 @@ extension Parser {
445431
)
446432
)
447433
case (.unsafe, let handle)?:
448-
if self.peek().isAtStartOfLine
449-
// Closing paired syntax
450-
|| self.peek(isAt: .rightParen, .rightSquare, .rightBrace)
451-
// Assignment
452-
|| self.peek(isAt: .equal)
453-
// As an argument label or in a list context.
454-
|| self.peek(isAt: .colon, .comma)
455-
// Start of a closure in a context where it should be interpreted as
456-
// being part of a statement.
457-
|| (flavor == .stmtCondition && self.peek(isAt: .leftBrace))
458-
// Avoid treating as an "unsafe" expression when there is no trivia
459-
// following the "unsafe" and the following token could either be a
460-
// postfix expression or a subexpression:
461-
// - Member access vs. leading .
462-
// - Call vs. tuple expression.
463-
// - Subscript vs. array or dictionary expression
464-
|| (self.peek(isAt: .period, .leftParen, .leftSquare) && self.peek().leadingTriviaByteLength == 0
465-
&& self.currentToken.trailingTriviaByteLength == 0)
466-
// End of file
467-
|| self.peek(isAt: .endOfFile)
468-
{
434+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor, acceptClosure: true, preferPostfixExpr: false) {
469435
break EXPR_PREFIX
470436
}
471437

@@ -486,7 +452,7 @@ extension Parser {
486452
assert(self.experimentalFeatures.contains(.oldOwnershipOperatorSpellings))
487453
fallthrough
488454
case (.borrow, let handle)?:
489-
if !atContextualExpressionModifier() {
455+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
490456
break EXPR_PREFIX
491457
}
492458
let borrowTok = self.eat(handle)
@@ -503,7 +469,7 @@ extension Parser {
503469
)
504470

505471
case (.copy, let handle)?:
506-
if !atContextualExpressionModifier() {
472+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
507473
break EXPR_PREFIX
508474
}
509475

@@ -524,7 +490,7 @@ extension Parser {
524490
assert(self.experimentalFeatures.contains(.oldOwnershipOperatorSpellings))
525491
fallthrough
526492
case (.consume, let handle)?:
527-
if !atContextualExpressionModifier() {
493+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
528494
break EXPR_PREFIX
529495
}
530496

@@ -546,7 +512,7 @@ extension Parser {
546512
return RawExprSyntax(parsePackExpansionExpr(repeatHandle: handle, flavor: flavor, pattern: pattern))
547513

548514
case (.each, let handle)?:
549-
if !atContextualExpressionModifier() {
515+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
550516
break EXPR_PREFIX
551517
}
552518

@@ -561,7 +527,7 @@ extension Parser {
561527
)
562528

563529
case (.any, _)?:
564-
if !atContextualExpressionModifier() && !self.peek().isContextualPunctuator("~") {
530+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) && !self.peek().isContextualPunctuator("~") {
565531
break EXPR_PREFIX
566532
}
567533

Sources/SwiftParser/Statements.swift

Lines changed: 117 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,119 @@ extension Parser {
900900
}
901901
}
902902

903+
extension TokenConsumer {
904+
/// Determines whether the word at the cursor looks like the start of a keyword-prefixed syntax.
905+
///
906+
/// - Parameters:
907+
/// - exprFlavor: The expression context. When using this function for a statement, e.g. 'yield',
908+
/// use `.basic`.
909+
/// - acceptClosure: When the next token is '{' and it looks like a closure, use this value as the result.
910+
/// - preferPostfixExpr: When the next token is '.', '(', or '[' and there is a space between the word and that token,
911+
/// use `!preferPostfixExpr` as the result.
912+
mutating func atContextualKeywordPrefixedSyntax(
913+
exprFlavor: Parser.ExprFlavor,
914+
acceptClosure: Bool = false,
915+
preferPostfixExpr: Bool = true
916+
) -> Bool {
917+
let next = peek()
918+
919+
// The next token must be on the same line.
920+
if next.isAtStartOfLine {
921+
return false
922+
}
923+
924+
switch next.rawTokenKind {
925+
926+
case .identifier, .dollarIdentifier, .wildcard:
927+
// E.g. <word> foo
928+
return true
929+
930+
case .integerLiteral, .floatLiteral,
931+
.stringQuote, .multilineStringQuote, .singleQuote, .rawStringPoundDelimiter,
932+
.regexSlash, .regexPoundDelimiter:
933+
// E.g. <word> 1
934+
return true
935+
936+
case .prefixAmpersand, .prefixOperator, .atSign, .backslash, .pound:
937+
// E.g. <word> !<expr>
938+
return true
939+
940+
case .keyword:
941+
switch Keyword(next.tokenText) {
942+
case .as, .is, .in:
943+
// E.g. <word> is <expr>
944+
return false
945+
default:
946+
// Other lexer-classified keywords are identifier-like.
947+
// E.g. <word> self
948+
return true
949+
}
950+
951+
case .binaryOperator, .equal, .arrow, .infixQuestionMark:
952+
// E.g. <word> != <expr>
953+
return false
954+
case .postfixOperator, .postfixQuestionMark, .exclamationMark, .ellipsis:
955+
// E.g. <word>++
956+
return false
957+
case .rightBrace, .rightParen, .rightSquare:
958+
// E.g. <word>]
959+
return false
960+
case .colon, .comma:
961+
// E.g. <word>,
962+
return false
963+
case .semicolon, .endOfFile, .poundElse, .poundElseif, .poundEndif:
964+
return false
965+
966+
case .leftAngle, .rightAngle:
967+
// The lexer never produces these token kinds.
968+
return false
969+
970+
case .stringSegment, .regexLiteralPattern:
971+
// Calling this function inside a string/regex literal?
972+
return false
973+
974+
case .backtick, .poundAvailable, .poundUnavailable,
975+
.poundSourceLocation, .poundIf, .shebang, .unknown:
976+
// These are invalid for both cases
977+
// E.g. <word> #available
978+
return false
979+
980+
case .period, .leftParen, .leftSquare:
981+
// These are truly ambiguous. They can be either the start of a postfix expression
982+
// suffix or the start of a primary expression:
983+
//
984+
// - Member access vs. implicit member expression
985+
// - Call vs. tuple expression
986+
// - Subscript vs. collection literal
987+
//
988+
let hasSpace = (next.leadingTriviaByteLength + currentToken.trailingTriviaByteLength) != 0
989+
if !hasSpace {
990+
// No space, the word is a decl-ref expression
991+
return false
992+
}
993+
return !preferPostfixExpr
994+
995+
case .leftBrace:
996+
// E.g. <word> { ... }
997+
// Trailing closure is also ambiguous:
998+
//
999+
// - Trailing closure vs. immediately-invoked closure
1000+
//
1001+
// Checking for whitespace after the word cannot help here because people
1002+
// usually put a space before trailing closures. Even though that is source
1003+
// breaking, we prefer parsing it as a keyword if the syntax accepts
1004+
// immediately-invoked closure patterns. E.g. 'unsafe { ... }()'
1005+
if !acceptClosure {
1006+
return false
1007+
}
1008+
return self.withLookahead {
1009+
$0.consumeAnyToken()
1010+
return $0.atValidTrailingClosure(flavor: exprFlavor)
1011+
}
1012+
}
1013+
}
1014+
}
1015+
9031016
// MARK: Lookahead
9041017

9051018
extension Parser.Lookahead {
@@ -949,91 +1062,16 @@ extension Parser.Lookahead {
9491062
// FIXME: 'repeat' followed by '{' could be a pack expansion
9501063
// with a closure pattern.
9511064
return self.peek().rawTokenKind == .leftBrace
952-
case .yield?:
953-
switch self.peek().rawTokenKind {
954-
case .prefixAmpersand:
955-
// "yield &" always denotes a yield statement.
956-
return true
957-
case .leftParen:
958-
// "yield (", by contrast, must be disambiguated with additional
959-
// context. We always consider it an apply expression of a function
960-
// called `yield` for the purposes of the parse.
961-
return false
962-
case .binaryOperator:
963-
// 'yield &= x' treats yield as an identifier.
964-
return false
965-
default:
966-
// "yield" followed immediately by any other token is likely a
967-
// yield statement of some singular expression.
968-
return !self.peek().isAtStartOfLine
969-
}
970-
case .discard?:
971-
let next = peek()
972-
// The thing to be discarded must be on the same line as `discard`.
973-
if next.isAtStartOfLine {
974-
return false
975-
}
976-
switch next.rawTokenKind {
977-
case .identifier, .keyword:
978-
// Since some identifiers like "self" are classified as keywords,
979-
// we want to recognize those too, to handle "discard self". We also
980-
// accept any identifier since we want to emit a nice error message
981-
// later on during type checking.
982-
return true
983-
default:
984-
// any other token following "discard" means it's not the statement.
985-
// For example, could be the function call "discard()".
986-
return false
987-
}
988-
989-
case .then:
990-
return atStartOfThenStatement(preferExpr: preferExpr)
1065+
case .yield?, .discard?:
1066+
return atContextualKeywordPrefixedSyntax(exprFlavor: .basic, preferPostfixExpr: true)
1067+
case .then?:
1068+
return atContextualKeywordPrefixedSyntax(exprFlavor: .basic, preferPostfixExpr: false)
9911069

9921070
case nil:
9931071
return false
9941072
}
9951073
}
9961074

997-
/// Whether we're currently at a `then` token that should be parsed as a
998-
/// `then` statement.
999-
mutating func atStartOfThenStatement(preferExpr: Bool) -> Bool {
1000-
guard self.at(.keyword(.then)) else {
1001-
return false
1002-
}
1003-
1004-
// If we prefer an expr and aren't at the start of a newline, then don't
1005-
// parse a ThenStmt.
1006-
if preferExpr && !self.atStartOfLine {
1007-
return false
1008-
}
1009-
1010-
// If 'then' is followed by a binary or postfix operator, prefer to parse as
1011-
// an expr.
1012-
if peek(isAtAnyIn: BinaryOperatorLike.self) != nil || peek(isAtAnyIn: PostfixOperatorLike.self) != nil {
1013-
return false
1014-
}
1015-
1016-
switch PrepareForKeywordMatch(peek()) {
1017-
case TokenSpec(.is), TokenSpec(.as):
1018-
// Treat 'is' and 'as' like the binary operator case, and parse as an
1019-
// expr.
1020-
return false
1021-
1022-
case .leftBrace:
1023-
// This is a trailing closure.
1024-
return false
1025-
1026-
case .leftParen, .leftSquare, .period:
1027-
// These are handled based on whether there is trivia between the 'then'
1028-
// and the token. If so, it's a 'then' statement. Otherwise it should
1029-
// be treated as an expression, e.g `then(...)`, `then[...]`, `then.foo`.
1030-
return !self.currentToken.trailingTriviaText.isEmpty || !peek().leadingTriviaText.isEmpty
1031-
default:
1032-
break
1033-
}
1034-
return true
1035-
}
1036-
10371075
/// Returns whether the parser's current position is the start of a switch case,
10381076
/// given that we're in the middle of a switch already.
10391077
mutating func atStartOfSwitchCase(allowRecovery: Bool = false) -> Bool {

Tests/SwiftParserTest/ThenStatementTests.swift

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -297,13 +297,7 @@ final class ThenStatementTests: ParserTestCase {
297297
"""
298298
then1️⃣
299299
""",
300-
diagnostics: [
301-
DiagnosticSpec(
302-
message: "expected expression in 'then' statement",
303-
fixIts: ["insert expression"]
304-
)
305-
],
306-
fixedSource: "then <#expression#>"
300+
substructure: DeclReferenceExprSyntax(baseName: .identifier("then"))
307301
)
308302
}
309303

@@ -312,13 +306,7 @@ final class ThenStatementTests: ParserTestCase {
312306
"""
313307
then1️⃣;
314308
""",
315-
diagnostics: [
316-
DiagnosticSpec(
317-
message: "expected expression in 'then' statement",
318-
fixIts: ["insert expression"]
319-
)
320-
],
321-
fixedSource: "then <#expression#>;"
309+
substructure: DeclReferenceExprSyntax(baseName: .identifier("then"))
322310
)
323311
}
324312

@@ -342,7 +330,7 @@ final class ThenStatementTests: ParserTestCase {
342330
then
343331
0
344332
""",
345-
substructure: ThenStmtSyntax(expression: IntegerLiteralExprSyntax(0))
333+
substructure: DeclReferenceExprSyntax(baseName: .identifier("then"))
346334
)
347335
}
348336

@@ -685,7 +673,10 @@ final class ThenStatementTests: ParserTestCase {
685673
then
686674
.foo
687675
""",
688-
substructure: ThenStmtSyntax(expression: MemberAccessExprSyntax(name: .identifier("foo")))
676+
substructure: MemberAccessExprSyntax(
677+
base: DeclReferenceExprSyntax(baseName: .identifier("then")),
678+
name: .identifier("foo")
679+
)
689680
)
690681
}
691682

0 commit comments

Comments
 (0)