Skip to content

PHP lexer generation ommits semicolon in switch case statement #4788

Open
@Debdutta-Panda

Description

@Debdutta-Panda

Below is the generated code(problematic part only)
private function actionDATE(?RuleContext $localContext, int $actionIndex): void
{
switch ($actionIndex) {
case 0:
4//here the semicolon missed

			break;

			case 1:
			2//here the semicolon missed

			break;

			case 2:
			2//here the semicolon missed

			break;
		}
	}
	private function actionTIME(?RuleContext $localContext, int $actionIndex): void
	{
		switch ($actionIndex) {
			case 3:
			2//here the semicolon missed

			break;

			case 4:
			2//here the semicolon missed

			break;

			case 5:
			2//here the semicolon missed

			break;
		}
	}

It is happening for the following grammar
grammar flux;

// =================== Lexer Rules ===================

// Keywords
BOOLEAN_LITERAL: 'true' | 'false';
IF : 'if';
ELSE : 'else';
THEN : 'then';
FOR : 'for';
IN : 'in';
FUNCTION : 'function';
RETURN : 'return';
PROGRAM : 'program';
DEFAULT : 'default';

// Operators
PLUS : '+';
MINUS : '-';
MULTIPLY : '*';
DIVIDE : '/';
MODULO : '%';
EQUALS : '==';
NOT_EQUAL : '!=';
GREATER : '>';
LESS : '<';
GREATER_EQ : '>=';
LESS_EQ : '<=';
AND : 'and';
OR : 'or';
QUESTION : '?';
COLON : ':';
SEMICOLON : ';';
COMMA : ',';
DOT : '.';
LBRACE : '{';
RBRACE : '}';
LPAREN : '(';
RPAREN : ')';
LBRACKET : '[';
RBRACKET : ']';
IMPLIES : '=>';
INCREMENT : '++';
DECREMENT : '--';
NEGATION : '!';

// Literals
NUMBER : [0-9]+ ('.' [0-9]+)?;
STRING : '"' (~["\] | '\' .)* '"';
DATE : [0-9]{4} '' [0-9]{2} '' [0-9]{2};
TIME : [0-9]{2} ':' [0-9]{2} (':' [0-9]{2})?;
DATETIME : DATE 'T' TIME;
PERCENTAGE : NUMBER '%';
WEEKDAY : 'Monday' | 'Tuesday' | 'Wednesday' | 'Thursday' | 'Friday' | 'Saturday' | 'Sunday';
MONTH : 'January' | 'February' | 'March' | 'April' | 'May' | 'June' |
'July' | 'August' | 'September' | 'October' | 'November' | 'December';
RELATIVE_DATE : 'today' | 'tomorrow' | 'yesterday' | 'prevMonth' | 'nextMonth' |
'prevYear' | 'nextYear' | 'prevWeek' | 'nextWeek';
BREAK : 'break' [0-9]* ; // Exit N levels of loops
CONTINUE : 'continue' ; // Skips the current iteration of the innermost loop

// Identifiers
IDENTIFIER : [a-zA-Z_][a-zA-Z0-9_]*;

// Whitespace & Comments
WS : [ \t\r\n]+ -> skip;
COMMENT : '#' ~[\r\n]* -> skip;

// =================== Parser Rules ===================
program: block EOF;
block : (statement SEMICOLON?)* ;

statement
: assignment #assignmentItem
| ifStatement #ifStatementItem
| loop #loopItem
| functionDefinition #functionDefinitionItem
| functionCall #functionCallAsStatementItem
| switchCase #switchCaseItem
| returnStatement #returnStatementItem
| expression #expressionItem
;

ASSIGNMENT_OPERATOR
: '='
| '+='
| '-='
| '*='
| '/='
| '%='
| '&='
| '|='
;

assignment : IDENTIFIER ASSIGNMENT_OPERATOR expression ;

// Refactored expression to eliminate left recursion
expression
: logicalOrExpression
;

logicalOrExpression
: logicalAndExpression (OR logicalAndExpression)*
;

logicalAndExpression
: equalityExpression (AND equalityExpression)*
;

equalityExpression
: relationalExpression ((EQUALS | NOT_EQUAL) relationalExpression)?
;

relationalExpression
: left=additiveExpression (operator=(GREATER | LESS | GREATER_EQ | LESS_EQ) right=additiveExpression)?
;

additiveExpression
: multiplicativeExpression ((PLUS | MINUS) multiplicativeExpression)*
;

multiplicativeExpression
: unaryExpression ((MULTIPLY | DIVIDE | MODULO) unaryExpression)*
;

unaryExpression
: primaryExpression #primaryExpressionItem
| operator=(PLUS | MINUS | INCREMENT | DECREMENT | NEGATION) unary=unaryExpression #unaryOprationItem
;

primaryExpression
: literal #literalItem
| IDENTIFIER #identifierItem
| LPAREN expression RPAREN #parenthesizedExpression
| functionCall #functionCallItem
| objectLiteral #objectLiteralItem
| objectAccess #objectAccessItem
| ifStatement #ifStatementExpressionItem
| functionDefinition #functionDefinitionExpressionItem
| switchCase #switchCaseStatementExpressionItem
| rangeExpression #rangeExpressionItem
;

literal
: NUMBER #numberItem
| STRING #stringItem
| INTERPOLATED_STRING #interpolatedStringItem
| DATE #dateItem
| TIME #timeItem
| DATETIME #dateTimeItem
| PERCENTAGE #percentageItem
| WEEKDAY #weekdayItem
| MONTH #monthItem
| RELATIVE_DATE #relativDateItem
| BOOLEAN_LITERAL #booleanLiteralItem
;

rangeExpression : LPAREN expression '...' expression RPAREN // Open interval
| LBRACKET expression '...' expression RBRACKET // Closed interval
| LPAREN expression '...' expression RBRACKET // Half-open interval
| LBRACKET expression '...' expression RPAREN // Half-open interval
| LBRACKET expression '...' expression RBRACKET // Half-open interval
| array // Half-open interval
;
array: LBRACKET (expression (COMMA expression)*)? RBRACKET;
//braced_block: LBRACE block RBRACE;

ifStatement : IF ifexpr=expression THEN LBRACE ifblock=block RBRACE elseIfItems=elseIfStatement* (ELSE LBRACE elseblock=block RBRACE)? ;
elseIfStatement: ELSE IF ifexpr=expression THEN LBRACE ifblock=block RBRACE;
loop : FOR IDENTIFIER IN expression ('step' expression)? LBRACE block RBRACE ;

functionDefinition : FUNCTION IDENTIFIER? LPAREN (expression (COMMA expression)*)? RPAREN LBRACE block RBRACE ;

functionCallIdentifier
: IDENTIFIER
| (LPAREN expression RPAREN)
;

functionCall : functionCallIdentifier LPAREN (expression (COMMA expression)*)? RPAREN ;

switchCase : 'switch' LBRACE
switchCaseItems*
switchCaseDefault?
RBRACE;
switchCaseDefault: (DEFAULT IMPLIES switchCaseResult);
switchCaseItems: (condition=expression IMPLIES result=switchCaseResult);
switchCaseResult: ((LBRACE block RBRACE) | expression);

returnStatement : RETURN expression ;

// Object Literal
objectLiteral : LBRACE (objectProperty (COMMA objectProperty)*)? RBRACE ;

objectProperty : IDENTIFIER COLON expression ;

// Object Access
objectAccessStart
: literal
| IDENTIFIER
| functionCall
| (LPAREN expression RPAREN)
| objectLiteral
| switchCase
| rangeExpression
;
objectAccessNext
: IDENTIFIER
;

objectAccessSuffix: ((LBRACKET expression RBRACKET)|(LPAREN (expression (COMMA expression))? RPAREN));
objectAccess
: (objectAccessStart objectAccessSuffix
(DOT objectAccessNext objectAccessSuffix*)+) // e.g., obj.property or obj["property"]
;

fragment ESCAPED_BACKTICK : '\' ; fragment ESCAPED_CHAR : '\\' . ; fragment STRING_CONTENT : ~[\{}] | ESCAPED_CHAR ;

INTERPOLATED_STRING : '' (STRING_CONTENT | '{{' IDENTIFIER '}}')* '' ;

I am generating php lexer and parser by the following command
java -jar antlr-4.13.2-complete.jar -Dlanguage=PHP ./flux/flux.g4

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions