Skip to content

Commit

Permalink
fix(parsing): improve regex patterns for DevInParser and DevInLexer t…
Browse files Browse the repository at this point in the history
…o support more complex identifiers and whitespace handling #101

The commit addresses issues with the DevInParser and DevInLexer in the exts/devin-lang package. It introduces more flexible regex patterns for identifiers, allowing characters other than the initial '$', '@', or '/'. Additionally, the commit refactors the lexer to handle whitespace more efficiently, using a dedicated WHITE_SPACE token type. The parser definition is updated to leverage the new token types, and the build script is modified to reflect the changes in the generated parser and lexer classes. Finally, a test file is updated to demonstrate the new parsing capabilities.
  • Loading branch information
phodal committed Mar 11, 2024
1 parent b2bfd1b commit 7ea39e3
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 23 deletions.
5 changes: 1 addition & 4 deletions build.gradle.kts
Expand Up @@ -600,15 +600,12 @@ project(":exts:devin-lang") {
tasks {
// Configures the generateLexer task: the lexer grammar is read from
// src/grammar/DevInLexer.flex and purgeOldFiles is enabled.
// NOTE(review): targetOutputDir is assigned twice below. This listing is a diff
// rendered without +/- markers (its file header reports 1 addition and 4
// deletions), so presumably only the last assignment,
// src/gen/cc/unitmesh/language, remains after the commit — confirm against
// the repository.
generateLexer {
sourceFile.set(file("src/grammar/DevInLexer.flex"))
// targetDir.set("src/gen/com/feakin/intellij/lexer")
targetOutputDir.set(file("src/gen/cc/unitmesh/language/lexer"))
// targetClass.set("_FeakinLexer")
targetOutputDir.set(file("src/gen/cc/unitmesh/language"))
purgeOldFiles.set(true)
}

generateParser {
sourceFile.set(file("src/grammar/DevInParser.bnf"))
// targetRoot.set("src/gen")
targetRootOutputDir.set(file("src/gen"))
pathToParser.set("cc/unitmesh/language/parser/DevInParser.java")
pathToPsiRoot.set("cc/unitmesh/language/psi")
Expand Down
30 changes: 18 additions & 12 deletions exts/devin-lang/src/grammar/DevInLexer.flex
@@ -1,9 +1,8 @@
// Copyright 2000-2022 JetBrains s.r.o. and other contributors. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
package cc.unitmesh.language;

import com.intellij.lexer.FlexLexer;
import com.intellij.psi.tree.IElementType;
import cc.unitmesh.language.psi.DevInTypes;
import static cc.unitmesh.language.psi.DevInTypes.*;
import com.intellij.psi.TokenType;

%%
Expand All @@ -23,20 +22,27 @@ import com.intellij.psi.TokenType;
%eof{ return;
%eof}

CRLF=\R
WHITE_SPACE=[\ \n\t\f]
// $ variable
STRING=\"([^\\\"\r\n]|\\[^\r\n])*\"?
IDENTIFIER=[_a-zA-Z][_a-zA-Z0-9]*
EOL=\R
WHITE_SPACE=\s+

%state WAITING_VALUE
IDENTIFIER=[_a-zA-Z][_a-zA-Z0-9]*
TEXT_SEGMENT=[^[@\\$]_a-zA-Z0-9]+
WS=[ \t\n\x0B\f\r]
NEWLINE=\n|\r\n

%%
<YYINITIAL> {
{STRING} { return DevInTypes.STRING; }
{IDENTIFIER} { return DevInTypes.IDENTIFIER; }
}
{WHITE_SPACE} { return TokenType.WHITE_SPACE; }

({CRLF}|{WHITE_SPACE})+ { yybegin(YYINITIAL); return TokenType.WHITE_SPACE; }
"$" { return DOLLAR; }
"@" { return AT; }
"/" { return SLASH; }

{IDENTIFIER} { return IDENTIFIER; }
{TEXT_SEGMENT} { return TEXT_SEGMENT; }
{WS} { return WS; }
{NEWLINE} { return NEWLINE; }

}

[^] { return TokenType.BAD_CHARACTER; }
17 changes: 12 additions & 5 deletions exts/devin-lang/src/grammar/DevInParser.bnf
Expand Up @@ -18,18 +18,25 @@
DOLLAR = '$'
AT = '@'
SLASH = '/'
// char should be any character except start with $, @, /
IDENTIFIER = 'regexp:[_a-zA-Z][_a-zA-Z0-9]*'
TEXT_SEGMENT = 'regexp:[^[@\\$]_a-zA-Z0-9]+'

WS = 'regexp:\s'

NEWLINE = 'regexp:\n|\r\n'
]
}

DevInFile ::= item_*
DevInFile ::= item*

item_ ::= (useVariable|useAgent|useCommand|STRING)
private item ::= (useVariable|useAgent|useCommand|TEXT_SEGMENT|NEWLINE)

useVariable ::= '$' IDENTIFIER
// $use-variable
useVariable ::= DOLLAR IDENTIFIER WS*

// @use-agent
useAgent ::= '@' IDENTIFIER
useAgent ::= AT IDENTIFIER WS*

// /use-command
useCommand ::= '/' IDENTIFIER
useCommand ::= SLASH IDENTIFIER WS*
Expand Up @@ -2,6 +2,7 @@ package cc.unitmesh.language

import cc.unitmesh.language.parser.DevInParser
import cc.unitmesh.language.psi.DevInFile
import cc.unitmesh.language.psi.DevInTypes
import com.intellij.lang.ASTNode
import com.intellij.lang.ParserDefinition
import com.intellij.lang.PsiParser
Expand Down Expand Up @@ -44,7 +45,7 @@ internal class DevInParserDefinition : ParserDefinition {

/**
 * Creates the PSI element for [node] by delegating to
 * `DevInTypes.Factory.createElement`.
 *
 * NOTE(review): this listing shows both `TODO()` and the delegating `return`.
 * It appears to be a diff rendered without +/- markers, so `TODO()` is
 * presumably the removed line and the `return` its replacement — confirm
 * against the repository.
 */
@NotNull
override fun createElement(node: ASTNode?): PsiElement {
// As written here, TODO() throws before the return below can run.
TODO()
return DevInTypes.Factory.createElement(node)
}

companion object {
Expand Down
2 changes: 1 addition & 1 deletion exts/devin-lang/src/test/testData/parser/BasicTest.devin
@@ -1 +1 @@
Gen hello, world @gen
@gen what's this?
12 changes: 12 additions & 0 deletions exts/devin-lang/src/test/testData/parser/BasicTest.txt
@@ -0,0 +1,12 @@
FILE
DevInUseAgentImpl(USE_AGENT)
PsiElement(DevInTokenType.@)('@')
PsiElement(DevInTokenType.IDENTIFIER)('gen')
PsiWhiteSpace(' ')
PsiErrorElement:DevInTokenType.$, DevInTokenType./, DevInTokenType.@, DevInTokenType.NEWLINE, DevInTokenType.TEXT_SEGMENT or DevInTokenType.WS expected, got 'what'
PsiElement(DevInTokenType.IDENTIFIER)('what')
PsiElement(DevInTokenType.TEXT_SEGMENT)(''')
PsiElement(DevInTokenType.IDENTIFIER)('s')
PsiWhiteSpace(' ')
PsiElement(DevInTokenType.IDENTIFIER)('this')
PsiElement(DevInTokenType.TEXT_SEGMENT)('?')

0 comments on commit 7ea39e3

Please sign in to comment.