Skip to content

Commit

Permalink
Drop some compatibility and tighten up lexing
Browse files Browse the repository at this point in the history
Old versions of the C tools used to ignore unhandled characters in some
contexts due to sloppy lexing, which Kconfiglib emulated for
compatibility (things like "---help---" used to depend on it).

This was improved in the C tools by commit c2264564 ("kconfig: warn of
unhandled characters in Kconfig commands"), committed in July 2015.

Remove the compatibility hack and tighten up the lexing in Kconfiglib as
well. It will make implementing the new preprocessor stuff smoother.

The major version will be bumped.
  • Loading branch information
ulfalizer committed Jul 1, 2018
1 parent fccfbae commit c19fc11
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 37 deletions.
50 changes: 14 additions & 36 deletions kconfiglib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1584,28 +1584,21 @@ def _tokenize(self):
# Token index (minus one). Set for later -- not further updated here.
self._tokens_i = -1

# See comment at _initial_token_re_match definition
initial_token_match = _initial_token_re_match(s)
if not initial_token_match:
# Initial token on the line
command_match = _command_re_match(s)
if not command_match:
self._tokens = (None,)
return

# Tricky implementation detail: While parsing a token, 'token' refers
# to the previous token. See _STRING_LEX for why this is needed.
token = _get_keyword(initial_token_match.group(1))

if token == _T_HELP:
# Avoid junk after "help", e.g. "---", being registered as a
# symbol
self._tokens = (token, None)
return

token = _get_keyword(command_match.group(1))
if token is None:
self._parse_error("expected keyword as first token")

self._tokens = [token]
# The current index in the string being tokenized
i = initial_token_match.end()
i = command_match.end()

# Main tokenization loop (for tokens past the first one)
while i < len(s):
Expand Down Expand Up @@ -1712,17 +1705,15 @@ def _tokenize(self):
self._lookup_const_sym(val)

elif c == "&":
# Invalid characters are ignored (backwards-compatible)
if i >= len(s) or s[i] != "&":
continue
self._parse_error("malformed operator")

token = _T_AND
i += 1

elif c == "|":
# Invalid characters are ignored (backwards-compatible)
if i >= len(s) or s[i] != "|":
continue
self._parse_error("malformed operator")

token = _T_OR
i += 1
Expand Down Expand Up @@ -1763,8 +1754,7 @@ def _tokenize(self):
token = _T_GREATER

else:
# Invalid characters are ignored (backwards-compatible)
continue
self._parse_error("invalid character in line")

# Skip trailing whitespace
while i < len(s) and s[i].isspace():
Expand Down Expand Up @@ -5317,6 +5307,7 @@ def _warn_choice_select_imply(sym, expr, expr_type):
# Keyword to token map, with the get() method assigned directly as a small
# optimization
_get_keyword = {
"---help---": _T_HELP,
"allnoconfig_y": _T_ALLNOCONFIG_Y,
"bool": _T_BOOL,
"boolean": _T_BOOL,
Expand Down Expand Up @@ -5393,25 +5384,12 @@ def _warn_choice_select_imply(sym, expr, expr_type):
# Use ASCII regex matching on Python 3. It's already the default on Python 2.
_RE_ASCII = 0 if _IS_PY2 else re.ASCII

# Note: This hack is no longer needed as of upstream commit c226456
# (kconfig: warn of unhandled characters in Kconfig commands). It
# is kept around for backwards compatibility.
#
# The initial word on a line is parsed specially. Let
# command_chars = [A-Za-z0-9_]. Then
# - leading non-command_chars characters are ignored, and
# - the first token consists of the following one or more
# command_chars characters.
# This is why things like "----help--" are accepted.
#
# In addition to the initial token, the regex also matches trailing whitespace
# so that we can jump straight to the next token (or to the end of the line if
# there's just a single token).
# The initial token on a line. Also eats leading and trailing whitespace, so
# that we can jump straight to the next token (or to the end of the line if
# there is only one token).
#
# As an optimization, this regex fails to match for lines containing just a
# comment.
_initial_token_re_match = \
re.compile(r"[^A-Za-z0-9_#]*([A-Za-z0-9_]+)\s*", _RE_ASCII).match
# This regex will also fail to match for empty lines and comment lines.
_command_re_match = re.compile(r"\s*([A-Za-z0-9_-]+)\s*", _RE_ASCII).match

# Matches an identifier/keyword, also eating trailing whitespace
_id_keyword_re_match = re.compile(r"([A-Za-z0-9_/.-]+)\s*", _RE_ASCII).match
Expand Down
2 changes: 1 addition & 1 deletion tests/Krepr
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ endif
config BASIC
bool
default y
help
---help---

config VISIBLE
bool "visible"
Expand Down

0 comments on commit c19fc11

Please sign in to comment.