Skip to content

Commit

Permalink
feat: add preprocessor
Browse files Browse the repository at this point in the history
  • Loading branch information
vberlier committed Aug 3, 2023
1 parent 7917a5e commit 2a5573c
Show file tree
Hide file tree
Showing 4 changed files with 220 additions and 41 deletions.
86 changes: 85 additions & 1 deletion tests/test_stream.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
import re
from itertools import islice

import pytest

from tokenstream import TokenStream, UnexpectedToken
from tokenstream import (
INITIAL_LOCATION,
SourceLocation,
Token,
TokenStream,
UnexpectedToken,
)


def test_basic():
Expand Down Expand Up @@ -321,3 +330,78 @@ def test_eof():
stream.expect_eof()

stream.expect_eof()


# Matches a line continuation: a backslash, optional spaces/tabs, a newline
# (optionally preceded by "\r"), then any spaces/tabs leading the next line.
# The whole match is a capture group so that re.split() keeps each
# continuation in its result list between the surrounding pieces of text.
WRAP_REGEX = re.compile(r"(\\[ \t]*\r?\n[ \t]*)")


def wrap_lines(source: str) -> tuple[str, list[SourceLocation], list[SourceLocation]]:
    """Join backslash-continued lines and record where each join happened.

    The source is split on ``WRAP_REGEX``; every continuation sequence is
    dropped from the output while the text around it is kept.

    Returns a 3-tuple:
      * the joined text,
      * for each continuation, the location in the original source right
        after the continuation sequence,
      * for each continuation, the matching location in the joined text.
    """
    # With a single capture group, split() alternates text / continuation /
    # text ..., always starting with a (possibly empty) piece of text.
    head, *rest = WRAP_REGEX.split(source)

    pieces = [head]
    source_mappings: list[SourceLocation] = []
    preprocessed_mappings: list[SourceLocation] = []

    original_cursor = INITIAL_LOCATION.skip_over(head)
    joined_cursor = original_cursor

    for continuation, chunk in zip(rest[::2], rest[1::2]):
        # Only the original cursor advances over the removed continuation.
        original_cursor = original_cursor.skip_over(continuation)
        source_mappings.append(original_cursor)
        preprocessed_mappings.append(joined_cursor)

        pieces.append(chunk)
        original_cursor = original_cursor.skip_over(chunk)
        joined_cursor = joined_cursor.skip_over(chunk)

    return "".join(pieces), source_mappings, preprocessed_mappings


# Checks that a TokenStream built with the wrap_lines preprocessor exposes the
# joined text and still reports token locations that span several lines.
def test_wrap_line():
# Input containing backslash line continuations.
# NOTE(review): the expected positions below (e.g. colno=9 for the first
# token) imply leading whitespace inside this literal — verify the literal's
# whitespace against the repository before relying on this copy.
source = r"""
hello\
world
f\
o\
o
bar
"""

# Text expected once every continuation sequence has been removed.
expected_preprocessing = """
helloworld
foo
bar
"""

stream = TokenStream(source, preprocessor=wrap_lines)
assert stream.preprocessed_source == expected_preprocessing

# Each token value comes from the joined text, while its start and end
# locations may sit on different lines (e.g. lineno 2 -> 3 for "helloworld"),
# presumably because they are mapped back onto the original source.
with stream.syntax(word=r"\w+"):
assert list(stream) == [
Token(
type="word",
value="helloworld",
location=SourceLocation(pos=9, lineno=2, colno=9),
end_location=SourceLocation(pos=29, lineno=3, colno=14),
),
Token(
type="word",
value="foo",
location=SourceLocation(pos=38, lineno=4, colno=9),
end_location=SourceLocation(pos=61, lineno=6, colno=10),
),
Token(
type="word",
value="bar",
location=SourceLocation(pos=71, lineno=8, colno=9),
end_location=SourceLocation(pos=74, lineno=8, colno=12),
),
]
6 changes: 3 additions & 3 deletions tokenstream/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
]


from .location import SourceLocation
from .location import INITIAL_LOCATION, SourceLocation
from .token import Token, TokenPattern, explain_patterns


Expand All @@ -31,8 +31,8 @@ class InvalidSyntax(Exception):

def __init__(self, *args: object) -> None:
    """Initialize the error with default location data and empty extras.

    Args:
        *args: Forwarded unchanged to the parent exception constructor.
    """
    super().__init__(*args)
    # Both ends start at the shared INITIAL_LOCATION constant; the earlier
    # SourceLocation(0, 1, 1) assignments were overwritten immediately and
    # have been removed.
    self.location = INITIAL_LOCATION
    self.end_location = INITIAL_LOCATION
    self.alternatives = {}
    self.notes = []

Expand Down
68 changes: 66 additions & 2 deletions tokenstream/location.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# Names exported by `from <this module> import *`.
__all__ = [
"SourceLocation",
"set_location",
"INITIAL_LOCATION",
"UNKNOWN_LOCATION",
]


from bisect import bisect
from dataclasses import FrozenInstanceError, replace
from typing import Any, NamedTuple, TypeVar
from typing import Any, NamedTuple, Sequence, TypeVar

T = TypeVar("T")

Expand Down Expand Up @@ -39,14 +41,76 @@ def format(self, filename: str, message: str) -> str:
def with_horizontal_offset(self, offset: int) -> "SourceLocation":
    """Create a modified source location along the horizontal axis.

    Only ``pos`` and ``colno`` are shifted by ``offset``; ``lineno`` is left
    untouched. A location whose ``unknown`` flag is set is returned as is.

    >>> INITIAL_LOCATION.with_horizontal_offset(41)
    SourceLocation(pos=41, lineno=1, colno=42)
    """
    if self.unknown:
        return self
    return SourceLocation(self.pos + offset, self.lineno, self.colno + offset)

def skip_over(self, value: str) -> "SourceLocation":
    """Compute the location reached after advancing past ``value``.

    The position grows by the length of the text and the line number by the
    number of newlines in it. If the text contains a newline, the column is
    counted from the last one; otherwise it simply moves right by the length.

    >>> INITIAL_LOCATION.skip_over("hello\\nworld")
    SourceLocation(pos=11, lineno=2, colno=6)
    """
    length = len(value)
    last_newline = value.rfind("\n")

    if last_newline == -1:
        # Still on the same line: shift the current column.
        column = self.colno + length
    else:
        # Characters after the last newline, converted to a 1-based column.
        column = length - last_newline

    return SourceLocation(
        self.pos + length,
        self.lineno + value.count("\n"),
        column,
    )

def map(
    self,
    input_mappings: Sequence["SourceLocation"],
    output_mappings: Sequence["SourceLocation"],
) -> "SourceLocation":
    """Translate this location from one coordinate space to another.

    ``input_mappings`` and ``output_mappings`` are parallel sequences of
    corresponding locations, given in order. The last input mapping that is
    not greater than this location is used as the anchor, and the location
    is relocated relative to the matching output mapping. A location that
    comes before every input mapping is returned unchanged.

    >>> INITIAL_LOCATION.map([], [])
    SourceLocation(pos=0, lineno=1, colno=1)
    >>> mappings1 = [SourceLocation(16, 2, 27), SourceLocation(19, 2, 30)]
    >>> mappings2 = [SourceLocation(24, 3, 8), SourceLocation(67, 4, 12)]
    >>> INITIAL_LOCATION.map(mappings1, mappings2)
    SourceLocation(pos=0, lineno=1, colno=1)
    >>> SourceLocation(15, 2, 26).map(mappings1, mappings2)
    SourceLocation(pos=15, lineno=2, colno=26)
    >>> SourceLocation(16, 2, 27).map(mappings1, mappings2)
    SourceLocation(pos=24, lineno=3, colno=8)
    >>> SourceLocation(18, 2, 29).map(mappings1, mappings2)
    SourceLocation(pos=26, lineno=3, colno=10)
    >>> SourceLocation(19, 2, 30).map(mappings1, mappings2)
    SourceLocation(pos=67, lineno=4, colno=12)
    >>> SourceLocation(31, 3, 6).map(mappings1, mappings2)
    SourceLocation(pos=79, lineno=5, colno=6)
    """
    # bisect() gives the insertion point to the right of equal entries, so
    # the entry just before it is the closest mapping at or before ``self``.
    anchor = bisect(input_mappings, self) - 1
    if anchor >= 0:
        return self.relocate(input_mappings[anchor], output_mappings[anchor])
    return self

def relocate(
    self,
    base_location: "SourceLocation",
    target_location: "SourceLocation",
) -> "SourceLocation":
    """Return this location re-expressed relative to ``target_location``.

    The position and line offsets between this location and
    ``base_location`` are re-applied on top of ``target_location``. The
    column is only shifted when the result stays on the target's line;
    otherwise the current column is kept.
    """
    own_pos, own_lineno, own_colno = self

    new_pos = target_location.pos + (own_pos - base_location.pos)
    new_lineno = target_location.lineno + (own_lineno - base_location.lineno)

    stays_on_target_line = new_lineno == target_location.lineno
    new_colno = (
        target_location.colno + (own_colno - base_location.colno)
        if stays_on_target_line
        else own_colno
    )

    return SourceLocation(new_pos, new_lineno, new_colno)


# Starting location: offset 0, line 1, column 1.
INITIAL_LOCATION = SourceLocation(pos=0, lineno=1, colno=1)
# Placeholder location (pos=-1, line 0, column 0).
# NOTE(review): presumably the value the `unknown` check recognises — confirm
# against the SourceLocation definition.
UNKNOWN_LOCATION = SourceLocation(pos=-1, lineno=0, colno=0)


Expand Down

0 comments on commit 2a5573c

Please sign in to comment.