Skip to content

Commit

Permalink
Fix: enable Python 3.12 to parse templates (#785)
Browse files Browse the repository at this point in the history
* Fix: enable Python 3.12 to parse templates

Closes #784.

This PR fixes the inability of `web.py` to run on Python 3.12, and it
does so by matching any unmatched `"` in a string.

In Python 3.12, changes to `tokenize.generate_tokens()` require all
quotes to be matched, and the `web.py` template parser hands
unterminated strings to `tokenize.generate_tokens()` quite frequently.

This PR makes a tacit assumption that we should avoid rewriting the
template parser if possible while also allowing `web.py` to run on
Python 3.12.

To that end, it handles every `TokenError` that is caused by an unmatched
string literal by adding a `"` to the line. Although this appears to
work, I am aware that appending a `"` to each line that has an unmatched
`"` is less than ideal. However, it seemed a very easy way to avoid
rewriting the template parser. I am absolutely open to other approaches.

*IF* this approach looks as if it has legs, I can look more into
where that "extra" `"` goes, why it seems not to matter, and write some
unit tests.

With regard to testing, I tested this with as many pages on Open Library
as I could (via the local development environment), and it seems to
work.

* noqa: complexity and excess statements for read_expr()

* Add suggestions from @tfmorris.

* Switch to more-itertools.peekable

 and set linter limits back down again

* Fix off-by-1 error and add test coverage

* Linting fixes

* Add `more_itertools` as a dependency

`more_itertools.peekable()` is used now.

`cheroot` already requires `more_itertools`, so this dependency is
already installed anyway, but adding it as a dependency for `webpy`
itself will ensure that if `cheroot` drops the `more_itertools`
dependency, `webpy` will still require it.

---------

Co-authored-by: Tom Morris <tfmorris@gmail.com>
  • Loading branch information
scottbarnes and tfmorris committed Feb 21, 2024
1 parent 5709b1f commit d364932
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 48 deletions.
3 changes: 1 addition & 2 deletions pyproject.toml
Expand Up @@ -83,14 +83,13 @@ show-source = true
target-version = "py38" target-version = "py38"


[tool.ruff.mccabe] [tool.ruff.mccabe]
max-complexity = 26 max-complexity = 20


[tool.ruff.pylint] [tool.ruff.pylint]
allow-magic-value-types = ["int", "str"] allow-magic-value-types = ["int", "str"]
max-args = 9 # default is 5 max-args = 9 # default is 5
max-branches = 17 # default is 12 max-branches = 17 # default is 12
max-returns = 8 # default is 6 max-returns = 8 # default is 6
max-statements = 51 # default is 50


[tool.codespell] [tool.codespell]
ignore-words-list = "asend,gae" ignore-words-list = "asend,gae"
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,2 +1,3 @@
cheroot>=6.0.0 cheroot>=6.0.0
more_itertools>=2.6
multipart>=0.2.4 multipart>=0.2.4
64 changes: 63 additions & 1 deletion tests/test_template.py
@@ -1,7 +1,14 @@
import unittest import unittest


import web import web
from web.template import SecurityError, Template from web.template import ExpressionNode, Parser, SecurityError, Template


class TestItem:
__test__ = False # silence collection warning from test framework

def __init__(self):
self.id = 12345




class _TestResult: class _TestResult:
Expand Down Expand Up @@ -43,6 +50,61 @@ def test_overridden(self):
f = t(tpl, globals={"print": lambda x: x}) f = t(tpl, globals={"print": lambda x: x})
assert repr(f()) == "'blah\\n'" assert repr(f()) == "'blah\\n'"


def test_quotes(self):
template = 'a="$foo" <p>'
f = t(template, globals={"foo": "bar"})
assert repr(f()) == "'a=\"bar\" <p>\\n'"

def test_accessor(self):
template = 'a="$foo.id"<p>'
f = t(template, globals={"foo": TestItem()})
assert repr(f()) == "'a=\"12345\"<p>\\n'"

def test_href(self):
template = '<a href="/del/$item.id">Delete</a>'
f = t(template, globals={"item": TestItem()})
assert repr(f()) == "'<a href=\"/del/12345\">Delete</a>\\n'"


class TestParser(unittest.TestCase):
"""
Test the Parser.
Tests functions from the Parser class as if the following template were loaded:
test_template = '''$def with (back, docs)
$var title: Index
<p><a href="$back">&larr; Back to Index</a></p>
<ul>
$for path, title in docs:
<li><a href="$path">$title</a></li>
</ul>
'''
"""

def test_read_expr(self) -> None:
"""
Test Parser.read_expr() with the `text` values it would get from
`Parser.read_node(), if processing `test_template`.
"""
got = Parser().read_expr('back">&larr; Back to Index</a></p>\n')
expression_node = got[0]
assert isinstance(expression_node, ExpressionNode)
assert repr(expression_node) == "$back"
assert got[1] == '">&larr; Back to Index</a></p>\n'

got = Parser().read_expr('path">$title</a></li>\n')
expression_node = got[0]
assert isinstance(expression_node, ExpressionNode)
assert repr(expression_node) == "$path"
assert got[1] == '">$title</a></li>\n'

got = Parser().read_expr("title</a></li>\n")
expression_node = got[0]
assert isinstance(expression_node, ExpressionNode)
assert repr(expression_node) == "$title"
assert got[1] == "</a></li>\n"



class TestRender: class TestRender:
def test_template_without_ext(self, tmpdir): def test_template_without_ext(self, tmpdir):
Expand Down
3 changes: 2 additions & 1 deletion web/browser.py
@@ -1,6 +1,7 @@
"""Browser to test web applications. """Browser to test web applications.
(from web.py) (from web.py)
""" """

import os import os
import webbrowser import webbrowser
from http.cookiejar import CookieJar from http.cookiejar import CookieJar
Expand Down Expand Up @@ -46,7 +47,7 @@ def reset(self):


def build_opener(self): def build_opener(self):
"""Builds the opener using (urllib2/urllib.request).build_opener. """Builds the opener using (urllib2/urllib.request).build_opener.
Subclasses can override this function to prodive custom openers. Subclasses can override this function to provide custom openers.
""" """
return urllib_build_opener() return urllib_build_opener()


Expand Down
1 change: 1 addition & 0 deletions web/contrib/template.py
@@ -1,6 +1,7 @@
""" """
Interface to various templating engines. Interface to various templating engines.
""" """

import os.path import os.path


__all__ = ["render_cheetah", "render_genshi", "render_mako", "cache"] __all__ = ["render_cheetah", "render_genshi", "render_mako", "cache"]
Expand Down
1 change: 1 addition & 0 deletions web/db.py
Expand Up @@ -2,6 +2,7 @@
Database API Database API
(part of web.py) (part of web.py)
""" """

import ast import ast
import datetime import datetime
import os import os
Expand Down
1 change: 0 additions & 1 deletion web/net.py
Expand Up @@ -3,7 +3,6 @@
(from web.py) (from web.py)
""" """



import datetime import datetime
import re import re
import socket import socket
Expand Down
94 changes: 51 additions & 43 deletions web/template.py
Expand Up @@ -31,11 +31,15 @@
import ast import ast
import builtins import builtins
import glob import glob
import itertools
import os import os
import sys import sys
import token
import tokenize import tokenize
from functools import partial from functools import partial


from more_itertools import peekable

from .net import websafe from .net import websafe
from .utils import re_compile, safestr, safeunicode, storage from .utils import re_compile, safestr, safeunicode, storage
from .webapi import config from .webapi import config
Expand Down Expand Up @@ -242,7 +246,7 @@ def read_keyword(self, text):
line, text = splitline(text) line, text = splitline(text)
return StatementNode(line.strip() + "\n"), text return StatementNode(line.strip() + "\n"), text


def read_expr(self, text, escape=True): def read_expr(self, text, escape=True): # noqa: C901, PLR0915
"""Reads a python expression from the text and returns the expression and remaining text. """Reads a python expression from the text and returns the expression and remaining text.
expr -> simple_expr | paren_expr expr -> simple_expr | paren_expr
Expand Down Expand Up @@ -271,10 +275,10 @@ def simple_expr():
extended_expr() extended_expr()


def identifier(): def identifier():
next(tokens) return next(tokens)


def extended_expr(): def extended_expr():
lookahead = tokens.lookahead() lookahead = tokens.peek()
if lookahead is None: if lookahead is None:
return return
elif lookahead.value == ".": elif lookahead.value == ".":
Expand All @@ -288,7 +292,7 @@ def extended_expr():
def attr_access(): def attr_access():
from token import NAME # python token constants from token import NAME # python token constants


if tokens.lookahead2().type == NAME: if tokens[1].type == NAME:
next(tokens) # consume dot next(tokens) # consume dot
identifier() identifier()
extended_expr() extended_expr()
Expand All @@ -297,7 +301,7 @@ def paren_expr():
begin = next(tokens).value begin = next(tokens).value
end = parens[begin] end = parens[begin]
while True: while True:
if tokens.lookahead().value in parens: if tokens.peek().value in parens:
paren_expr() paren_expr()
else: else:
t = next(tokens) t = next(tokens)
Expand All @@ -306,57 +310,61 @@ def paren_expr():


parens = {"(": ")", "[": "]", "{": "}"} parens = {"(": ")", "[": "]", "{": "}"}


def get_tokens(text): def get_tokens(text: str):
"""tokenize text using python tokenizer. """tokenize text using python tokenizer.
Python tokenizer ignores spaces, but they might be important in some cases. Python tokenizer ignores spaces, but they might be important in some cases.
This function introduces dummy space tokens when it identifies any ignored space. This function introduces dummy space tokens when it identifies any ignored space.
Each token is a storage object containing type, value, begin and end. Each token is a storage object containing type, value, begin and end.
""" """
i = iter([text])
readline = lambda: next(i)
end = None
for t in tokenize.generate_tokens(readline):
t = storage(type=t[0], value=t[1], begin=t[2], end=t[3])
if end is not None and end != t.begin:
_, x1 = end
_, x2 = t.begin
yield storage(type=-1, value=text[x1:x2], begin=end, end=t.begin)
end = t.end
yield t

class BetterIter:
"""Iterator like object with 2 support for 2 look aheads."""

def __init__(self, items):
self.iteritems = iter(items)
self.items = []
self.position = 0
self.current_item = None


def lookahead(self): def tokenize_text(input_text):
if len(self.items) <= self.position: i = iter([input_text])
self.items.append(self._next()) readline = lambda: next(i)
return self.items[self.position] end = None
for t in tokenize.generate_tokens(readline):
t = storage(type=t[0], value=t[1], begin=t[2], end=t[3])
if end is not None and end != t.begin:
_, x1 = end
_, x2 = t.begin
yield storage(
type=-1, value=input_text[x1:x2], begin=end, end=t.begin
)
end = t.end
yield t

try:
yield from tokenize_text(text)
except tokenize.TokenError as e:
# Things like unterminated string literals or EOF in multi-line literals will raise exceptions
# tokenize the error free portion, then return an error token with the rest of the text
error_pos = e.args[1][1] - 1
fixed_text = text[0:error_pos]
yield from itertools.chain(
tokenize_text(fixed_text),
error_token_generator(text, error_pos + 1, len(text)),
)

def error_token_generator(text, start, end):
yield storage(
type=token.ERRORTOKEN, value=text[start:], begin=start, end=end
)


def _next(self): class peekable2(peekable):
try: """
return next(self.iteritems) A peekable class which caches the last item returned by next()
except StopIteration: """
return None


def lookahead2(self): def __init__(self, iterable):
if len(self.items) <= self.position + 1: super().__init__(iterable)
self.items.append(self._next()) self.current_item = None
return self.items[self.position + 1]


def __next__(self): def __next__(self):
self.current_item = self.lookahead() self.current_item = super().__next__()
self.position += 1
return self.current_item return self.current_item


tokens = BetterIter(get_tokens(text)) tokens = peekable2(get_tokens(text))


if tokens.lookahead().value in parens: if tokens.peek().value in parens:
paren_expr() paren_expr()
else: else:
simple_expr() simple_expr()
Expand Down
1 change: 1 addition & 0 deletions web/test.py
@@ -1,6 +1,7 @@
"""test utilities """test utilities
(part of web.py) (part of web.py)
""" """

import doctest import doctest
import sys import sys
import unittest import unittest
Expand Down

0 comments on commit d364932

Please sign in to comment.