From 8799b53da4839c0647501049a1e6da2d07d6f1ce Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 12:43:36 +0100 Subject: [PATCH 01/27] replace utcfromtimezone with fromtimezone(x, utc).replace(tzinfo=None) --- src/twisted/python/_tzhelper.py | 13 +++++++++---- src/twisted/python/log.py | 10 +++++++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/twisted/python/_tzhelper.py b/src/twisted/python/_tzhelper.py index c4abf5c6241..2b841889d75 100644 --- a/src/twisted/python/_tzhelper.py +++ b/src/twisted/python/_tzhelper.py @@ -6,7 +6,12 @@ Time zone utilities. """ -from datetime import datetime as DateTime, timedelta as TimeDelta, tzinfo as TZInfo +from datetime import ( + datetime as DateTime, + timedelta as TimeDelta, + timezone, + tzinfo as TZInfo, +) from typing import Optional __all__ = [ @@ -68,9 +73,9 @@ def fromLocalTimeStamp(cls, timeStamp: float) -> "FixedOffsetTimeZone": Create a time zone with a fixed offset corresponding to a time stamp in the system's locally configured time zone. """ - offset = DateTime.fromtimestamp(timeStamp) - DateTime.utcfromtimestamp( - timeStamp - ) + offset = DateTime.fromtimestamp(timeStamp) - DateTime.fromtimestamp( + timeStamp, timezone.utc + ).replace(tzinfo=None) return cls(offset) def utcoffset(self, dt: Optional[DateTime]) -> TimeDelta: diff --git a/src/twisted/python/log.py b/src/twisted/python/log.py index 266940115bb..4392486bbee 100644 --- a/src/twisted/python/log.py +++ b/src/twisted/python/log.py @@ -11,7 +11,7 @@ import time import warnings from abc import ABC, abstractmethod -from datetime import datetime +from datetime import datetime, timezone from typing import Any, BinaryIO, Dict, Optional, cast from zope.interface import Interface @@ -490,7 +490,9 @@ def getTimezoneOffset(self, when): @return: The number of seconds offset from UTC. West is positive, east is negative. """ - offset = datetime.utcfromtimestamp(when) - datetime.fromtimestamp(when) + offset = datetime.fromtimestamp(when, timezone.utc).replace( + tzinfo=None + ) - datetime.fromtimestamp(when) return offset.days * (60 * 60 * 24) + offset.seconds def formatTime(self, when): @@ -512,7 +514,9 @@ def formatTime(self, when): return datetime.fromtimestamp(when).strftime(self.timeFormat) tzOffset = -self.getTimezoneOffset(when) - when = datetime.utcfromtimestamp(when + tzOffset) + when = datetime.fromtimestamp(when + tzOffset, timezone.utc).replace( + tzinfo=None + ) tzHour = abs(int(tzOffset / 60 / 60)) tzMin = abs(int(tzOffset / 60 % 60)) if tzOffset < 0: From dd1a3fa6dbf7148053d05eb7505079d2c8185d7d Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 12:47:16 +0100 Subject: [PATCH 02/27] add newsfragment --- src/twisted/newsfragments/11908.bugfix | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/twisted/newsfragments/11908.bugfix diff --git a/src/twisted/newsfragments/11908.bugfix b/src/twisted/newsfragments/11908.bugfix new file mode 100644 index 00000000000..c38333bb0c2 --- /dev/null +++ b/src/twisted/newsfragments/11908.bugfix @@ -0,0 +1,2 @@ +utcfromtimestamp has been deprecated since Python 3.12, +use fromtimestamp(x, timezone.utc).replace(tzinfo=None) instead. 
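As a quick, standalone sanity check of the substitution used in this patch (not part of any diff; the timestamp value is an arbitrary example), both expressions below produce the same naive UTC datetime, and only the first emits a DeprecationWarning on Python 3.12:

from datetime import datetime, timezone

ts = 1692187416.0  # arbitrary example timestamp

old = datetime.utcfromtimestamp(ts)  # deprecated since Python 3.12
new = datetime.fromtimestamp(ts, timezone.utc).replace(tzinfo=None)

assert old == new
assert old.tzinfo is None and new.tzinfo is None
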
From aec69ad86601a3d21a66d26563151bce72e680ac Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 12:51:00 +0100 Subject: [PATCH 03/27] test on 3.12 --- .github/workflows/test.yaml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 496f6efa90d..38c0787239f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -69,7 +69,7 @@ jobs: # The Python version on which the job is executed. # We need at least one value here, so we go with latest Python version # that we support.. - python-version: ['3.10'] + python-version: ['3.11'] # Just use the default OS. runs-on: [''] # Human readable short description for this job. @@ -124,8 +124,11 @@ jobs: # Just Python 3.9 with default settings. - python-version: '3.9' - # Just Python 3.11 with default settings. - - python-version: '3.11' + # Just Python 3.10 with default settings. + - python-version: '3.10' + + # Just Python 3.12 with default settings. + - python-version: '3.12' # Newest macOS and newest Python supported versions. - python-version: '3.11' @@ -184,6 +187,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + allow-prereleases: true - name: Get pip cache dir id: pip-cache From 084d956ca7424586df19ce6e660f11df555fd6c0 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 12:55:14 +0100 Subject: [PATCH 04/27] add newsfragment --- src/twisted/newsfragments/11857.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/twisted/newsfragments/11857.feature diff --git a/src/twisted/newsfragments/11857.feature b/src/twisted/newsfragments/11857.feature new file mode 100644 index 00000000000..ef930305dd5 --- /dev/null +++ b/src/twisted/newsfragments/11857.feature @@ -0,0 +1 @@ +The CI suite was updated to execute the tests using a Python 3.12 pre-release From 58b711788940b7590949fd178e5e8b7725e22c53 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 14:05:29 +0100 Subject: [PATCH 05/27] asyncio.iscoroutine no longer accepts generators on 3.12 --- src/twisted/internet/defer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/twisted/internet/defer.py b/src/twisted/internet/defer.py index bc04edc8142..c1e59550ae0 100644 --- a/src/twisted/internet/defer.py +++ b/src/twisted/internet/defer.py @@ -9,6 +9,7 @@ """ from __future__ import annotations +import inspect import traceback import warnings from abc import ABC, abstractmethod @@ -1341,8 +1342,9 @@ def main(reactor): @raise ValueError: If C{coro} is not a coroutine or generator. """ - # asyncio.iscoroutine identifies generators as coroutines, too. - if iscoroutine(coro): + # asyncio.iscoroutine <3.12 identifies generators as coroutines, too. 
+ # for >=3.12 we need to check isgenerator also + if iscoroutine(coro) or inspect.isgenerator(coro): return _cancellableInlineCallbacks(coro) raise NotACoroutineError(f"{coro!r} is not a coroutine") From f8390e3cb8d60981b3a4d25e44205d330f58e347 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 14:05:58 +0100 Subject: [PATCH 06/27] tokenize no longer accepts \0 bytes in source code in 3.12 --- src/twisted/persisted/aot.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/twisted/persisted/aot.py b/src/twisted/persisted/aot.py index 204728058cd..d7c12a9cc03 100644 --- a/src/twisted/persisted/aot.py +++ b/src/twisted/persisted/aot.py @@ -231,13 +231,17 @@ def indentify(s): out = [] stack = [] l = ["", s] + + def readline(): + return l.pop().replace("\0", "") + for ( tokenType, tokenString, (startRow, startColumn), (endRow, endColumn), logicalLine, - ) in tokenize(l.pop): + ) in tokenize(readline): if tokenString in ["[", "(", "{"]: stack.append(tokenString) elif tokenString in ["]", ")", "}"]: From d8edf4cb1085e2b88353931b1b3851a2f68ff041 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 14:06:29 +0100 Subject: [PATCH 07/27] tempfile.mkdtemp() always returns absolute paths in 3.12 --- src/twisted/trial/_synctest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/twisted/trial/_synctest.py b/src/twisted/trial/_synctest.py index 226cf24801d..74f9b7b4221 100644 --- a/src/twisted/trial/_synctest.py +++ b/src/twisted/trial/_synctest.py @@ -1328,7 +1328,8 @@ def mktemp(self): ) if not os.path.exists(base): os.makedirs(base) - dirname = tempfile.mkdtemp("", "", base) + # workaround https://github.com/python/cpython/issues/51574 + dirname = os.path.relpath(tempfile.mkdtemp("", "", base)) return os.path.join(dirname, "temp") def _getSuppress(self): From 7b9d4d9232fc8e49140409a30037d230ca820931 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 14:25:35 +0100 Subject: [PATCH 08/27] work around builtins.sum accurancy increase on 3.12 --- src/twisted/test/test_task.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/twisted/test/test_task.py b/src/twisted/test/test_task.py index 49a224734af..61e53c9e113 100644 --- a/src/twisted/test/test_task.py +++ b/src/twisted/test/test_task.py @@ -467,6 +467,13 @@ def test_withCountFloatingPointBoundary(self): for x in range(count): clock.advance(interval) + # work around https://github.com/python/cpython/issues/100425 on py312 + def sum(items): + total = 0.0 + for item in items: + total += item + return total + # There is still an epsilon of inaccuracy here; 0.1 is not quite # exactly 1/10 in binary, so we need to push our clock over the # threshold. From 4cf97c53baed82fedbef7dfc4fbc700ec5658791 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 14:31:38 +0100 Subject: [PATCH 09/27] Update src/twisted/internet/defer.py --- src/twisted/internet/defer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/twisted/internet/defer.py b/src/twisted/internet/defer.py index c1e59550ae0..b33264f9d4a 100644 --- a/src/twisted/internet/defer.py +++ b/src/twisted/internet/defer.py @@ -1344,6 +1344,7 @@ def main(reactor): """ # asyncio.iscoroutine <3.12 identifies generators as coroutines, too. 
# for >=3.12 we need to check isgenerator also + # see https://github.com/python/cpython/issues/102748 if iscoroutine(coro) or inspect.isgenerator(coro): return _cancellableInlineCallbacks(coro) raise NotACoroutineError(f"{coro!r} is not a coroutine") From e903eea932565fcc82cba099343fa4b47aa5fd4d Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 14:32:42 +0100 Subject: [PATCH 10/27] Update src/twisted/persisted/aot.py --- src/twisted/persisted/aot.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/twisted/persisted/aot.py b/src/twisted/persisted/aot.py index d7c12a9cc03..68c20ed0cc3 100644 --- a/src/twisted/persisted/aot.py +++ b/src/twisted/persisted/aot.py @@ -233,6 +233,8 @@ def indentify(s): l = ["", s] def readline(): + # tokenize no longer supports null bytes in source code since 3.12 + # see https://github.com/python/cpython/issues/102856 return l.pop().replace("\0", "") for ( From c65da07015c87f058c2ed27aadf5165be344b104 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 14:45:02 +0100 Subject: [PATCH 11/27] add py311 and py312 trove classifiers --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index ded186654e7..59b1d48b04c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,8 @@ classifiers = [ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] dependencies = [ "zope.interface >= 5", From 1c67617d82a06878e66249a51f8978c565718708 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 15:38:14 +0100 Subject: [PATCH 12/27] Revert "Update src/twisted/persisted/aot.py" This reverts commit e903eea932565fcc82cba099343fa4b47aa5fd4d. --- src/twisted/persisted/aot.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/twisted/persisted/aot.py b/src/twisted/persisted/aot.py index 68c20ed0cc3..d7c12a9cc03 100644 --- a/src/twisted/persisted/aot.py +++ b/src/twisted/persisted/aot.py @@ -233,8 +233,6 @@ def indentify(s): l = ["", s] def readline(): - # tokenize no longer supports null bytes in source code since 3.12 - # see https://github.com/python/cpython/issues/102856 return l.pop().replace("\0", "") for ( From 3b3fa36602487bdac3a2339c3a73865c7c1bded8 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 15:38:16 +0100 Subject: [PATCH 13/27] Revert "tokenize no longer accepts \0 bytes in source code in 3.12" This reverts commit f8390e3cb8d60981b3a4d25e44205d330f58e347. 
--- src/twisted/persisted/aot.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/twisted/persisted/aot.py b/src/twisted/persisted/aot.py index d7c12a9cc03..204728058cd 100644 --- a/src/twisted/persisted/aot.py +++ b/src/twisted/persisted/aot.py @@ -231,17 +231,13 @@ def indentify(s): out = [] stack = [] l = ["", s] - - def readline(): - return l.pop().replace("\0", "") - for ( tokenType, tokenString, (startRow, startColumn), (endRow, endColumn), logicalLine, - ) in tokenize(readline): + ) in tokenize(l.pop): if tokenString in ["[", "(", "{"]: stack.append(tokenString) elif tokenString in ["]", ")", "}"]: From d1760b150ab77958f500606b06f33f03362cb3c8 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 15:54:49 +0100 Subject: [PATCH 14/27] vendor tokenize and token for py3.12 --- src/twisted/persisted/_token.py | 141 ++++++ src/twisted/persisted/_tokenize.py | 692 +++++++++++++++++++++++++++++ src/twisted/persisted/aot.py | 2 +- src/twisted/test/test_persisted.py | 6 + 4 files changed, 840 insertions(+), 1 deletion(-) create mode 100644 src/twisted/persisted/_token.py create mode 100644 src/twisted/persisted/_tokenize.py diff --git a/src/twisted/persisted/_token.py b/src/twisted/persisted/_token.py new file mode 100644 index 00000000000..3c807059b23 --- /dev/null +++ b/src/twisted/persisted/_token.py @@ -0,0 +1,141 @@ +""" +Token constants. +vendored from https://github.com/python/cpython/blob/6b825c1b8a14460641ca6f1647d83005c68199aa/Lib/token.py +Licence: https://docs.python.org/3/license.html +""" +# Auto-generated by Tools/scripts/generate_token.py + +__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] + +ENDMARKER = 0 +NAME = 1 +NUMBER = 2 +STRING = 3 +NEWLINE = 4 +INDENT = 5 +DEDENT = 6 +LPAR = 7 +RPAR = 8 +LSQB = 9 +RSQB = 10 +COLON = 11 +COMMA = 12 +SEMI = 13 +PLUS = 14 +MINUS = 15 +STAR = 16 +SLASH = 17 +VBAR = 18 +AMPER = 19 +LESS = 20 +GREATER = 21 +EQUAL = 22 +DOT = 23 +PERCENT = 24 +LBRACE = 25 +RBRACE = 26 +EQEQUAL = 27 +NOTEQUAL = 28 +LESSEQUAL = 29 +GREATEREQUAL = 30 +TILDE = 31 +CIRCUMFLEX = 32 +LEFTSHIFT = 33 +RIGHTSHIFT = 34 +DOUBLESTAR = 35 +PLUSEQUAL = 36 +MINEQUAL = 37 +STAREQUAL = 38 +SLASHEQUAL = 39 +PERCENTEQUAL = 40 +AMPEREQUAL = 41 +VBAREQUAL = 42 +CIRCUMFLEXEQUAL = 43 +LEFTSHIFTEQUAL = 44 +RIGHTSHIFTEQUAL = 45 +DOUBLESTAREQUAL = 46 +DOUBLESLASH = 47 +DOUBLESLASHEQUAL = 48 +AT = 49 +ATEQUAL = 50 +RARROW = 51 +ELLIPSIS = 52 +COLONEQUAL = 53 +OP = 54 +AWAIT = 55 +ASYNC = 56 +TYPE_IGNORE = 57 +TYPE_COMMENT = 58 +SOFT_KEYWORD = 59 +# These aren't used by the C tokenizer but are needed for tokenize.py +ERRORTOKEN = 60 +COMMENT = 61 +NL = 62 +ENCODING = 63 +N_TOKENS = 64 +# Special definitions for cooperation with parser +NT_OFFSET = 256 + +tok_name = {value: name + for name, value in globals().items() + if isinstance(value, int) and not name.startswith('_')} +__all__.extend(tok_name.values()) + +EXACT_TOKEN_TYPES = { + '!=': NOTEQUAL, + '%': PERCENT, + '%=': PERCENTEQUAL, + '&': AMPER, + '&=': AMPEREQUAL, + '(': LPAR, + ')': RPAR, + '*': STAR, + '**': DOUBLESTAR, + '**=': DOUBLESTAREQUAL, + '*=': STAREQUAL, + '+': PLUS, + '+=': PLUSEQUAL, + ',': COMMA, + '-': MINUS, + '-=': MINEQUAL, + '->': RARROW, + '.': DOT, + '...': ELLIPSIS, + '/': SLASH, + '//': DOUBLESLASH, + '//=': DOUBLESLASHEQUAL, + '/=': SLASHEQUAL, + ':': COLON, + ':=': COLONEQUAL, + ';': SEMI, + '<': LESS, + '<<': LEFTSHIFT, + '<<=': LEFTSHIFTEQUAL, + '<=': LESSEQUAL, + '=': EQUAL, + '==': EQEQUAL, + '>': GREATER, + '>=': GREATEREQUAL, + '>>': 
RIGHTSHIFT, + '>>=': RIGHTSHIFTEQUAL, + '@': AT, + '@=': ATEQUAL, + '[': LSQB, + ']': RSQB, + '^': CIRCUMFLEX, + '^=': CIRCUMFLEXEQUAL, + '{': LBRACE, + '|': VBAR, + '|=': VBAREQUAL, + '}': RBRACE, + '~': TILDE, +} + +def ISTERMINAL(x): + return x < NT_OFFSET + +def ISNONTERMINAL(x): + return x >= NT_OFFSET + +def ISEOF(x): + return x == ENDMARKER diff --git a/src/twisted/persisted/_tokenize.py b/src/twisted/persisted/_tokenize.py new file mode 100644 index 00000000000..18cb452f6c0 --- /dev/null +++ b/src/twisted/persisted/_tokenize.py @@ -0,0 +1,692 @@ +"""Tokenization help for Python programs. +vendored from https://github.com/python/cpython/blob/6b825c1b8a14460641ca6f1647d83005c68199aa/Lib/tokenize.py +Licence: https://docs.python.org/3/license.html + +tokenize(readline) is a generator that breaks a stream of bytes into +Python tokens. It decodes the bytes according to PEP-0263 for +determining source file encoding. + +It accepts a readline-like method which is called repeatedly to get the +next line of input (or b"" for EOF). It generates 5-tuples with these +members: + + the token type (see token.py) + the token (a string) + the starting (row, column) indices of the token (a 2-tuple of ints) + the ending (row, column) indices of the token (a 2-tuple of ints) + the original line (string) + +It is designed to match the working of the Python tokenizer exactly, except +that it produces COMMENT tokens for comments and gives type OP for all +operators. Additionally, all token lists start with an ENCODING token +which tells you which encoding was used to decode the bytes stream. +""" + +__author__ = 'Ka-Ping Yee ' +__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, ' + 'Skip Montanaro, Raymond Hettinger, Trent Nelson, ' + 'Michael Foord') +from builtins import open as _builtin_open +from codecs import lookup, BOM_UTF8 +import collections +import functools +from io import TextIOWrapper +import itertools as _itertools +import re +import sys +from ._token import * +from ._token import EXACT_TOKEN_TYPES + +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) +blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) + + +class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')): + def __repr__(self): + annotated_type = '%d (%s)' % (self.type, tok_name[self.type]) + return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' % + self._replace(type=annotated_type)) + + @property + def exact_type(self): + if self.type == OP and self.string in EXACT_TOKEN_TYPES: + return EXACT_TOKEN_TYPES[self.string] + else: + return self.type + +def group(*choices): return '(' + '|'.join(choices) + ')' +def any(*choices): return group(*choices) + '*' +def maybe(*choices): return group(*choices) + '?' + +# Note: we use unicode matching for names ("\w") but ascii matching for +# number literals. 
+Whitespace = r'[ \f\t]*' +Comment = r'#[^\r\n]*' +Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) +Name = r'\w+' + +Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' +Binnumber = r'0[bB](?:_?[01])+' +Octnumber = r'0[oO](?:_?[0-7])+' +Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' +Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) +Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' +Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', + r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) +Expfloat = r'[0-9](?:_?[0-9])*' + Exponent +Floatnumber = group(Pointfloat, Expfloat) +Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') +Number = group(Imagnumber, Floatnumber, Intnumber) + +# Return the empty string, plus all of the valid string prefixes. +def _all_string_prefixes(): + # The valid string prefixes. Only contain the lower case versions, + # and don't contain any permutations (include 'fr', but not + # 'rf'). The various permutations will be generated. + _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr'] + # if we add binary f-strings, add: ['fb', 'fbr'] + result = {''} + for prefix in _valid_string_prefixes: + for t in _itertools.permutations(prefix): + # create a list with upper and lower versions of each + # character + for u in _itertools.product(*[(c, c.upper()) for c in t]): + result.add(''.join(u)) + return result + +@functools.lru_cache +def _compile(expr): + return re.compile(expr, re.UNICODE) + +# Note that since _all_string_prefixes includes the empty string, +# StringPrefix can be the empty string (making it optional). +StringPrefix = group(*_all_string_prefixes()) + +# Tail end of ' string. +Single = r"[^'\\]*(?:\\.[^'\\]*)*'" +# Tail end of " string. +Double = r'[^"\\]*(?:\\.[^"\\]*)*"' +# Tail end of ''' string. +Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" +# Tail end of """ string. +Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' +Triple = group(StringPrefix + "'''", StringPrefix + '"""') +# Single-line ' or " string. +String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", + StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"') + +# Sorting in reverse order puts the long operators before their prefixes. +# Otherwise if = came before ==, == would get recognized as two instances +# of =. +Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True))) +Funny = group(r'\r?\n', Special) + +PlainToken = group(Number, Funny, String, Name) +Token = Ignore + PlainToken + +# First (or only) line of ' or " string. +ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + + group("'", r'\\\r?\n'), + StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + + group('"', r'\\\r?\n')) +PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple) +PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) + +# For a given string prefix plus quotes, endpats maps it to a regex +# to match the remainder of that string. _prefix can be empty, for +# a normal single or triple quoted string (with no prefix). +endpats = {} +for _prefix in _all_string_prefixes(): + endpats[_prefix + "'"] = Single + endpats[_prefix + '"'] = Double + endpats[_prefix + "'''"] = Single3 + endpats[_prefix + '"""'] = Double3 +del _prefix + +# A set of all of the single and triple quoted string prefixes, +# including the opening quotes. 
+single_quoted = set() +triple_quoted = set() +for t in _all_string_prefixes(): + for u in (t + '"', t + "'"): + single_quoted.add(u) + for u in (t + '"""', t + "'''"): + triple_quoted.add(u) +del t, u + +tabsize = 8 + +class TokenError(Exception): pass + +class StopTokenizing(Exception): pass + + +class Untokenizer: + + def __init__(self): + self.tokens = [] + self.prev_row = 1 + self.prev_col = 0 + self.encoding = None + + def add_whitespace(self, start): + row, col = start + if row < self.prev_row or row == self.prev_row and col < self.prev_col: + raise ValueError("start ({},{}) precedes previous end ({},{})" + .format(row, col, self.prev_row, self.prev_col)) + row_offset = row - self.prev_row + if row_offset: + self.tokens.append("\\\n" * row_offset) + self.prev_col = 0 + col_offset = col - self.prev_col + if col_offset: + self.tokens.append(" " * col_offset) + + def untokenize(self, iterable): + it = iter(iterable) + indents = [] + startline = False + for t in it: + if len(t) == 2: + self.compat(t, it) + break + tok_type, token, start, end, line = t + if tok_type == ENCODING: + self.encoding = token + continue + if tok_type == ENDMARKER: + break + if tok_type == INDENT: + indents.append(token) + continue + elif tok_type == DEDENT: + indents.pop() + self.prev_row, self.prev_col = end + continue + elif tok_type in (NEWLINE, NL): + startline = True + elif startline and indents: + indent = indents[-1] + if start[1] >= len(indent): + self.tokens.append(indent) + self.prev_col = len(indent) + startline = False + self.add_whitespace(start) + self.tokens.append(token) + self.prev_row, self.prev_col = end + if tok_type in (NEWLINE, NL): + self.prev_row += 1 + self.prev_col = 0 + return "".join(self.tokens) + + def compat(self, token, iterable): + indents = [] + toks_append = self.tokens.append + startline = token[0] in (NEWLINE, NL) + prevstring = False + + for tok in _itertools.chain([token], iterable): + toknum, tokval = tok[:2] + if toknum == ENCODING: + self.encoding = tokval + continue + + if toknum in (NAME, NUMBER): + tokval += ' ' + + # Insert a space between two consecutive strings + if toknum == STRING: + if prevstring: + tokval = ' ' + tokval + prevstring = True + else: + prevstring = False + + if toknum == INDENT: + indents.append(tokval) + continue + elif toknum == DEDENT: + indents.pop() + continue + elif toknum in (NEWLINE, NL): + startline = True + elif startline and indents: + toks_append(indents[-1]) + startline = False + toks_append(tokval) + + +def untokenize(iterable): + """Transform tokens back into Python source code. + It returns a bytes object, encoded using the ENCODING + token, which is the first token sequence output by tokenize. + + Each element returned by the iterable must be a token sequence + with at least two elements, a token number and token value. If + only two tokens are passed, the resulting output is poor. + + Round-trip invariant for full input: + Untokenized source will match input source exactly + + Round-trip invariant for limited input: + # Output bytes will tokenize back to the input + t1 = [tok[:2] for tok in tokenize(f.readline)] + newcode = untokenize(t1) + readline = BytesIO(newcode).readline + t2 = [tok[:2] for tok in tokenize(readline)] + assert t1 == t2 + """ + ut = Untokenizer() + out = ut.untokenize(iterable) + if ut.encoding is not None: + out = out.encode(ut.encoding) + return out + + +def _get_normal_name(orig_enc): + """Imitates get_normal_name in tokenizer.c.""" + # Only care about the first 12 characters. 
+ enc = orig_enc[:12].lower().replace("_", "-") + if enc == "utf-8" or enc.startswith("utf-8-"): + return "utf-8" + if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ + enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): + return "iso-8859-1" + return orig_enc + +def detect_encoding(readline): + """ + The detect_encoding() function is used to detect the encoding that should + be used to decode a Python source file. It requires one argument, readline, + in the same way as the tokenize() generator. + + It will call readline a maximum of twice, and return the encoding used + (as a string) and a list of any lines (left as bytes) it has read in. + + It detects the encoding from the presence of a utf-8 bom or an encoding + cookie as specified in pep-0263. If both a bom and a cookie are present, + but disagree, a SyntaxError will be raised. If the encoding cookie is an + invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found, + 'utf-8-sig' is returned. + + If no encoding is specified, then the default of 'utf-8' will be returned. + """ + try: + filename = readline.__self__.name + except AttributeError: + filename = None + bom_found = False + encoding = None + default = 'utf-8' + def read_or_stop(): + try: + return readline() + except StopIteration: + return b'' + + def find_cookie(line): + try: + # Decode as UTF-8. Either the line is an encoding declaration, + # in which case it should be pure ASCII, or it must be UTF-8 + # per default encoding. + line_string = line.decode('utf-8') + except UnicodeDecodeError: + msg = "invalid or missing encoding declaration" + if filename is not None: + msg = '{} for {!r}'.format(msg, filename) + raise SyntaxError(msg) + + match = cookie_re.match(line_string) + if not match: + return None + encoding = _get_normal_name(match.group(1)) + try: + codec = lookup(encoding) + except LookupError: + # This behaviour mimics the Python interpreter + if filename is None: + msg = "unknown encoding: " + encoding + else: + msg = "unknown encoding for {!r}: {}".format(filename, + encoding) + raise SyntaxError(msg) + + if bom_found: + if encoding != 'utf-8': + # This behaviour mimics the Python interpreter + if filename is None: + msg = 'encoding problem: utf-8' + else: + msg = 'encoding problem for {!r}: utf-8'.format(filename) + raise SyntaxError(msg) + encoding += '-sig' + return encoding + + first = read_or_stop() + if first.startswith(BOM_UTF8): + bom_found = True + first = first[3:] + default = 'utf-8-sig' + if not first: + return default, [] + + encoding = find_cookie(first) + if encoding: + return encoding, [first] + if not blank_re.match(first): + return default, [first] + + second = read_or_stop() + if not second: + return default, [first] + + encoding = find_cookie(second) + if encoding: + return encoding, [first, second] + + return default, [first, second] + + +def open(filename): + """Open a file in read only mode using the encoding detected by + detect_encoding(). + """ + buffer = _builtin_open(filename, 'rb') + try: + encoding, lines = detect_encoding(buffer.readline) + buffer.seek(0) + text = TextIOWrapper(buffer, encoding, line_buffering=True) + text.mode = 'r' + return text + except: + buffer.close() + raise + + +def tokenize(readline): + """ + The tokenize() generator requires one argument, readline, which + must be a callable object which provides the same interface as the + readline() method of built-in file objects. Each call to the function + should return one line of input as bytes. 
Alternatively, readline + can be a callable function terminating with StopIteration: + readline = open(myfile, 'rb').__next__ # Example of alternate readline + + The generator produces 5-tuples with these members: the token type; the + token string; a 2-tuple (srow, scol) of ints specifying the row and + column where the token begins in the source; a 2-tuple (erow, ecol) of + ints specifying the row and column where the token ends in the source; + and the line on which the token was found. The line passed is the + physical line. + + The first token sequence will always be an ENCODING token + which tells you which encoding was used to decode the bytes stream. + """ + encoding, consumed = detect_encoding(readline) + empty = _itertools.repeat(b"") + rl_gen = _itertools.chain(consumed, iter(readline, b""), empty) + return _tokenize(rl_gen.__next__, encoding) + + +def _tokenize(readline, encoding): + lnum = parenlev = continued = 0 + numchars = '0123456789' + contstr, needcont = '', 0 + contline = None + indents = [0] + + if encoding is not None: + if encoding == "utf-8-sig": + # BOM will already have been stripped. + encoding = "utf-8" + yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '') + last_line = b'' + line = b'' + while True: # loop over lines in stream + try: + # We capture the value of the line variable here because + # readline uses the empty string '' to signal end of input, + # hence `line` itself will always be overwritten at the end + # of this loop. + last_line = line + line = readline() + except StopIteration: + line = b'' + + if encoding is not None: + line = line.decode(encoding) + lnum += 1 + pos, max = 0, len(line) + + if contstr: # continued string + if not line: + raise TokenError("EOF in multi-line string", strstart) + endmatch = endprog.match(line) + if endmatch: + pos = end = endmatch.end(0) + yield TokenInfo(STRING, contstr + line[:end], + strstart, (lnum, end), contline + line) + contstr, needcont = '', 0 + contline = None + elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': + yield TokenInfo(ERRORTOKEN, contstr + line, + strstart, (lnum, len(line)), contline) + contstr = '' + contline = None + continue + else: + contstr = contstr + line + contline = contline + line + continue + + elif parenlev == 0 and not continued: # new statement + if not line: break + column = 0 + while pos < max: # measure leading whitespace + if line[pos] == ' ': + column += 1 + elif line[pos] == '\t': + column = (column//tabsize + 1)*tabsize + elif line[pos] == '\f': + column = 0 + else: + break + pos += 1 + if pos == max: + break + + if line[pos] in '#\r\n': # skip comments or blank lines + if line[pos] == '#': + comment_token = line[pos:].rstrip('\r\n') + yield TokenInfo(COMMENT, comment_token, + (lnum, pos), (lnum, pos + len(comment_token)), line) + pos += len(comment_token) + + yield TokenInfo(NL, line[pos:], + (lnum, pos), (lnum, len(line)), line) + continue + + if column > indents[-1]: # count indents or dedents + indents.append(column) + yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line) + while column < indents[-1]: + if column not in indents: + raise IndentationError( + "unindent does not match any outer indentation level", + ("", lnum, pos, line)) + indents = indents[:-1] + + yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line) + + else: # continued statement + if not line: + raise TokenError("EOF in multi-line statement", (lnum, 0)) + continued = 0 + + while pos < max: + pseudomatch = _compile(PseudoToken).match(line, pos) + if pseudomatch: # 
scan for tokens + start, end = pseudomatch.span(1) + spos, epos, pos = (lnum, start), (lnum, end), end + if start == end: + continue + token, initial = line[start:end], line[start] + + if (initial in numchars or # ordinary number + (initial == '.' and token != '.' and token != '...')): + yield TokenInfo(NUMBER, token, spos, epos, line) + elif initial in '\r\n': + if parenlev > 0: + yield TokenInfo(NL, token, spos, epos, line) + else: + yield TokenInfo(NEWLINE, token, spos, epos, line) + + elif initial == '#': + assert not token.endswith("\n") + yield TokenInfo(COMMENT, token, spos, epos, line) + + elif token in triple_quoted: + endprog = _compile(endpats[token]) + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + pos = endmatch.end(0) + token = line[start:pos] + yield TokenInfo(STRING, token, spos, (lnum, pos), line) + else: + strstart = (lnum, start) # multiple lines + contstr = line[start:] + contline = line + break + + # Check up to the first 3 chars of the token to see if + # they're in the single_quoted set. If so, they start + # a string. + # We're using the first 3, because we're looking for + # "rb'" (for example) at the start of the token. If + # we switch to longer prefixes, this needs to be + # adjusted. + # Note that initial == token[:1]. + # Also note that single quote checking must come after + # triple quote checking (above). + elif (initial in single_quoted or + token[:2] in single_quoted or + token[:3] in single_quoted): + if token[-1] == '\n': # continued string + strstart = (lnum, start) + # Again, using the first 3 chars of the + # token. This is looking for the matching end + # regex for the correct type of quote + # character. So it's really looking for + # endpats["'"] or endpats['"'], by trying to + # skip string prefix characters, if any. + endprog = _compile(endpats.get(initial) or + endpats.get(token[1]) or + endpats.get(token[2])) + contstr, needcont = line[start:], 1 + contline = line + break + else: # ordinary string + yield TokenInfo(STRING, token, spos, epos, line) + + elif initial.isidentifier(): # ordinary name + yield TokenInfo(NAME, token, spos, epos, line) + elif initial == '\\': # continued stmt + continued = 1 + else: + if initial in '([{': + parenlev += 1 + elif initial in ')]}': + parenlev -= 1 + yield TokenInfo(OP, token, spos, epos, line) + else: + yield TokenInfo(ERRORTOKEN, line[pos], + (lnum, pos), (lnum, pos+1), line) + pos += 1 + + # Add an implicit NEWLINE if the input doesn't end in one + if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"): + yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '') + for indent in indents[1:]: # pop remaining indent levels + yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '') + yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '') + + +def generate_tokens(readline): + """Tokenize a source reading Python code as unicode strings. + + This has the same API as tokenize(), except that it expects the *readline* + callable to return str objects instead of bytes. 
+ """ + return _tokenize(readline, None) + +def main(): + import argparse + + # Helper error handling routines + def perror(message): + sys.stderr.write(message) + sys.stderr.write('\n') + + def error(message, filename=None, location=None): + if location: + args = (filename,) + location + (message,) + perror("%s:%d:%d: error: %s" % args) + elif filename: + perror("%s: error: %s" % (filename, message)) + else: + perror("error: %s" % message) + sys.exit(1) + + # Parse the arguments and options + parser = argparse.ArgumentParser(prog='python -m tokenize') + parser.add_argument(dest='filename', nargs='?', + metavar='filename.py', + help='the file to tokenize; defaults to stdin') + parser.add_argument('-e', '--exact', dest='exact', action='store_true', + help='display token names using the exact type') + args = parser.parse_args() + + try: + # Tokenize the input + if args.filename: + filename = args.filename + with _builtin_open(filename, 'rb') as f: + tokens = list(tokenize(f.readline)) + else: + filename = "" + tokens = _tokenize(sys.stdin.readline, None) + + # Output the tokenization + for token in tokens: + token_type = token.type + if args.exact: + token_type = token.exact_type + token_range = "%d,%d-%d,%d:" % (token.start + token.end) + print("%-20s%-15s%-15r" % + (token_range, tok_name[token_type], token.string)) + except IndentationError as err: + line, column = err.args[1][1:3] + error(err.args[0], filename, (line, column)) + except TokenError as err: + line, column = err.args[1] + error(err.args[0], filename, (line, column)) + except SyntaxError as err: + error(err, filename) + except OSError as err: + error(err) + except KeyboardInterrupt: + print("interrupted\n") + except Exception as err: + perror("unexpected error: %s" % err) + raise + +def _generate_tokens_from_c_tokenizer(source): + """Tokenize a source reading Python code as unicode strings using the internal C tokenizer""" + import _tokenize as c_tokenizer + for info in c_tokenizer.TokenizerIter(source): + tok, type, lineno, end_lineno, col_off, end_col_off, line = info + yield TokenInfo(type, tok, (lineno, col_off), (end_lineno, end_col_off), line) + + +if __name__ == "__main__": + main() diff --git a/src/twisted/persisted/aot.py b/src/twisted/persisted/aot.py index 204728058cd..57bf56d95e0 100644 --- a/src/twisted/persisted/aot.py +++ b/src/twisted/persisted/aot.py @@ -13,11 +13,11 @@ import copyreg as copy_reg import re import types -from tokenize import generate_tokens as tokenize from twisted.persisted import crefutil from twisted.python import log, reflect from twisted.python.compat import _constructMethod +from ._tokenize import generate_tokens as tokenize ########################### # Abstract Object Classes # diff --git a/src/twisted/test/test_persisted.py b/src/twisted/test/test_persisted.py index 72354b33f38..1563bbfb452 100644 --- a/src/twisted/test/test_persisted.py +++ b/src/twisted/test/test_persisted.py @@ -446,6 +446,12 @@ def test_circularTuple(self): self.assertIs(oj[0][0], oj) self.assertEqual(oj[0][1], 4321) + def testIndentify(self): + self.assertEqual( + aot.jellyToSource({"hello": {"world": []}}), + "app={\n 'hello':{\n 'world':[],\n },\n }", + ) + class CrefUtilTests(TestCase): """ From 07bd957d224b20f64e97c3058c94a0f293623568 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 15:56:10 +0100 Subject: [PATCH 15/27] fix __all__ and apply pre-commit --- src/twisted/persisted/_token.py | 107 +++---- src/twisted/persisted/_tokenize.py | 476 +++++++++++++++++++++-------- 2 files changed, 
397 insertions(+), 186 deletions(-) diff --git a/src/twisted/persisted/_token.py b/src/twisted/persisted/_token.py index 3c807059b23..44f88c341c7 100644 --- a/src/twisted/persisted/_token.py +++ b/src/twisted/persisted/_token.py @@ -5,7 +5,7 @@ """ # Auto-generated by Tools/scripts/generate_token.py -__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] +__all__ = ["tok_name", "ISTERMINAL", "ISNONTERMINAL", "ISEOF"] ENDMARKER = 0 NAME = 1 @@ -76,66 +76,71 @@ # Special definitions for cooperation with parser NT_OFFSET = 256 -tok_name = {value: name - for name, value in globals().items() - if isinstance(value, int) and not name.startswith('_')} +tok_name = { + value: name + for name, value in globals().items() + if isinstance(value, int) and not name.startswith("_") +} __all__.extend(tok_name.values()) EXACT_TOKEN_TYPES = { - '!=': NOTEQUAL, - '%': PERCENT, - '%=': PERCENTEQUAL, - '&': AMPER, - '&=': AMPEREQUAL, - '(': LPAR, - ')': RPAR, - '*': STAR, - '**': DOUBLESTAR, - '**=': DOUBLESTAREQUAL, - '*=': STAREQUAL, - '+': PLUS, - '+=': PLUSEQUAL, - ',': COMMA, - '-': MINUS, - '-=': MINEQUAL, - '->': RARROW, - '.': DOT, - '...': ELLIPSIS, - '/': SLASH, - '//': DOUBLESLASH, - '//=': DOUBLESLASHEQUAL, - '/=': SLASHEQUAL, - ':': COLON, - ':=': COLONEQUAL, - ';': SEMI, - '<': LESS, - '<<': LEFTSHIFT, - '<<=': LEFTSHIFTEQUAL, - '<=': LESSEQUAL, - '=': EQUAL, - '==': EQEQUAL, - '>': GREATER, - '>=': GREATEREQUAL, - '>>': RIGHTSHIFT, - '>>=': RIGHTSHIFTEQUAL, - '@': AT, - '@=': ATEQUAL, - '[': LSQB, - ']': RSQB, - '^': CIRCUMFLEX, - '^=': CIRCUMFLEXEQUAL, - '{': LBRACE, - '|': VBAR, - '|=': VBAREQUAL, - '}': RBRACE, - '~': TILDE, + "!=": NOTEQUAL, + "%": PERCENT, + "%=": PERCENTEQUAL, + "&": AMPER, + "&=": AMPEREQUAL, + "(": LPAR, + ")": RPAR, + "*": STAR, + "**": DOUBLESTAR, + "**=": DOUBLESTAREQUAL, + "*=": STAREQUAL, + "+": PLUS, + "+=": PLUSEQUAL, + ",": COMMA, + "-": MINUS, + "-=": MINEQUAL, + "->": RARROW, + ".": DOT, + "...": ELLIPSIS, + "/": SLASH, + "//": DOUBLESLASH, + "//=": DOUBLESLASHEQUAL, + "/=": SLASHEQUAL, + ":": COLON, + ":=": COLONEQUAL, + ";": SEMI, + "<": LESS, + "<<": LEFTSHIFT, + "<<=": LEFTSHIFTEQUAL, + "<=": LESSEQUAL, + "=": EQUAL, + "==": EQEQUAL, + ">": GREATER, + ">=": GREATEREQUAL, + ">>": RIGHTSHIFT, + ">>=": RIGHTSHIFTEQUAL, + "@": AT, + "@=": ATEQUAL, + "[": LSQB, + "]": RSQB, + "^": CIRCUMFLEX, + "^=": CIRCUMFLEXEQUAL, + "{": LBRACE, + "|": VBAR, + "|=": VBAREQUAL, + "}": RBRACE, + "~": TILDE, } + def ISTERMINAL(x): return x < NT_OFFSET + def ISNONTERMINAL(x): return x >= NT_OFFSET + def ISEOF(x): return x == ENDMARKER diff --git a/src/twisted/persisted/_tokenize.py b/src/twisted/persisted/_tokenize.py index 18cb452f6c0..8d42d618950 100644 --- a/src/twisted/persisted/_tokenize.py +++ b/src/twisted/persisted/_tokenize.py @@ -22,30 +22,184 @@ which tells you which encoding was used to decode the bytes stream. 
""" -__author__ = 'Ka-Ping Yee ' -__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, ' - 'Skip Montanaro, Raymond Hettinger, Trent Nelson, ' - 'Michael Foord') -from builtins import open as _builtin_open -from codecs import lookup, BOM_UTF8 +__author__ = "Ka-Ping Yee " +__credits__ = ( + "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, " + "Skip Montanaro, Raymond Hettinger, Trent Nelson, " + "Michael Foord" +) import collections import functools -from io import TextIOWrapper import itertools as _itertools import re import sys -from ._token import * -from ._token import EXACT_TOKEN_TYPES - -cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) -blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) - +from builtins import open as _builtin_open +from codecs import BOM_UTF8, lookup +from io import TextIOWrapper -class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')): +from ._token import ( + AMPER, + AMPEREQUAL, + ASYNC, + AT, + ATEQUAL, + AWAIT, + CIRCUMFLEX, + CIRCUMFLEXEQUAL, + COLON, + COLONEQUAL, + COMMA, + COMMENT, + DEDENT, + DOT, + DOUBLESLASH, + DOUBLESLASHEQUAL, + DOUBLESTAR, + DOUBLESTAREQUAL, + ELLIPSIS, + ENCODING, + ENDMARKER, + EQEQUAL, + EQUAL, + ERRORTOKEN, + EXACT_TOKEN_TYPES, + GREATER, + GREATEREQUAL, + INDENT, + ISEOF, + ISNONTERMINAL, + ISTERMINAL, + LBRACE, + LEFTSHIFT, + LEFTSHIFTEQUAL, + LESS, + LESSEQUAL, + LPAR, + LSQB, + MINEQUAL, + MINUS, + N_TOKENS, + NAME, + NEWLINE, + NL, + NOTEQUAL, + NT_OFFSET, + NUMBER, + OP, + PERCENT, + PERCENTEQUAL, + PLUS, + PLUSEQUAL, + RARROW, + RBRACE, + RIGHTSHIFT, + RIGHTSHIFTEQUAL, + RPAR, + RSQB, + SEMI, + SLASH, + SLASHEQUAL, + SOFT_KEYWORD, + STAR, + STAREQUAL, + STRING, + TILDE, + TYPE_COMMENT, + TYPE_IGNORE, + VBAR, + VBAREQUAL, + tok_name, +) + +__all__ = [ + "tok_name", + "ISTERMINAL", + "ISNONTERMINAL", + "ISEOF", + "ENDMARKER", + "NAME", + "NUMBER", + "STRING", + "NEWLINE", + "INDENT", + "DEDENT", + "LPAR", + "RPAR", + "LSQB", + "RSQB", + "COLON", + "COMMA", + "SEMI", + "PLUS", + "MINUS", + "STAR", + "SLASH", + "VBAR", + "AMPER", + "LESS", + "GREATER", + "EQUAL", + "DOT", + "PERCENT", + "LBRACE", + "RBRACE", + "EQEQUAL", + "NOTEQUAL", + "LESSEQUAL", + "GREATEREQUAL", + "TILDE", + "CIRCUMFLEX", + "LEFTSHIFT", + "RIGHTSHIFT", + "DOUBLESTAR", + "PLUSEQUAL", + "MINEQUAL", + "STAREQUAL", + "SLASHEQUAL", + "PERCENTEQUAL", + "AMPEREQUAL", + "VBAREQUAL", + "CIRCUMFLEXEQUAL", + "LEFTSHIFTEQUAL", + "RIGHTSHIFTEQUAL", + "DOUBLESTAREQUAL", + "DOUBLESLASH", + "DOUBLESLASHEQUAL", + "AT", + "ATEQUAL", + "RARROW", + "ELLIPSIS", + "COLONEQUAL", + "OP", + "AWAIT", + "ASYNC", + "TYPE_IGNORE", + "TYPE_COMMENT", + "SOFT_KEYWORD", + "ERRORTOKEN", + "COMMENT", + "NL", + "ENCODING", + "N_TOKENS", + "NT_OFFSET", + "tokenize", + "generate_tokens", + "detect_encoding", + "untokenize", + "TokenInfo", +] + +cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII) +blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII) + + +class TokenInfo(collections.namedtuple("TokenInfo", "type string start end line")): def __repr__(self): - annotated_type = '%d (%s)' % (self.type, tok_name[self.type]) - return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' % - self._replace(type=annotated_type)) + annotated_type = "%d (%s)" % (self.type, tok_name[self.type]) + return ( + "TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)" + % self._replace(type=annotated_type) + ) @property def exact_type(self): @@ -54,28 +208,38 @@ def exact_type(self): else: return 
self.type -def group(*choices): return '(' + '|'.join(choices) + ')' -def any(*choices): return group(*choices) + '*' -def maybe(*choices): return group(*choices) + '?' + +def group(*choices): + return "(" + "|".join(choices) + ")" + + +def any(*choices): + return group(*choices) + "*" + + +def maybe(*choices): + return group(*choices) + "?" + # Note: we use unicode matching for names ("\w") but ascii matching for # number literals. -Whitespace = r'[ \f\t]*' -Comment = r'#[^\r\n]*' -Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) -Name = r'\w+' - -Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' -Binnumber = r'0[bB](?:_?[01])+' -Octnumber = r'0[oO](?:_?[0-7])+' -Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' +Whitespace = r"[ \f\t]*" +Comment = r"#[^\r\n]*" +Ignore = Whitespace + any(r"\\\r?\n" + Whitespace) + maybe(Comment) +Name = r"\w+" + +Hexnumber = r"0[xX](?:_?[0-9a-fA-F])+" +Binnumber = r"0[bB](?:_?[01])+" +Octnumber = r"0[oO](?:_?[0-7])+" +Decnumber = r"(?:0(?:_?0)*|[1-9](?:_?[0-9])*)" Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) -Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' -Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', - r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) -Expfloat = r'[0-9](?:_?[0-9])*' + Exponent +Exponent = r"[eE][-+]?[0-9](?:_?[0-9])*" +Pointfloat = group( + r"[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?", r"\.[0-9](?:_?[0-9])*" +) + maybe(Exponent) +Expfloat = r"[0-9](?:_?[0-9])*" + Exponent Floatnumber = group(Pointfloat, Expfloat) -Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') +Imagnumber = group(r"[0-9](?:_?[0-9])*[jJ]", Floatnumber + r"[jJ]") Number = group(Imagnumber, Floatnumber, Intnumber) # Return the empty string, plus all of the valid string prefixes. @@ -83,21 +247,23 @@ def _all_string_prefixes(): # The valid string prefixes. Only contain the lower case versions, # and don't contain any permutations (include 'fr', but not # 'rf'). The various permutations will be generated. - _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr'] + _valid_string_prefixes = ["b", "r", "u", "f", "br", "fr"] # if we add binary f-strings, add: ['fb', 'fbr'] - result = {''} + result = {""} for prefix in _valid_string_prefixes: for t in _itertools.permutations(prefix): # create a list with upper and lower versions of each # character for u in _itertools.product(*[(c, c.upper()) for c in t]): - result.add(''.join(u)) + result.add("".join(u)) return result + @functools.lru_cache def _compile(expr): return re.compile(expr, re.UNICODE) + # Note that since _all_string_prefixes includes the empty string, # StringPrefix can be the empty string (making it optional). StringPrefix = group(*_all_string_prefixes()) @@ -112,24 +278,26 @@ def _compile(expr): Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' Triple = group(StringPrefix + "'''", StringPrefix + '"""') # Single-line ' or " string. -String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", - StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"') +String = group( + StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", + StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"', +) # Sorting in reverse order puts the long operators before their prefixes. # Otherwise if = came before ==, == would get recognized as two instances # of =. 
Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True))) -Funny = group(r'\r?\n', Special) +Funny = group(r"\r?\n", Special) PlainToken = group(Number, Funny, String, Name) Token = Ignore + PlainToken # First (or only) line of ' or " string. -ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + - group("'", r'\\\r?\n'), - StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + - group('"', r'\\\r?\n')) -PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple) +ContStr = group( + StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"), + StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"), +) +PseudoExtras = group(r"\\\r?\n|\Z", Comment, Triple) PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) # For a given string prefix plus quotes, endpats maps it to a regex @@ -156,13 +324,16 @@ def _compile(expr): tabsize = 8 -class TokenError(Exception): pass -class StopTokenizing(Exception): pass +class TokenError(Exception): + pass -class Untokenizer: +class StopTokenizing(Exception): + pass + +class Untokenizer: def __init__(self): self.tokens = [] self.prev_row = 1 @@ -172,8 +343,11 @@ def __init__(self): def add_whitespace(self, start): row, col = start if row < self.prev_row or row == self.prev_row and col < self.prev_col: - raise ValueError("start ({},{}) precedes previous end ({},{})" - .format(row, col, self.prev_row, self.prev_col)) + raise ValueError( + "start ({},{}) precedes previous end ({},{})".format( + row, col, self.prev_row, self.prev_col + ) + ) row_offset = row - self.prev_row if row_offset: self.tokens.append("\\\n" * row_offset) @@ -232,12 +406,12 @@ def compat(self, token, iterable): continue if toknum in (NAME, NUMBER): - tokval += ' ' + tokval += " " # Insert a space between two consecutive strings if toknum == STRING: if prevstring: - tokval = ' ' + tokval + tokval = " " + tokval prevstring = True else: prevstring = False @@ -289,11 +463,13 @@ def _get_normal_name(orig_enc): enc = orig_enc[:12].lower().replace("_", "-") if enc == "utf-8" or enc.startswith("utf-8-"): return "utf-8" - if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ - enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): + if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or enc.startswith( + ("latin-1-", "iso-8859-1-", "iso-latin-1-") + ): return "iso-8859-1" return orig_enc + def detect_encoding(readline): """ The detect_encoding() function is used to detect the encoding that should @@ -317,23 +493,24 @@ def detect_encoding(readline): filename = None bom_found = False encoding = None - default = 'utf-8' + default = "utf-8" + def read_or_stop(): try: return readline() except StopIteration: - return b'' + return b"" def find_cookie(line): try: # Decode as UTF-8. Either the line is an encoding declaration, # in which case it should be pure ASCII, or it must be UTF-8 # per default encoding. 
- line_string = line.decode('utf-8') + line_string = line.decode("utf-8") except UnicodeDecodeError: msg = "invalid or missing encoding declaration" if filename is not None: - msg = '{} for {!r}'.format(msg, filename) + msg = "{} for {!r}".format(msg, filename) raise SyntaxError(msg) match = cookie_re.match(line_string) @@ -347,26 +524,25 @@ def find_cookie(line): if filename is None: msg = "unknown encoding: " + encoding else: - msg = "unknown encoding for {!r}: {}".format(filename, - encoding) + msg = "unknown encoding for {!r}: {}".format(filename, encoding) raise SyntaxError(msg) if bom_found: - if encoding != 'utf-8': + if encoding != "utf-8": # This behaviour mimics the Python interpreter if filename is None: - msg = 'encoding problem: utf-8' + msg = "encoding problem: utf-8" else: - msg = 'encoding problem for {!r}: utf-8'.format(filename) + msg = "encoding problem for {!r}: utf-8".format(filename) raise SyntaxError(msg) - encoding += '-sig' + encoding += "-sig" return encoding first = read_or_stop() if first.startswith(BOM_UTF8): bom_found = True first = first[3:] - default = 'utf-8-sig' + default = "utf-8-sig" if not first: return default, [] @@ -391,12 +567,12 @@ def open(filename): """Open a file in read only mode using the encoding detected by detect_encoding(). """ - buffer = _builtin_open(filename, 'rb') + buffer = _builtin_open(filename, "rb") try: encoding, lines = detect_encoding(buffer.readline) buffer.seek(0) text = TextIOWrapper(buffer, encoding, line_buffering=True) - text.mode = 'r' + text.mode = "r" return text except: buffer.close() @@ -430,8 +606,8 @@ def tokenize(readline): def _tokenize(readline, encoding): lnum = parenlev = continued = 0 - numchars = '0123456789' - contstr, needcont = '', 0 + numchars = "0123456789" + contstr, needcont = "", 0 contline = None indents = [0] @@ -439,10 +615,10 @@ def _tokenize(readline, encoding): if encoding == "utf-8-sig": # BOM will already have been stripped. 
encoding = "utf-8" - yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '') - last_line = b'' - line = b'' - while True: # loop over lines in stream + yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), "") + last_line = b"" + line = b"" + while True: # loop over lines in stream try: # We capture the value of the line variable here because # readline uses the empty string '' to signal end of input, @@ -451,27 +627,29 @@ def _tokenize(readline, encoding): last_line = line line = readline() except StopIteration: - line = b'' + line = b"" if encoding is not None: line = line.decode(encoding) lnum += 1 pos, max = 0, len(line) - if contstr: # continued string + if contstr: # continued string if not line: raise TokenError("EOF in multi-line string", strstart) endmatch = endprog.match(line) if endmatch: pos = end = endmatch.end(0) - yield TokenInfo(STRING, contstr + line[:end], - strstart, (lnum, end), contline + line) - contstr, needcont = '', 0 + yield TokenInfo( + STRING, contstr + line[:end], strstart, (lnum, end), contline + line + ) + contstr, needcont = "", 0 contline = None - elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': - yield TokenInfo(ERRORTOKEN, contstr + line, - strstart, (lnum, len(line)), contline) - contstr = '' + elif needcont and line[-2:] != "\\\n" and line[-3:] != "\\\r\n": + yield TokenInfo( + ERRORTOKEN, contstr + line, strstart, (lnum, len(line)), contline + ) + contstr = "" contline = None continue else: @@ -480,14 +658,15 @@ def _tokenize(readline, encoding): continue elif parenlev == 0 and not continued: # new statement - if not line: break + if not line: + break column = 0 - while pos < max: # measure leading whitespace - if line[pos] == ' ': + while pos < max: # measure leading whitespace + if line[pos] == " ": column += 1 - elif line[pos] == '\t': - column = (column//tabsize + 1)*tabsize - elif line[pos] == '\f': + elif line[pos] == "\t": + column = (column // tabsize + 1) * tabsize + elif line[pos] == "\f": column = 0 else: break @@ -495,65 +674,71 @@ def _tokenize(readline, encoding): if pos == max: break - if line[pos] in '#\r\n': # skip comments or blank lines - if line[pos] == '#': - comment_token = line[pos:].rstrip('\r\n') - yield TokenInfo(COMMENT, comment_token, - (lnum, pos), (lnum, pos + len(comment_token)), line) + if line[pos] in "#\r\n": # skip comments or blank lines + if line[pos] == "#": + comment_token = line[pos:].rstrip("\r\n") + yield TokenInfo( + COMMENT, + comment_token, + (lnum, pos), + (lnum, pos + len(comment_token)), + line, + ) pos += len(comment_token) - yield TokenInfo(NL, line[pos:], - (lnum, pos), (lnum, len(line)), line) + yield TokenInfo(NL, line[pos:], (lnum, pos), (lnum, len(line)), line) continue - if column > indents[-1]: # count indents or dedents + if column > indents[-1]: # count indents or dedents indents.append(column) yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line) while column < indents[-1]: if column not in indents: raise IndentationError( "unindent does not match any outer indentation level", - ("", lnum, pos, line)) + ("", lnum, pos, line), + ) indents = indents[:-1] - yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line) + yield TokenInfo(DEDENT, "", (lnum, pos), (lnum, pos), line) - else: # continued statement + else: # continued statement if not line: raise TokenError("EOF in multi-line statement", (lnum, 0)) continued = 0 while pos < max: pseudomatch = _compile(PseudoToken).match(line, pos) - if pseudomatch: # scan for tokens + if pseudomatch: # scan for tokens start, end 
= pseudomatch.span(1) spos, epos, pos = (lnum, start), (lnum, end), end if start == end: continue token, initial = line[start:end], line[start] - if (initial in numchars or # ordinary number - (initial == '.' and token != '.' and token != '...')): + if initial in numchars or ( # ordinary number + initial == "." and token != "." and token != "..." + ): yield TokenInfo(NUMBER, token, spos, epos, line) - elif initial in '\r\n': + elif initial in "\r\n": if parenlev > 0: yield TokenInfo(NL, token, spos, epos, line) else: yield TokenInfo(NEWLINE, token, spos, epos, line) - elif initial == '#': + elif initial == "#": assert not token.endswith("\n") yield TokenInfo(COMMENT, token, spos, epos, line) elif token in triple_quoted: endprog = _compile(endpats[token]) endmatch = endprog.match(line, pos) - if endmatch: # all on one line + if endmatch: # all on one line pos = endmatch.end(0) token = line[start:pos] yield TokenInfo(STRING, token, spos, (lnum, pos), line) else: - strstart = (lnum, start) # multiple lines + strstart = (lnum, start) # multiple lines contstr = line[start:] contline = line break @@ -568,10 +753,12 @@ def _tokenize(readline, encoding): # Note that initial == token[:1]. # Also note that single quote checking must come after # triple quote checking (above). - elif (initial in single_quoted or - token[:2] in single_quoted or - token[:3] in single_quoted): - if token[-1] == '\n': # continued string + elif ( + initial in single_quoted + or token[:2] in single_quoted + or token[:3] in single_quoted + ): + if token[-1] == "\n": # continued string strstart = (lnum, start) # Again, using the first 3 chars of the # token. This is looking for the matching end @@ -579,36 +766,45 @@ def _tokenize(readline, encoding): # character. So it's really looking for # endpats["'"] or endpats['"'], by trying to # skip string prefix characters, if any. 
- endprog = _compile(endpats.get(initial) or - endpats.get(token[1]) or - endpats.get(token[2])) + endprog = _compile( + endpats.get(initial) + or endpats.get(token[1]) + or endpats.get(token[2]) + ) contstr, needcont = line[start:], 1 contline = line break - else: # ordinary string + else: # ordinary string yield TokenInfo(STRING, token, spos, epos, line) - elif initial.isidentifier(): # ordinary name + elif initial.isidentifier(): # ordinary name yield TokenInfo(NAME, token, spos, epos, line) - elif initial == '\\': # continued stmt + elif initial == "\\": # continued stmt continued = 1 else: - if initial in '([{': + if initial in "([{": parenlev += 1 - elif initial in ')]}': + elif initial in ")]}": parenlev -= 1 yield TokenInfo(OP, token, spos, epos, line) else: - yield TokenInfo(ERRORTOKEN, line[pos], - (lnum, pos), (lnum, pos+1), line) + yield TokenInfo( + ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line + ) pos += 1 # Add an implicit NEWLINE if the input doesn't end in one - if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"): - yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '') - for indent in indents[1:]: # pop remaining indent levels - yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '') - yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '') + if ( + last_line + and last_line[-1] not in "\r\n" + and not last_line.strip().startswith("#") + ): + yield TokenInfo( + NEWLINE, "", (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), "" + ) + for indent in indents[1:]: # pop remaining indent levels + yield TokenInfo(DEDENT, "", (lnum, 0), (lnum, 0), "") + yield TokenInfo(ENDMARKER, "", (lnum, 0), (lnum, 0), "") def generate_tokens(readline): @@ -619,13 +815,14 @@ def generate_tokens(readline): """ return _tokenize(readline, None) + def main(): import argparse # Helper error handling routines def perror(message): sys.stderr.write(message) - sys.stderr.write('\n') + sys.stderr.write("\n") def error(message, filename=None, location=None): if location: @@ -638,19 +835,27 @@ def error(message, filename=None, location=None): sys.exit(1) # Parse the arguments and options - parser = argparse.ArgumentParser(prog='python -m tokenize') - parser.add_argument(dest='filename', nargs='?', - metavar='filename.py', - help='the file to tokenize; defaults to stdin') - parser.add_argument('-e', '--exact', dest='exact', action='store_true', - help='display token names using the exact type') + parser = argparse.ArgumentParser(prog="python -m tokenize") + parser.add_argument( + dest="filename", + nargs="?", + metavar="filename.py", + help="the file to tokenize; defaults to stdin", + ) + parser.add_argument( + "-e", + "--exact", + dest="exact", + action="store_true", + help="display token names using the exact type", + ) args = parser.parse_args() try: # Tokenize the input if args.filename: filename = args.filename - with _builtin_open(filename, 'rb') as f: + with _builtin_open(filename, "rb") as f: tokens = list(tokenize(f.readline)) else: filename = "" @@ -662,8 +867,7 @@ def error(message, filename=None, location=None): if args.exact: token_type = token.exact_type token_range = "%d,%d-%d,%d:" % (token.start + token.end) - print("%-20s%-15s%-15r" % - (token_range, tok_name[token_type], token.string)) + print("%-20s%-15s%-15r" % (token_range, tok_name[token_type], token.string)) except IndentationError as err: line, column = err.args[1][1:3] error(err.args[0], filename, (line, column)) @@ -680,9 +884,11 @@ def 
error(message, filename=None, location=None): perror("unexpected error: %s" % err) raise + def _generate_tokens_from_c_tokenizer(source): """Tokenize a source reading Python code as unicode strings using the internal C tokenizer""" import _tokenize as c_tokenizer + for info in c_tokenizer.TokenizerIter(source): tok, type, lineno, end_lineno, col_off, end_col_off, line = info yield TokenInfo(type, tok, (lineno, col_off), (end_lineno, end_col_off), line) From f86129f3665093a341065f05545fd8dd27c90466 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 15:58:28 +0100 Subject: [PATCH 16/27] flake8 _tokenize --- src/twisted/persisted/_tokenize.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/twisted/persisted/_tokenize.py b/src/twisted/persisted/_tokenize.py index 8d42d618950..9ee4265c451 100644 --- a/src/twisted/persisted/_tokenize.py +++ b/src/twisted/persisted/_tokenize.py @@ -242,6 +242,7 @@ def maybe(*choices): Imagnumber = group(r"[0-9](?:_?[0-9])*[jJ]", Floatnumber + r"[jJ]") Number = group(Imagnumber, Floatnumber, Intnumber) + # Return the empty string, plus all of the valid string prefixes. def _all_string_prefixes(): # The valid string prefixes. Only contain the lower case versions, @@ -518,7 +519,7 @@ def find_cookie(line): return None encoding = _get_normal_name(match.group(1)) try: - codec = lookup(encoding) + lookup(encoding) except LookupError: # This behaviour mimics the Python interpreter if filename is None: @@ -574,7 +575,7 @@ def open(filename): text = TextIOWrapper(buffer, encoding, line_buffering=True) text.mode = "r" return text - except: + except BaseException: buffer.close() raise @@ -605,6 +606,8 @@ def tokenize(readline): def _tokenize(readline, encoding): + strstart = None + endprog = None lnum = parenlev = continued = 0 numchars = "0123456789" contstr, needcont = "", 0 From c2611595e1f460e139eb1b1ff945bca16c426e78 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 16:03:33 +0100 Subject: [PATCH 17/27] Update src/twisted/persisted/aot.py --- src/twisted/persisted/aot.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/twisted/persisted/aot.py b/src/twisted/persisted/aot.py index 57bf56d95e0..9089dad54da 100644 --- a/src/twisted/persisted/aot.py +++ b/src/twisted/persisted/aot.py @@ -17,6 +17,8 @@ from twisted.persisted import crefutil from twisted.python import log, reflect from twisted.python.compat import _constructMethod +# tokenize from py3.11 is vendored to work around https://github.com/python/cpython/issues/105238 +# on 3.12 from ._tokenize import generate_tokens as tokenize ########################### From 985e695f0b30462130d7ffa9d2e75d4b79c90679 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 16 Aug 2023 15:05:09 +0000 Subject: [PATCH 18/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/twisted/persisted/aot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/twisted/persisted/aot.py b/src/twisted/persisted/aot.py index 9089dad54da..6eee42ddeb7 100644 --- a/src/twisted/persisted/aot.py +++ b/src/twisted/persisted/aot.py @@ -17,8 +17,9 @@ from twisted.persisted import crefutil from twisted.python import log, reflect from twisted.python.compat import _constructMethod + # tokenize from py3.11 is vendored to work around https://github.com/python/cpython/issues/105238 -# on 3.12 +# on 3.12 from ._tokenize import generate_tokens as 
tokenize ########################### From 2ed22ac87706981085ec675b189098f6e0405eb7 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 16:16:53 +0100 Subject: [PATCH 19/27] fix _tokenize mypy --- src/twisted/persisted/_tokenize.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/twisted/persisted/_tokenize.py b/src/twisted/persisted/_tokenize.py index 9ee4265c451..64358b06ff4 100644 --- a/src/twisted/persisted/_tokenize.py +++ b/src/twisted/persisted/_tokenize.py @@ -287,7 +287,7 @@ def _compile(expr): # Sorting in reverse order puts the long operators before their prefixes. # Otherwise if = came before ==, == would get recognized as two instances # of =. -Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True))) +Special = group(*(re.escape(x) for x in sorted(EXACT_TOKEN_TYPES, reverse=True))) Funny = group(r"\r?\n", Special) PlainToken = group(Number, Funny, String, Name) @@ -888,14 +888,5 @@ def error(message, filename=None, location=None): raise -def _generate_tokens_from_c_tokenizer(source): - """Tokenize a source reading Python code as unicode strings using the internal C tokenizer""" - import _tokenize as c_tokenizer - - for info in c_tokenizer.TokenizerIter(source): - tok, type, lineno, end_lineno, col_off, end_col_off, line = info - yield TokenInfo(type, tok, (lineno, col_off), (end_lineno, end_col_off), line) - - if __name__ == "__main__": main() From dbb48d6c9229a87371980179a8327395ead7766a Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 16:19:28 +0100 Subject: [PATCH 20/27] fix tokenize on py3.7 --- src/twisted/persisted/_tokenize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/twisted/persisted/_tokenize.py b/src/twisted/persisted/_tokenize.py index 64358b06ff4..48de6d1c482 100644 --- a/src/twisted/persisted/_tokenize.py +++ b/src/twisted/persisted/_tokenize.py @@ -260,7 +260,7 @@ def _all_string_prefixes(): return result -@functools.lru_cache +@functools.lru_cache(None) def _compile(expr): return re.compile(expr, re.UNICODE) From c710f0b73ddddefbeb61165e8cfbae647737c409 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 16:21:34 +0100 Subject: [PATCH 21/27] warn about _tokenize re aot Co-authored-by: Adi Roiban --- src/twisted/persisted/_token.py | 4 ++++ src/twisted/persisted/_tokenize.py | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/twisted/persisted/_token.py b/src/twisted/persisted/_token.py index 44f88c341c7..0a0453212f1 100644 --- a/src/twisted/persisted/_token.py +++ b/src/twisted/persisted/_token.py @@ -1,4 +1,8 @@ """ +FIXME:https://github.com/twisted/twisted/issues/3843 +This can be removed once t.persisted.aot is removed. +New code should not make use of this. + Token constants. vendored from https://github.com/python/cpython/blob/6b825c1b8a14460641ca6f1647d83005c68199aa/Lib/token.py Licence: https://docs.python.org/3/license.html diff --git a/src/twisted/persisted/_tokenize.py b/src/twisted/persisted/_tokenize.py index 48de6d1c482..9b568eb90ff 100644 --- a/src/twisted/persisted/_tokenize.py +++ b/src/twisted/persisted/_tokenize.py @@ -1,4 +1,9 @@ -"""Tokenization help for Python programs. +""" +FIXME:https://github.com/twisted/twisted/issues/3843 +This can be removed once t.persisted.aot is removed. +New code should not make use of this. + +Tokenization help for Python programs. 
vendored from https://github.com/python/cpython/blob/6b825c1b8a14460641ca6f1647d83005c68199aa/Lib/tokenize.py Licence: https://docs.python.org/3/license.html From 6610c4fac5d1cedd8e4a39a93d093a07508ec82e Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 16:22:12 +0100 Subject: [PATCH 22/27] Update src/twisted/test/test_task.py Co-authored-by: Adi Roiban --- src/twisted/test/test_task.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/twisted/test/test_task.py b/src/twisted/test/test_task.py index 61e53c9e113..96796d2b47b 100644 --- a/src/twisted/test/test_task.py +++ b/src/twisted/test/test_task.py @@ -467,8 +467,13 @@ def test_withCountFloatingPointBoundary(self): for x in range(count): clock.advance(interval) - # work around https://github.com/python/cpython/issues/100425 on py312 - def sum(items): + def sum_compat(items): + """ + Make sure the result is more precise. + On Python 3.11 or older this can be a float with ~ 0.00001 + in precision difference. + See: https://github.com/python/cpython/issues/100425 + """ total = 0.0 for item in items: total += item From 2eb25e73f186ffb2fe7332397e11ca0bd4473b59 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 16:22:44 +0100 Subject: [PATCH 23/27] Update test_task.py --- src/twisted/test/test_task.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/twisted/test/test_task.py b/src/twisted/test/test_task.py index 96796d2b47b..2b21225f428 100644 --- a/src/twisted/test/test_task.py +++ b/src/twisted/test/test_task.py @@ -482,7 +482,7 @@ def sum_compat(items): # There is still an epsilon of inaccuracy here; 0.1 is not quite # exactly 1/10 in binary, so we need to push our clock over the # threshold. - epsilon = timespan - sum([interval] * count) + epsilon = timespan - sum_compat([interval] * count) clock.advance(epsilon) secondsValue = clock.seconds() # The following two assertions are here to ensure that if the values of @@ -498,7 +498,7 @@ def sum_compat(items): f"{secondsValue} should be greater than or equal to {timespan}", ) - self.assertEqual(sum(accumulator), count) + self.assertEqual(sum_compat(accumulator), count) self.assertNotIn(0, accumulator) def test_withCountIntervalZero(self): From e38335d67dcbc446dcfcd93886abb66565eb8df1 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 16:23:04 +0100 Subject: [PATCH 24/27] Update src/twisted/trial/_synctest.py Co-authored-by: Adi Roiban --- src/twisted/trial/_synctest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/twisted/trial/_synctest.py b/src/twisted/trial/_synctest.py index 74f9b7b4221..a0e57156599 100644 --- a/src/twisted/trial/_synctest.py +++ b/src/twisted/trial/_synctest.py @@ -1328,7 +1328,10 @@ def mktemp(self): ) if not os.path.exists(base): os.makedirs(base) - # workaround https://github.com/python/cpython/issues/51574 + # With 3.11 or older mkdtemp returns a relative path. + # With newer it is absolute. + # Here we make sure we always handle a relative path. 
+ # See https://github.com/python/cpython/issues/51574 dirname = os.path.relpath(tempfile.mkdtemp("", "", base)) return os.path.join(dirname, "temp") From 592574ee69ca50163667beb05627b048ec620efe Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 16:23:13 +0100 Subject: [PATCH 25/27] Update src/twisted/test/test_persisted.py Co-authored-by: Adi Roiban --- src/twisted/test/test_persisted.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/twisted/test/test_persisted.py b/src/twisted/test/test_persisted.py index 1563bbfb452..37df754ac98 100644 --- a/src/twisted/test/test_persisted.py +++ b/src/twisted/test/test_persisted.py @@ -447,10 +447,16 @@ def test_circularTuple(self): self.assertEqual(oj[0][1], 4321) def testIndentify(self): + """ + The generated serialization is indented. + """ self.assertEqual( aot.jellyToSource({"hello": {"world": []}}), - "app={\n 'hello':{\n 'world':[],\n },\n }", - ) + """app={ + 'hello':{ + 'world':[], + }, + }""") class CrefUtilTests(TestCase): From 13270eb131f91569a15ebd0a0a327db1c1ee40ad Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 16 Aug 2023 15:25:04 +0000 Subject: [PATCH 26/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/twisted/test/test_persisted.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/twisted/test/test_persisted.py b/src/twisted/test/test_persisted.py index 37df754ac98..a53ea4eb6cc 100644 --- a/src/twisted/test/test_persisted.py +++ b/src/twisted/test/test_persisted.py @@ -456,7 +456,8 @@ def testIndentify(self): 'hello':{ 'world':[], }, - }""") + }""", + ) class CrefUtilTests(TestCase): From 61cae87c54d60773e1db9d6e39580dc32312102d Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 16 Aug 2023 16:37:03 +0100 Subject: [PATCH 27/27] fix testIndentify --- src/twisted/test/test_persisted.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/twisted/test/test_persisted.py b/src/twisted/test/test_persisted.py index a53ea4eb6cc..2f778da1577 100644 --- a/src/twisted/test/test_persisted.py +++ b/src/twisted/test/test_persisted.py @@ -7,6 +7,7 @@ import io import pickle import sys +import textwrap # Twisted Imports from twisted.persisted import aot, crefutil, styles @@ -452,11 +453,14 @@ def testIndentify(self): """ self.assertEqual( aot.jellyToSource({"hello": {"world": []}}), - """app={ - 'hello':{ - 'world':[], - }, - }""", + textwrap.dedent( + """\ + app={ + 'hello':{ + 'world':[], + }, + }""", + ), )
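A short aside on the dedent change in the final patch: the expected serialization is written as an indented triple-quoted literal inside the (indented) test method, and textwrap.dedent() strips that shared leading whitespace so only the serializer's own two-space indentation is left to compare against. A minimal standalone sketch of the same pattern, using only the expected value already asserted in testIndentify:

import textwrap

# The literal is written at the surrounding code's indentation level;
# dedent() removes the common leading whitespace, leaving only the
# serializer's own two-space indentation in the expected value.
expected = textwrap.dedent(
    """\
    app={
      'hello':{
        'world':[],
        },
      }"""
)

assert expected.startswith("app={")
assert expected.splitlines()[1] == "  'hello':{"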
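Similarly, a minimal sketch of the floating-point behaviour behind the sum_compat helper added to test_withCountFloatingPointBoundary earlier in the series, assuming only the documented Python 3.12 change (gh-100425, the issue cited in that patch) that switched the built-in sum() to Neumaier compensated summation for floats:

def sum_compat(items):
    # Plain left-to-right accumulation, matching what a sequence of
    # Clock.advance(interval) calls does to the clock's notion of time.
    total = 0.0
    for item in items:
        total += item
    return total

values = [0.1] * 10
print(sum_compat(values))  # 0.9999999999999999 on every Python version
print(sum(values))         # 1.0 on 3.12+; 0.9999999999999999 on 3.11 and older

The test derives its epsilon from the same repeated additions that the clock itself performs, so using the plain helper keeps that arithmetic identical across Python versions.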