-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This expands on the stdlib parser, allowing additional number syntaxes, literal dates & times, inline comments, trailing commas, and additional customization hooks.
- Loading branch information
Showing
14 changed files
with
898 additions
and
237 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .decoder import decode, decode as loads # noqa: F401 | ||
from .encoder import encode, encode as dumps # noqa: F401 | ||
from .exc import DecodeError # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
"""JSONesque Decoder | ||
In addition to standard JSON, the JSONesque decoder also supports/ | ||
handles the following: | ||
- *All* scanning methods can be overridden if some additional | ||
customization is required | ||
- An object converter can be specified to convert plain Python dicts | ||
parsed from JSON into specialized objects; by default, objects will | ||
be converted to :class:`scanner.JSONObject`, which allows items to be | ||
accessed with either dotted or bracket notation | ||
- A fallback scanner method can be provided to handle additional types | ||
of values | ||
- Trailing commas | ||
- Line comments starting with // | ||
- Any valid Python int or float: | ||
- Literal binary, octal, and hex values | ||
- Underscore separators in numbers | ||
- Unary plus operator | ||
- Literal (unquoted) dates & times: | ||
- 2021-06 | ||
- 2021-06-23 | ||
- 2021-06-23T12:00 | ||
- 2021-06-23T12:00Z | ||
- 2021-06-23T12:00-07:00 | ||
- 12:00 (today's date at noon) | ||
.. note:: For dates and times, when a time zone isn't specified, the | ||
local time zone will be used. | ||
Examples:: | ||
>>> decode("+1") | ||
1 | ||
>>> decode("1_000") | ||
1000 | ||
>>> decode("[0b11]") | ||
[3] | ||
>>> d = decode("2021-06-24") | ||
>>> d.timetuple()[:5] | ||
(2021, 6, 24, 0, 0) | ||
>>> d.tzinfo | ||
tzlocal() | ||
>>> decode(" [1, 2 , 3 , ] ") | ||
[1, 2, 3] | ||
>>> decode("[[]]") | ||
[[]] | ||
""" | ||
from typing import Any, Callable, Optional, Tuple, Union | ||
|
||
from . import scanner | ||
from .exc import ExtraneousData | ||
|
||
|
||
def decode(
    string: str,
    *,
    strict: bool = True,
    scan_object: Callable = scanner.scan_object,
    object_converter: Callable = scanner.JSONObject,
    scan_array: Callable = scanner.scan_array,
    scan_string: Callable = scanner.scan_string,
    scan_date: Callable = scanner.scan_date,
    scan_number: Callable = scanner.scan_number,
    fallback_scanner: Optional[Callable] = None,
    disable_extras: bool = False,
    ignore_extra_data: bool = False,
) -> Union[Any, Tuple[Any, int]]:
    """Scan a JSONesque string and return the corresponding Python object.

    How JSON objects are represented is decided by the
    ``object_converter`` callable. By default, JSON objects become
    simple Python namespace objects whose items can be read with either
    dotted or bracket notation. Such an object can be turned into a
    plain dict with ``dict(obj)``; alternatively, pass
    ``object_converter=None`` to get plain dicts back directly.

    A custom object converter can be supplied to control object
    creation, perhaps based on a type field::

        def converter(obj):
            if "__type__" in obj:
                # Convert to type based on __type__
                ...
            # Don't convert since no type was specified
            return obj

    Errors encountered while decoding are reported with exceptions
    that all derive from :class:`DecodeError`, which in turn derives
    from the builtin :class:`ValueError`.

    Examples::

        >>> import arrow, math

        >>> decode("") is None
        True

        >>> d = decode("2021-06-23")
        >>> d.timetuple()[:5]
        (2021, 6, 23, 0, 0)

        >>> t = arrow.now()
        >>> d = decode("12:00")
        >>> d.timetuple()[:5] == (t.year, t.month, t.day, 12, 0)
        True

        >>> d = decode("2021-06-23T12:00")
        >>> d.timetuple()[:6]
        (2021, 6, 23, 12, 0, 0)

        >>> decode("[inf, nan]")
        [inf, nan]

        >>> decode("E") == math.e
        True
        >>> (decode("π"), decode("PI")) == (math.pi, math.pi)
        True
        >>> (decode("τ"), decode("TAU")) == (math.tau, math.tau)
        True

        >>> decode("0"), decode("+0"), decode("-0"), decode("000")
        (0, 0, 0, 0)
        >>> decode("1"), decode("+1"), decode("-1")
        (1, 1, -1)
        >>> decode("1.0"), decode("+1.0"), decode("-1.0")
        (1.0, 1.0, -1.0)
        >>> decode("0b11"), decode("0o11"), decode("0x11")
        (3, 9, 17)

        >>> decode("{}", object_converter=None), decode("[]")
        ({}, [])
        >>> decode("[0b11, 11, 0x11]")
        [3, 11, 17]

    When the ``ignore_extra_data`` flag is set, a tuple is returned
    containing 1) the Python object for the part of the string that
    was successfully parsed and 2) the index in the string where the
    extra data starts. Extra data usually indicates an error, but this
    mode can be used to intentionally include extra data:

        >>> decode('{} # ignored', object_converter=None, ignore_extra_data=True)
        ({}, 3)

    Without that flag, :class:`ExtraneousData` is raised when scanning
    stops before the end of the string.

    For cases needing further customization, an advanced/esoteric
    ``fallback_scanner`` can be supplied. This is a callable that
    accepts a :class:`Scanner` instance, the JSON string, and the
    current index/position and returns a Python value along with the
    next index/position in the JSON string. See the scanners in
    :mod:`jsonesque.scanner` for examples.

    """
    json_scanner = scanner.Scanner(
        strict=strict,
        scan_object=scan_object,
        object_converter=object_converter,
        scan_array=scan_array,
        scan_string=scan_string,
        scan_date=scan_date,
        scan_number=scan_number,
        disable_extras=disable_extras,
        fallback_scanner=fallback_scanner,
    )
    value, end = json_scanner.scan(string)

    # The caller opted in to leftovers: report where scanning stopped.
    if ignore_extra_data:
        return value, end

    # Otherwise the scan must have consumed the entire input.
    if end != len(string):
        raise ExtraneousData(string, end)

    return value
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
"""JSONesque Encoder | ||
For now, this just passes through to the stdlib | ||
:class:`json.JSONEncoder` and :func:`json.dumps`. | ||
""" | ||
import json | ||
|
||
|
||
# ``encode`` is a direct alias of the stdlib serializer: it is the very same
# function object as ``json.dumps``, so it takes the same arguments.
encode = json.dumps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
class DecodeError(ValueError):
    """Base error carrying the location of a problem in a decoded string.

    The exception text is ``message`` followed by the 1-based line and
    column and the 0-based character position of the problem.

    The constructor arguments are stored as ``string``, ``position`` and
    ``message``; the computed location is stored as ``line`` and
    ``column``.

    """

    def __init__(self, string, position, message):
        # Lines are 1-based: count the newlines that precede the position.
        line = string.count("\n", 0, position) + 1
        # ``rfind`` yields -1 when no newline precedes the position, which
        # keeps the column 1-based on the first line too.
        column = position - string.rfind("\n", 0, position)
        super().__init__(
            f"{message} at line {line} column {column} (position {position})"
        )
        self.string = string
        self.message = message
        self.position = position
        self.line = line
        self.column = column

    def __reduce__(self):
        """Support ``copy`` and ``pickle`` without re-running ``__init__``.

        ``self.args`` holds only the formatted message, so the default
        exception reduction would call ``cls(formatted_message)`` and
        fail on the missing ``position``/``message`` arguments. Instead,
        rebuild through ``__new__`` (which restores ``args``) and restore
        the instance attributes from the saved state.

        """
        import copyreg

        return copyreg.__newobj__, (type(self), *self.args), dict(vars(self))
|
||
|
||
class ExpectedBracket(DecodeError):
    """Decode error reporting that a particular bracket was expected.

    The bracket is stored on the ``bracket`` attribute. When no
    ``message`` is given, one naming the bracket is generated.

    """

    def __init__(self, string, position, bracket, message=None):
        text = message if message is not None else f"Expected bracket `{bracket}`"
        super().__init__(string, position, text)
        self.bracket = bracket
|
||
|
||
class ExpectedDelimiter(DecodeError):
    """Decode error reporting that a particular delimiter was expected.

    The delimiter is stored on the ``delimiter`` attribute. When no
    ``message`` is given, one naming the delimiter is generated.

    """

    def __init__(self, string, position, delimiter, message=None):
        text = (
            message if message is not None else f"Expected delimiter `{delimiter}`"
        )
        super().__init__(string, position, text)
        self.delimiter = delimiter
|
||
|
||
class ExpectedKey(DecodeError):
    """Decode error whose message defaults to ``"Expected key"``."""

    def __init__(self, string, position, message="Expected key"):
        super().__init__(string, position, message=message)
|
||
|
||
class ExpectedValue(DecodeError):
    """Decode error whose message defaults to ``"Expected value"``."""

    def __init__(self, string, position, message="Expected value"):
        super().__init__(string, position, message=message)
|
||
|
||
class ExtraneousData(DecodeError):
    """Decode error for data left over at the reported position.

    The default message notes that this likely indicates a malformed
    JSON document.

    """

    def __init__(
        self,
        string,
        position,
        message="Extraneous data (likely indicating a malformed JSON document)",
    ):
        super().__init__(string, position, message=message)
|
||
|
||
class UnexpectedToken(DecodeError):
    """Decode error reporting a token that was not expected.

    The token is stored on the ``token`` attribute. When no ``message``
    is given, one quoting the token is generated.

    """

    def __init__(self, string, position, token, message=None):
        text = message if message is not None else f"Unexpected token `{token}`"
        super().__init__(string, position, text)
        self.token = token
|
||
|
||
class UnknownToken(DecodeError):
    """Decode error reporting a token that is not known.

    The token is stored on the ``token`` attribute. When no ``message``
    is given, one quoting the token is generated.

    """

    def __init__(self, string, position, token, message=None):
        text = message if message is not None else f"Unknown token `{token}`"
        super().__init__(string, position, text)
        self.token = token
|
||
|
||
class UnmatchedBracket(DecodeError):
    """Decode error reporting a bracket that has no match.

    The bracket is stored on the ``bracket`` attribute. When no
    ``message`` is given, one naming the bracket is generated.

    .. note:: In this constructor, ``bracket`` comes *before*
        ``position`` in the positional argument order.

    """

    def __init__(self, string, bracket, position, message=None):
        text = message if message is not None else f"Unmatched bracket `{bracket}`"
        super().__init__(string, position, text)
        self.bracket = bracket
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import types | ||
|
||
|
||
class JSONObject(types.SimpleNamespace):
    """An attribute bucket that also supports bracket access.

    By default, JSON objects are converted to this type.

    .. note:: If you generally need/want to work with dicts instead,
        pass ``None`` as the ``object_converter`` to :class:`Scanner`
        and/or its callers.

    Items can be read using dotted or bracket notation::

        >>> obj = JSONObject(x=1)
        >>> obj.x
        1
        >>> obj["x"]
        1

    Iterating yields ``(name, value)`` pairs, so an object can be
    converted to a dict by calling `dict` on it::

        >>> obj = JSONObject(x=1)
        >>> dict(obj)
        {'x': 1}

    """

    def __getitem__(self, name):
        # Look the item up directly in the instance namespace; a missing
        # name raises KeyError, as for a dict.
        return vars(self)[name]

    def __setitem__(self, name, value):
        # Write straight into the instance namespace.
        vars(self)[name] = value

    def __iter__(self):
        # Iterate over (name, value) pairs rather than names only.
        return iter(vars(self).items())
Oops, something went wrong.