Skip to content

Commit

Permalink
Add custom JSON parser
Browse files Browse the repository at this point in the history
This expands on the stdlib parser, allowing additional number syntaxes,
literal dates & times, inline comments, trailing commas, and additional
customization hooks.
  • Loading branch information
wylee committed Jun 24, 2021
1 parent 3788bf7 commit 00e173b
Show file tree
Hide file tree
Showing 14 changed files with 898 additions and 237 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ readme = "README.md"
keywords = ["django", "settings", "local", "env", "environment"]

packages = [
{ include = "local_settings", from = "src" }
{ include = "local_settings", from = "src" },
{ include = "jsonesque", from = "src" },
]

include = [
Expand Down
3 changes: 3 additions & 0 deletions src/jsonesque/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .decoder import decode, decode as loads # noqa: F401
from .encoder import encode, encode as dumps # noqa: F401
from .exc import DecodeError # noqa: F401
174 changes: 174 additions & 0 deletions src/jsonesque/decoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
"""JSONesque Decoder
In addition to standard JSON, the JSONesque decoder also supports/
handles the following:
- *All* scanning methods can be overridden if some additional
customization is required
- An object converter can be specified to convert plain Python dicts
parsed from JSON into specialized objects; by default, objects will
be converted to :class:`scanner.JSONObject`, which allows items to be
accessed with either dotted or bracket notation
- A fallback scanner method can be provided to handle additional types
of values
- Trailing commas
- Line comments starting with //
- Any valid Python int or float:
- Literal binary, octal, and hex values
- Underscore separators in numbers
- Unary plus operator
- Literal (unquoted) dates & times:
- 2021-06
- 2021-06-23
- 2021-06-23T12:00
- 2021-06-23T12:00Z
- 2021-06-23T12:00-07:00
- 12:00 (today's date at noon)
.. note:: For dates and times, when a time zone isn't specified, the
local time zone will be used.
Examples::
>>> decode("+1")
1
>>> decode("1_000")
1000
>>> decode("[0b11]")
[3]
>>> d = decode("2021-06-24")
>>> d.timetuple()[:5]
(2021, 6, 24, 0, 0)
>>> d.tzinfo
tzlocal()
>>> decode(" [1, 2 , 3 , ] ")
[1, 2, 3]
>>> decode("[[]]")
[[]]
"""
from typing import Any, Callable, Optional, Tuple, Union

from . import scanner
from .exc import ExtraneousData


def decode(
    string: str,
    *,
    strict: bool = True,
    scan_object: Callable = scanner.scan_object,
    object_converter: Optional[Callable] = scanner.JSONObject,
    scan_array: Callable = scanner.scan_array,
    scan_string: Callable = scanner.scan_string,
    scan_date: Callable = scanner.scan_date,
    scan_number: Callable = scanner.scan_number,
    fallback_scanner: Optional[Callable] = None,
    disable_extras: bool = False,
    ignore_extra_data: bool = False,
) -> Union[Any, Tuple[Any, int]]:
    """Scan JSONesque string and return a Python object.

    The type of the object is determined by the ``object_converter``
    callable. By default, JSON objects are converted to simple Python
    namespace objects that allow attributes to be accessed via dotted or
    bracket notation. These objects can be converted to plain dicts with
    ``dict(obj)`` or you can use ``object_converter=None`` to get back
    plain dicts.

    A different object converter can be passed to customize object
    creation, perhaps based on a type field::

        def converter(obj):
            if "__type__" in obj:
                # Convert to type based on __type__
                ...
            # Don't convert since no type was specified
            return obj

    All keyword arguments other than ``ignore_extra_data`` are passed
    through unchanged to :class:`scanner.Scanner`.

    When errors are encountered, various kinds of exceptions are
    thrown. These all derive from :class:`DecodeError`, which in turn
    derives from the builtin :class:`ValueError`.

    Examples::

        >>> import arrow, math
        >>> decode("") is None
        True
        >>> d = decode("2021-06-23")
        >>> d.timetuple()[:5]
        (2021, 6, 23, 0, 0)
        >>> t = arrow.now()
        >>> d = decode("12:00")
        >>> d.timetuple()[:5] == (t.year, t.month, t.day, 12, 0)
        True
        >>> d = decode("2021-06-23T12:00")
        >>> d.timetuple()[:6]
        (2021, 6, 23, 12, 0, 0)
        >>> decode("[inf, nan]")
        [inf, nan]
        >>> decode("E") == math.e
        True
        >>> (decode("π"), decode("PI")) == (math.pi, math.pi)
        True
        >>> (decode("τ"), decode("TAU")) == (math.tau, math.tau)
        True
        >>> decode("0"), decode("+0"), decode("-0"), decode("000")
        (0, 0, 0, 0)
        >>> decode("1"), decode("+1"), decode("-1")
        (1, 1, -1)
        >>> decode("1.0"), decode("+1.0"), decode("-1.0")
        (1.0, 1.0, -1.0)
        >>> decode("0b11"), decode("0o11"), decode("0x11")
        (3, 9, 17)
        >>> decode("{}", object_converter=None), decode("[]")
        ({}, [])
        >>> decode("[0b11, 11, 0x11]")
        [3, 11, 17]

    When the ``ignore_extra_data`` flag is set, a tuple will be
    returned containing 1) a Python object representing the part of the
    JSON string that was successfully parsed and 2) the index in the
    JSON string where the extra data starts. In most cases, extra data
    indicates an error, but this functionality can be used to
    intentionally include extra data::

        >>> decode('{} # ignored', object_converter=None, ignore_extra_data=True)
        ({}, 3)

    An advanced/esoteric feature for use where additional customization
    is required is the ``fallback_scanner``. This is a callable that
    accepts a :class:`Scanner` instance, the JSON string, and the
    current index/position and returns a Python value along with the
    next index/position in the JSON string. See the scanners in
    :mod:`jsonesque.scanner` for examples.

    :returns: The decoded object, or an ``(object, index)`` tuple when
        ``ignore_extra_data`` is set.
    :raises ExtraneousData: If ``ignore_extra_data`` is not set and the
        scanner stopped before the end of ``string``.

    """
    instance = scanner.Scanner(
        strict=strict,
        scan_object=scan_object,
        object_converter=object_converter,
        scan_array=scan_array,
        scan_string=scan_string,
        scan_date=scan_date,
        scan_number=scan_number,
        disable_extras=disable_extras,
        fallback_scanner=fallback_scanner,
    )
    # ``i`` is the index in ``string`` at which the scanner stopped.
    obj, i = instance.scan(string)
    if ignore_extra_data:
        # The caller wants to handle any trailing data itself.
        return obj, i
    elif i != len(string):
        # Anything left over after the scanned value is an error.
        raise ExtraneousData(string, i)
    return obj
10 changes: 10 additions & 0 deletions src/jsonesque/encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""JSONesque Encoder
For now, this just passes through to the stdlib
:class:`json.JSONEncoder` and :func:`json.dumps`.
"""
import json


# Public ``encode`` entry point: a direct alias of the stdlib ``json.dumps``.
encode = json.dumps
73 changes: 73 additions & 0 deletions src/jsonesque/exc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
class DecodeError(ValueError):
    """Error raised while decoding a JSONesque string.

    The exception text is ``message`` followed by the line, column, and
    position at which the error occurred.

    The following attributes are set on each instance:

    - ``string``: the string that was being decoded
    - ``message``: the bare message (without location info)
    - ``position``: the index into ``string`` where the error occurred
    - ``line``: the 1-based line number of ``position``
    - ``column``: the 1-based column number of ``position``

    """

    def __init__(self, string, position, message):
        # One more than the number of newlines that precede ``position``.
        line = string.count("\n", 0, position) + 1
        # ``rfind`` returns -1 when no newline precedes ``position``, so
        # the column is 1-based on the first line as well as later ones.
        column = position - string.rfind("\n", 0, position)
        super_message = (
            f"{message} at line {line} column {column} (position {position})"
        )
        super().__init__(super_message)
        self.string = string
        self.message = message
        self.position = position
        self.line = line
        self.column = column


class ExpectedBracket(DecodeError):
    """Decode error reporting that a specific bracket was expected.

    The expected bracket is stored on the ``bracket`` attribute. When no
    ``message`` is given, a default one naming the bracket is used.

    """

    def __init__(self, string, position, bracket, message=None):
        if message is None:
            message = f"Expected bracket `{bracket}`"
        super().__init__(string, position, message)
        self.bracket = bracket


class ExpectedDelimiter(DecodeError):
    """Decode error reporting that a specific delimiter was expected.

    The expected delimiter is stored on the ``delimiter`` attribute.
    When no ``message`` is given, a default one naming the delimiter is
    used.

    """

    def __init__(self, string, position, delimiter, message=None):
        if message is None:
            message = f"Expected delimiter `{delimiter}`"
        super().__init__(string, position, message)
        self.delimiter = delimiter


class ExpectedKey(DecodeError):
    """Decode error whose default message is "Expected key"."""

    def __init__(self, string, position, message="Expected key"):
        super().__init__(string, position, message)


class ExpectedValue(DecodeError):
    """Decode error whose default message is "Expected value"."""

    def __init__(self, string, position, message="Expected value"):
        super().__init__(string, position, message)


class ExtraneousData(DecodeError):
    """Decode error reporting data left over after the scanned value.

    ``position`` is the index at which the extra data starts.

    """

    def __init__(
        self,
        string,
        position,
        message="Extraneous data (likely indicating a malformed JSON document)",
    ):
        super().__init__(string, position, message)


class UnexpectedToken(DecodeError):
    """Decode error reporting an unexpected token.

    The offending token is stored on the ``token`` attribute. When no
    ``message`` is given, a default one naming the token is used.

    """

    def __init__(self, string, position, token, message=None):
        if message is None:
            message = f"Unexpected token `{token}`"
        super().__init__(string, position, message)
        self.token = token


class UnknownToken(DecodeError):
    """Decode error reporting an unknown token.

    The offending token is stored on the ``token`` attribute. When no
    ``message`` is given, a default one naming the token is used.

    """

    def __init__(self, string, position, token, message=None):
        if message is None:
            message = f"Unknown token `{token}`"
        super().__init__(string, position, message)
        self.token = token


class UnmatchedBracket(DecodeError):
    """Decode error reporting an unmatched bracket.

    The offending bracket is stored on the ``bracket`` attribute. When
    no ``message`` is given, a default one naming the bracket is used.

    .. note:: Unlike the other :class:`DecodeError` subclasses in this
        module, which take ``(string, position, <extra>)``, this
        constructor takes ``bracket`` *before* ``position``. The order
        is kept as-is so existing positional callers keep working.

    """

    def __init__(self, string, bracket, position, message=None):
        if message is None:
            message = f"Unmatched bracket `{bracket}`"
        super().__init__(string, position, message)
        self.bracket = bracket
36 changes: 36 additions & 0 deletions src/jsonesque/obj.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import types


class JSONObject(types.SimpleNamespace):
    """A simple bucket of attributes.

    JSON objects will be converted to this type by default.

    .. note:: In cases where you generally need/want to work with dicts,
        pass ``None`` as the ``object_converter`` to :class:`Scanner`
        and/or its callers.

    Items can be accessed via dotted or bracket notation::

        >>> obj = JSONObject(x=1)
        >>> obj.x
        1
        >>> obj["x"]
        1

    Objects can be converted to dicts by calling `dict` on them::

        >>> obj = JSONObject(x=1)
        >>> dict(obj)
        {'x': 1}

    """

    def __getitem__(self, name):
        # Bracket access reads straight from the attribute dict, so a
        # missing name raises ``KeyError`` rather than ``AttributeError``.
        return self.__dict__[name]

    def __setitem__(self, name, value):
        self.__dict__[name] = value

    def __iter__(self):
        # Iterating yields ``(name, value)`` pairs (not just names);
        # this is what makes ``dict(obj)`` work.
        return iter(self.__dict__.items())

0 comments on commit 00e173b

Please sign in to comment.