Permalink
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 278 lines (247 sloc) 8.89 KB
"""
Implementation of JSONDecoder
"""
import re
from scanner import Scanner, pattern
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
def _floatconstants():
import struct
import sys
_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
if sys.byteorder != 'big':
# NOTE(kgibbs): These lines must be added to make this file work under
# Python 2.2, which is commonly used at Google.
bytes_list = list(_BYTES)
bytes_first_half = bytes_list[:8]; bytes_first_half.reverse()
bytes_second_half = bytes_list[8:]; bytes_second_half.reverse()
_BYTES = "".join(bytes_first_half + bytes_second_half)
# NOTE(kgibbs): End changes.
nan, inf = struct.unpack('dd', _BYTES)
return nan, inf, -inf
NaN, PosInf, NegInf = _floatconstants()
def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1
if lineno == 1:
colno = pos
else:
colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
lineno, colno = linecol(doc, pos)
if end is None:
return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
msg, lineno, colno, endlineno, endcolno, pos, end)
_CONSTANTS = {
'-Infinity': NegInf,
'Infinity': PosInf,
'NaN': NaN,
'true': True,
'false': False,
'null': None,
}
def JSONConstant(match, context, c=_CONSTANTS):
return c[match.group(0)], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
def JSONNumber(match, context):
match = JSONNumber.regex.match(match.string, *match.span())
integer, frac, exp = match.groups()
if frac or exp:
res = float(integer + (frac or '') + (exp or ''))
else:
res = int(integer)
return res, None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS)
BACKSLASH = {
'"': u'"', '\\': u'\\', '/': u'/',
'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}
DEFAULT_ENCODING = "utf-8"
def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match):
if encoding is None:
encoding = DEFAULT_ENCODING
chunks = []
_append = chunks.append
begin = end - 1
while 1:
chunk = _m(s, end)
if chunk is None:
raise ValueError(
errmsg("Unterminated string starting at", s, begin))
end = chunk.end()
content, terminator = chunk.groups()
if content:
if not isinstance(content, unicode):
content = unicode(content, encoding)
_append(content)
if terminator == '"':
break
try:
esc = s[end]
except IndexError:
raise ValueError(
errmsg("Unterminated string starting at", s, begin))
if esc != 'u':
try:
m = _b[esc]
except KeyError:
raise ValueError(
errmsg("Invalid \\escape: %r" % (esc,), s, end))
end += 1
else:
esc = s[end + 1:end + 5]
try:
m = unichr(int(esc, 16))
if len(esc) != 4 or not esc.isalnum():
raise ValueError
except ValueError:
raise ValueError(errmsg("Invalid \\uXXXX escape", s, end))
end += 5
_append(m)
return u''.join(chunks), end
def JSONString(match, context):
encoding = getattr(context, 'encoding', None)
return scanstring(match.string, match.end(), encoding)
pattern(r'"')(JSONString)
WHITESPACE = re.compile(r'\s*', FLAGS)
def JSONObject(match, context, _w=WHITESPACE.match):
pairs = {}
s = match.string
end = _w(s, match.end()).end()
nextchar = s[end:end + 1]
# trivial empty object
if nextchar == '}':
return pairs, end + 1
if nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end))
end += 1
encoding = getattr(context, 'encoding', None)
while True:
key, end = scanstring(s, end, encoding)
end = _w(s, end).end()
if s[end:end + 1] != ':':
raise ValueError(errmsg("Expecting : delimiter", s, end))
end = _w(s, end + 1).end()
try:
value, end = JSONScanner.iterscan(s, idx=end).next()
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
pairs[key] = value
end = _w(s, end).end()
nextchar = s[end:end + 1]
end += 1
if nextchar == '}':
break
if nextchar != ',':
raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
end = _w(s, end).end()
nextchar = s[end:end + 1]
end += 1
if nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end - 1))
object_hook = getattr(context, 'object_hook', None)
if object_hook is not None:
pairs = object_hook(pairs)
return pairs, end
pattern(r'{')(JSONObject)
def JSONArray(match, context, _w=WHITESPACE.match):
values = []
s = match.string
end = _w(s, match.end()).end()
# look-ahead for trivial empty array
nextchar = s[end:end + 1]
if nextchar == ']':
return values, end + 1
while True:
try:
value, end = JSONScanner.iterscan(s, idx=end).next()
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
values.append(value)
end = _w(s, end).end()
nextchar = s[end:end + 1]
end += 1
if nextchar == ']':
break
if nextchar != ',':
raise ValueError(errmsg("Expecting , delimiter", s, end))
end = _w(s, end).end()
return values, end
pattern(r'\[')(JSONArray)
ANYTHING = [
JSONObject,
JSONArray,
JSONString,
JSONConstant,
JSONNumber,
]
JSONScanner = Scanner(ANYTHING)
class JSONDecoder(object):
"""
Simple JSON <http://json.org> decoder
Performs the following translations in decoding:
+---------------+-------------------+
| JSON | Python |
+===============+===================+
| object | dict |
+---------------+-------------------+
| array | list |
+---------------+-------------------+
| string | unicode |
+---------------+-------------------+
| number (int) | int, long |
+---------------+-------------------+
| number (real) | float |
+---------------+-------------------+
| true | True |
+---------------+-------------------+
| false | False |
+---------------+-------------------+
| null | None |
+---------------+-------------------+
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
their corresponding ``float`` values, which is outside the JSON spec.
"""
_scanner = Scanner(ANYTHING)
__all__ = ['__init__', 'decode', 'raw_decode']
def __init__(self, encoding=None, object_hook=None):
"""
``encoding`` determines the encoding used to interpret any ``str``
objects decoded by this instance (utf-8 by default). It has no
effect when decoding ``unicode`` objects.
Note that currently only encodings that are a superset of ASCII work,
strings of other encodings should be passed in as ``unicode``.
``object_hook``, if specified, will be called with the result
of every JSON object decoded and its return value will be used in
place of the given ``dict``. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting).
"""
self.encoding = encoding
self.object_hook = object_hook
def decode(self, s, _w=WHITESPACE.match):
"""
Return the Python representation of ``s`` (a ``str`` or ``unicode``
instance containing a JSON document)
"""
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
end = _w(s, end).end()
if end != len(s):
raise ValueError(errmsg("Extra data", s, end, len(s)))
return obj
def raw_decode(self, s, **kw):
"""
Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
with a JSON document) and return a 2-tuple of the Python
representation and the index in ``s`` where the document ended.
This can be used to decode a JSON document from a string that may
have extraneous data at the end.
"""
kw.setdefault('context', self)
try:
obj, end = self._scanner.iterscan(s, **kw).next()
except StopIteration:
raise ValueError("No JSON object could be decoded")
return obj, end
__all__ = ['JSONDecoder']