Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new reject_bytes option to raise on bytes #266

Merged
merged 10 commits into from
May 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions lib/ultrajson.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ typedef struct __JSONObjectEncoder
This is not valid JSON. */
int allowNan;

/*
If true, bytes objects are rejected with an error instead of being encoded as UTF-8 strings. */
int rejectBytes;

/*
Private pointer to be used by the caller. Passed as encoder_prv in JSONTypeContext */
void *prv;
Expand Down
25 changes: 20 additions & 5 deletions python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -525,11 +525,19 @@ static void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc, JSONObject
return;
}
else
if (PyBytes_Check(obj))
if (UNLIKELY(PyBytes_Check(obj)))
{
PRINTMARK();
pc->PyTypeToJSON = PyStringToUTF8; tc->type = JT_UTF8;
return;
if (enc->rejectBytes)
{
PyErr_Format (PyExc_TypeError, "reject_bytes is on and '%s' is bytes", PyBytes_AS_STRING(obj));
goto INVALID;
}
else
{
pc->PyTypeToJSON = PyStringToUTF8; tc->type = JT_UTF8;
return;
}
}
else
if (PyUnicode_Check(obj))
Expand Down Expand Up @@ -739,7 +747,7 @@ static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)

PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
{
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", "allow_nan", NULL };
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", "allow_nan", "reject_bytes", NULL };

char buffer[65536];
char *ret;
Expand All @@ -751,6 +759,7 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
PyObject *oescapeForwardSlashes = NULL;
PyObject *osortKeys = NULL;
int allowNan = -1;
int orejectBytes = -1;

JSONObjectEncoder encoder =
{
Expand All @@ -776,13 +785,14 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
0, //sortKeys
0, //indent
1, //allowNan
1, //rejectBytes
NULL, //prv
};


PRINTMARK();

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOpi", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent, &allowNan))
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOpii", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent, &allowNan, &orejectBytes))
{
return NULL;
}
Expand Down Expand Up @@ -818,6 +828,11 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
csNan = "NaN";
}

if (orejectBytes != -1)
{
encoder.rejectBytes = orejectBytes;
}


dconv_d2s_init(DCONV_D2S_EMIT_TRAILING_DECIMAL_POINT | DCONV_D2S_EMIT_TRAILING_ZERO_AFTER_POINT,
csInf, csNan, 'e', DCONV_DECIMAL_IN_SHORTEST_LOW, DCONV_DECIMAL_IN_SHORTEST_HIGH, 0, 0);
Expand Down
3 changes: 2 additions & 1 deletion python/ujson.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ PyObject* JSONFileToObj(PyObject* self, PyObject *args, PyObject *kwargs);
/* Option summary appended to the encoder docstrings in the method table
   (e.g. "encode"). The pieces are joined by adjacent string-literal
   concatenation, so every sentence except the last must end with a
   trailing space or the sentences run together in the help output. */
#define ENCODER_HELP_TEXT "Use ensure_ascii=false to output UTF-8. " \
    "Set encode_html_chars=True to encode < > & as unicode escape sequences. " \
    "Set escape_forward_slashes=False to prevent escaping / characters. " \
    "Set allow_nan=False to raise an exception when NaN or Inf would be serialized. " \
    "Set reject_bytes=True to raise TypeError on bytes."

static PyMethodDef ujsonMethods[] = {
{"encode", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT},
Expand Down
21 changes: 19 additions & 2 deletions tests/test_ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def test_decode_dict():
def test_encode_unicode_4_bytes_utf8_fail():
    # The six-byte sequence below is expected to be refused by the encoder
    # with OverflowError.
    test_input = b"\xfd\xbf\xbf\xbf\xbf\xbf"
    with pytest.raises(OverflowError):
        # reject_bytes=False is required: bytes are rejected by default with
        # TypeError, which would fire before the encoder ever sees the data.
        ujson.encode(test_input, reject_bytes=False)


def test_encode_null_character():
Expand Down Expand Up @@ -380,7 +380,7 @@ def test_encode_big_escape():
for x in range(10):
base = "\u00e5".encode()
test_input = base * 1024 * 1024 * 2
ujson.encode(test_input)
ujson.encode(test_input, reject_bytes=False)


def test_decode_big_escape():
Expand Down Expand Up @@ -757,6 +757,23 @@ def test_loads(test_input, expected):
assert ujson.loads(test_input) == expected


def test_reject_bytes_default():
    """dumps() raises TypeError for a bytes value when reject_bytes is not passed."""
    payload = {"a": b"b"}
    with pytest.raises(TypeError):
        ujson.dumps(payload)


def test_reject_bytes_true():
    """dumps() raises TypeError for a bytes value when reject_bytes=True is explicit."""
    payload = {"a": b"b"}
    with pytest.raises(TypeError):
        ujson.dumps(payload, reject_bytes=True)


def test_reject_bytes_false():
    """With reject_bytes=False a bytes value is serialized as a JSON string."""
    payload = {"a": b"b"}
    encoded = ujson.dumps(payload, reject_bytes=False)
    assert encoded == '{"a":"b"}'


"""
def test_decode_numeric_int_frc_overflow():
input = "X.Y"
Expand Down