Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize benchmarks #532

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
c9df712
Fix handling of surrogates on encoding
JustAnotherArchivist Apr 17, 2022
92b044d
Allow str and None values for indent
Erotemic Apr 4, 2022
5c05078
Use older PyObject_call API
Erotemic Apr 5, 2022
c71d9ab
Allow custom indent
Erotemic Apr 5, 2022
d82654b
Debug code
Erotemic Apr 11, 2022
4c68a0b
Differentiate integer vs explicit indent
Erotemic Apr 18, 2022
993b262
remove printf
Erotemic Apr 18, 2022
4ae8a0b
Use PyUnicode_AsEncodedString
Erotemic Apr 18, 2022
70e9085
Use PyUnicode_AsEncodedString
Erotemic Apr 18, 2022
6861525
Enable all agree checks
Erotemic Apr 18, 2022
183d863
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 18, 2022
fa84e27
remove flake8 long lines
Erotemic Apr 18, 2022
84ca3d1
Merge branch 'nonint_indent' of github.com:Erotemic/ultrajson into no…
Erotemic Apr 18, 2022
5853a4f
remove compat tests
Erotemic Apr 18, 2022
ed8de3c
Fix negative allocation
Erotemic Apr 18, 2022
520f2ec
remove non portable min/max
Erotemic Apr 18, 2022
1e0885d
Remove max in favor of indentEnabled
Erotemic Apr 20, 2022
2cc3544
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 20, 2022
75895fc
Fix -1 length issue and revert extra spaces in tests
Erotemic Apr 20, 2022
35bb31a
Generalize the way new json modules can be added to existing benchmarks
Erotemic Apr 20, 2022
cbd3019
Proof of concept for graphical benchmarks
Erotemic Apr 20, 2022
9864596
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 20, 2022
85260e6
Cleanup openskill code
Erotemic Apr 21, 2022
75cd254
Fix merge issue
Erotemic Apr 21, 2022
1ccacdd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions lib/ultrajson.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,10 @@ typedef struct __JSONObjectEncoder

/*
Configuration for spaces of indent */
int indent;
int indentLength;
const char* indentChars;
int indentIsSpace; // encodes if the indent is given in indentChars or if it should just be pure spaces
int indentEnabled; // the user can request an indent of length 0. This encodes if the indent is enabled or not.

/*
If true, NaN will be encoded as a string matching the Python standard library's JSON behavior.
Expand Down Expand Up @@ -300,18 +303,21 @@ obj - An anonymous type representing the object
enc - Function definitions for querying JSOBJ type
buffer - Preallocated buffer to store result in. If NULL function allocates own buffer
cbBuffer - Length of buffer (ignored if buffer is NULL)
outLen - Will store the length of the encoded string

Returns:
Encoded JSON object as a null terminated char string.
Encoded JSON object as a char string.

NOTE:
If the supplied buffer wasn't enough to hold the result the function will allocate a new buffer.
Life cycle of the provided buffer must still be handled by caller.

If the return value doesn't equal the specified buffer caller must release the memory using
JSONObjectEncoder.free or free() as specified when calling this function.

If an error occurs during encoding, NULL is returned and no outLen is stored.
*/
EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *buffer, size_t cbBuffer);
EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *buffer, size_t cbBuffer, size_t *outLen);

typedef struct __JSONObjectDecoder
{
Expand Down
38 changes: 25 additions & 13 deletions lib/ultrajsonenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ Numeric decoder derived from from TCL library
#define snprintf sprintf_s
#endif



/*
Worst cases being:

Expand Down Expand Up @@ -544,16 +546,27 @@ static FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, ch

static void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc)
{
if (enc->indent > 0) Buffer_AppendCharUnchecked(enc, '\n');
if (enc->indentEnabled) Buffer_AppendCharUnchecked(enc, '\n');
}

static void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value)
{
int i;
if (enc->indent > 0)
while (value-- > 0)
for (i = 0; i < enc->indent; i++)
Buffer_AppendCharUnchecked(enc, ' ');
if (enc->indentEnabled)
{
if (enc->indentIsSpace == 1)
{
while (value-- > 0)
for (i = 0; i < enc->indentLength; i++)
Buffer_AppendCharUnchecked(enc, ' ');
}
else
{
while (value-- > 0)
for (i = 0; i < enc->indentLength; i++)
Buffer_AppendCharUnchecked(enc, enc->indentChars[i]);
}
}
}

static void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value)
Expand Down Expand Up @@ -655,7 +668,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
Buffer_AppendCharUnchecked(enc, '\"');

Buffer_AppendCharUnchecked (enc, ':');
if (enc->indent)
if (enc->indentEnabled)
{
Buffer_AppendCharUnchecked (enc, ' ');
}
Expand Down Expand Up @@ -698,7 +711,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
while (enc->iterNext(obj, &tc))
{
// The extra 2 bytes cover the comma and (optional) newline.
Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2);
Buffer_Reserve (enc, enc->indentLength * (enc->level + 1) + 2);

if (count > 0)
{
Expand All @@ -725,7 +738,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c

if (count > 0) {
// Reserve space for the indentation plus the newline.
Buffer_Reserve (enc, enc->indent * enc->level + 1);
Buffer_Reserve (enc, enc->indentLength * enc->level + 1);
Buffer_AppendIndentNewlineUnchecked (enc);
Buffer_AppendIndentUnchecked (enc, enc->level);
}
Expand All @@ -743,7 +756,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
while ((res = enc->iterNext(obj, &tc)))
{
// The extra 2 bytes cover the comma and optional newline.
Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2);
Buffer_Reserve (enc, enc->indentLength * (enc->level + 1) + 2);

if(res < 0)
{
Expand Down Expand Up @@ -778,7 +791,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
enc->iterEnd(obj, &tc);

if (count > 0) {
Buffer_Reserve (enc, enc->indent * enc->level + 1);
Buffer_Reserve (enc, enc->indentLength * enc->level + 1);
Buffer_AppendIndentNewlineUnchecked (enc);
Buffer_AppendIndentUnchecked (enc, enc->level);
}
Expand Down Expand Up @@ -905,7 +918,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
enc->level--;
}

char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer)
char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer, size_t *_outLen)
{
enc->malloc = enc->malloc ? enc->malloc : malloc;
enc->free = enc->free ? enc->free : free;
Expand Down Expand Up @@ -941,12 +954,11 @@ char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t

encode (obj, enc, NULL, 0);

Buffer_Reserve(enc, 1);
if (enc->errorMsg)
{
return NULL;
}
Buffer_AppendCharUnchecked(enc, '\0');

*_outLen = enc->offset - enc->start;
return enc->start;
}
115 changes: 98 additions & 17 deletions python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,29 +114,39 @@ static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, siz
return PyBytes_AsString(obj);
}

static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
static char *PyUnicodeToUTF8Raw(JSOBJ _obj, size_t *_outLen, PyObject *bytesObj)
{
/*
Converts the PyUnicode object to char* whose size is stored in _outLen.
This conversion may require the creation of an intermediate PyBytes object.
In that case, the returned char* is in fact the internal buffer of that PyBytes object,
and when the char* buffer is no longer needed, the bytesObj must be DECREF'd.
*/
PyObject *obj = (PyObject *) _obj;
PyObject *newObj;

#ifndef Py_LIMITED_API
if (PyUnicode_IS_COMPACT_ASCII(obj))
{
Py_ssize_t len;
char *data = PyUnicode_AsUTF8AndSize(obj, &len);
const char *data = PyUnicode_AsUTF8AndSize(obj, &len);
*_outLen = len;
return data;
}
#endif
newObj = PyUnicode_AsUTF8String(obj);
if(!newObj)

bytesObj = PyUnicode_AsEncodedString (obj, "utf-8", "surrogatepass");
if (!bytesObj)
{
return NULL;
}

GET_TC(tc)->newObj = newObj;
*_outLen = PyBytes_Size(bytesObj);
return PyBytes_AsString(bytesObj);
}

*_outLen = PyBytes_Size(newObj);
return PyBytes_AsString(newObj);
static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
{
  /*
  Exposes the str object _obj as UTF-8 bytes and stores the byte count in *_outLen.
  Returns NULL when the encoding step fails; *_outLen is left untouched in that case.

  The conversion is done here instead of being delegated to PyUnicodeToUTF8Raw: that helper
  receives its bytesObj argument by value, so an intermediate bytes object created there never
  reaches GET_TC(tc)->newObj and its reference would be lost.
  */
  PyObject *obj = (PyObject *) _obj;
  PyObject *bytesObj;

#ifndef Py_LIMITED_API
  if (PyUnicode_IS_COMPACT_ASCII(obj))
  {
    // ASCII strings need no intermediate object: the returned buffer belongs to obj itself.
    Py_ssize_t len;
    const char *data = PyUnicode_AsUTF8AndSize(obj, &len);
    *_outLen = len;
    return (void *) data;
  }
#endif

  // "surrogatepass" lets lone surrogates through instead of raising UnicodeEncodeError.
  bytesObj = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
  if (!bytesObj)
  {
    return NULL;
  }

  // Record the bytes object in the type context, because the returned pointer aliases its
  // internal buffer. NOTE(review): presumably newObj is released when the type context is
  // torn down - confirm against the end-of-type-context handler.
  GET_TC(tc)->newObj = bytesObj;

  *_outLen = PyBytes_Size(bytesObj);
  return PyBytes_AsString(bytesObj);
}

static void *PyRawJSONToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
Expand Down Expand Up @@ -240,7 +250,7 @@ static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc)
if (PyUnicode_Check(GET_TC(tc)->itemName))
{
itemNameTmp = GET_TC(tc)->itemName;
GET_TC(tc)->itemName = PyUnicode_AsUTF8String (GET_TC(tc)->itemName);
GET_TC(tc)->itemName = PyUnicode_AsEncodedString (GET_TC(tc)->itemName, "utf-8", "surrogatepass");
Py_DECREF(itemNameTmp);
}
else
Expand All @@ -263,7 +273,7 @@ static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc)
return -1;
}
itemNameTmp = GET_TC(tc)->itemName;
GET_TC(tc)->itemName = PyUnicode_AsUTF8String (GET_TC(tc)->itemName);
GET_TC(tc)->itemName = PyUnicode_AsEncodedString (GET_TC(tc)->itemName, "utf-8", "surrogatepass");
Py_DECREF(itemNameTmp);
}
PRINTMARK();
Expand Down Expand Up @@ -332,7 +342,7 @@ static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
// Subject the key to the same type restrictions and conversions as in Dict_iterGetValue.
if (PyUnicode_Check(key))
{
key = PyUnicode_AsUTF8String(key);
key = PyUnicode_AsEncodedString(key, "utf-8", "surrogatepass");
}
else if (!PyBytes_Check(key))
{
Expand All @@ -342,7 +352,7 @@ static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
goto error;
}
keyTmp = key;
key = PyUnicode_AsUTF8String(key);
key = PyUnicode_AsEncodedString(key, "utf-8", "surrogatepass");
Py_DECREF(keyTmp);
}
else
Expand Down Expand Up @@ -754,6 +764,32 @@ static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
return GET_TC(tc)->iterGetName(obj, tc, outLen);
}


static const char *_PyUnicodeToChars(PyObject *obj, int *_outLen)
{
  /*
  Helper for the indent option only: exposes the UTF-8 bytes of the str object obj.

  On success a pointer to the bytes is returned and the byte count is stored in *_outLen.
  On failure NULL is returned and *_outLen is set to -1, which is the sentinel the indent
  handling in objToJSON tests for.
  */
  PyObject *newObj;

#ifndef Py_LIMITED_API
  {
    // PyUnicode_AsUTF8AndSize caches the UTF-8 form inside obj, so nothing has to be released
    // by the caller and the buffer stays valid for as long as obj does.
    Py_ssize_t len = 0;
    const char *data = PyUnicode_AsUTF8AndSize(obj, &len);
    if (data)
    {
      *_outLen = (int) len;
      return data;
    }
    // Strict UTF-8 encoding failed (e.g. lone surrogates): drop that error and retry below
    // with the "surrogatepass" handler.
    PyErr_Clear();
  }
#endif

  newObj = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
  if (!newObj)
  {
    *_outLen = -1;
    return NULL;
  }

  // NOTE(review): the returned pointer aliases newObj's internal buffer, so newObj cannot be
  // released here. This signature gives the caller no handle to release it either, so the
  // reference is leaked on this path. Consider handing the bytes object back to the caller.
  *_outLen = (int) PyBytes_Size(newObj);
  return PyBytes_AsString(newObj);
}

PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
{
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", "allow_nan", "reject_bytes", "default", NULL };
Expand All @@ -768,8 +804,10 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
PyObject *oescapeForwardSlashes = NULL;
PyObject *osortKeys = NULL;
PyObject *odefaultFn = NULL;
PyObject *oindent = NULL;
int allowNan = -1;
int orejectBytes = -1;
size_t retLen;

JSONObjectEncoder encoder =
{
Expand All @@ -792,7 +830,10 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
0, //encodeHTMLChars
1, //escapeForwardSlashes
0, //sortKeys
0, //indent
0, //indentLength
NULL, //indentChars
0, // indentIsSpace
0, // indentEnabled
1, //allowNan
1, //rejectBytes
NULL, //prv
Expand All @@ -801,7 +842,7 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)

PRINTMARK();

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOiiiO", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent, &allowNan, &orejectBytes, &odefaultFn))
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOOiiO", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &oindent, &allowNan, &orejectBytes, &odefaultFn))
{
return NULL;
}
Expand All @@ -826,6 +867,47 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
encoder.sortKeys = 1;
}

if (oindent != NULL)
{
// Handle multiple input types
if (oindent == Py_None)
{
// Case where the indent is specified as None
// This should be exactly the same as if oindent is NULL
encoder.indentLength = 0;
}
else if (PyLong_Check(oindent))
{
// Case where the indent is specified as an integer
// In this case the indent characters should only be
// space chars - i.e. chr(32)
encoder.indentLength = PyLong_AsLong(oindent);
encoder.indentIsSpace = 1;
encoder.indentEnabled = 1;
if (encoder.indentLength < 0)
{
encoder.indentLength = 0;
}
}
else if (PyUnicode_Check(oindent))
{
// Case where custom UTF-8 indent is specified.
encoder.indentLength = -1; // set to -1 to indicate an error
encoder.indentChars = _PyUnicodeToChars(oindent, &encoder.indentLength);
encoder.indentEnabled = 1;
if(encoder.indentChars == NULL && encoder.indentLength == -1)
{
PyErr_SetString(PyExc_ValueError, "malformed indent");
return NULL;
}
}
else
{
PyErr_Format (PyExc_TypeError, "expected integer, None, or str indent");
return NULL;
}
}

if (allowNan != -1)
{
encoder.allowNan = allowNan;
Expand Down Expand Up @@ -853,7 +935,7 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
csInf, csNan, 'e', DCONV_DECIMAL_IN_SHORTEST_LOW, DCONV_DECIMAL_IN_SHORTEST_HIGH, 0, 0);

PRINTMARK();
ret = JSON_EncodeObject (oinput, &encoder, buffer, sizeof (buffer));
ret = JSON_EncodeObject (oinput, &encoder, buffer, sizeof (buffer), &retLen);
PRINTMARK();

dconv_d2s_free(&encoder.d2s);
Expand All @@ -874,15 +956,14 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
return NULL;
}

newobj = PyUnicode_FromString (ret);
newobj = PyUnicode_DecodeUTF8(ret, retLen, "surrogatepass");

if (ret != buffer)
{
encoder.free (ret);
}

PRINTMARK();

return newobj;
}

Expand Down
Loading