Skip to content

Commit 5216c96

Browse files
committed
Bug#35640263: Refactoring of json_diff/json_binary to enable HW
support for partial json [2/6, remove parameter, noclose] Json_path::parse_path() takes an unnecessary error handler argument. It takes the argument because it uses Json_dom::parse_dom() internally, and parse_dom() takes an error handler because it may raise an error if the JSON document is too deep. However, parse_path() uses parse_json() only for parsing member names, which are simple string scalars, so it does not need to handle nested JSON documents. This patch makes parse_path() invoke RapidJSON directly, with a handler that rejects everything except string scalars, and removes the "depth_handler" parameter in the signature of parse_path(). Change-Id: I0cf61cdcd0cf57b38d19f23bfdc1ecf1eeeec2bf
1 parent bf5d943 commit 5216c96

File tree

8 files changed

+119
-83
lines changed

8 files changed

+119
-83
lines changed

client/json_client_library_main.cc

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,22 @@
2525
Json_dom::seek, Json_dom::parse, json_binary::parse_binary,
2626
json_binary::serialize functions are visible and can be called.
2727
*/
28+
29+
#include <cassert>
2830
#include <cstring>
2931
#include <iostream>
32+
#include <memory>
33+
#include <new>
3034
#include <string>
35+
#include <string_view>
3136

37+
#include "field_types.h"
38+
#include "mysql/components/services/bits/psi_bits.h"
39+
#include "mysql_time.h"
3240
#include "sql-common/json_binary.h"
3341
#include "sql-common/json_dom.h"
3442
#include "sql-common/json_path.h"
35-
36-
#include "sql-common/json_error_handler.h"
43+
#include "sql-common/my_decimal.h"
3744
#include "sql_string.h"
3845

3946
void CoutDefaultDepthHandler() { std::cout << "Doc too deep"; }
@@ -55,8 +62,7 @@ int main() {
5562
const char *json_path = R"($**."512")";
5663
Json_path path(PSI_NOT_INSTRUMENTED);
5764
size_t bad_index;
58-
parse_path(std::strlen(json_path), json_path, &path, &bad_index,
59-
[] { assert(false); });
65+
parse_path(std::strlen(json_path), json_path, &path, &bad_index);
6066

6167
Json_wrapper wr(&o);
6268
wr.set_alias();

sql-common/json_diff.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ bool Json_diff_vector::read_binary(const char **from, const TABLE *table,
346346
size_t bad_index;
347347
DBUG_PRINT("info", ("path='%.*s'", (int)path_length, p));
348348
if (parse_path(path_length, pointer_cast<const char *>(p), &path,
349-
&bad_index, JsonDepthErrorHandler))
349+
&bad_index))
350350
goto corrupted;
351351
p += path_length;
352352
length -= path_length;

sql-common/json_path.cc

Lines changed: 58 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,15 @@
3333
#include "my_rapidjson_size_t.h" // IWYU pragma: keep
3434

3535
#include <assert.h>
36-
#include <rapidjson/encodings.h>
37-
#include <rapidjson/memorystream.h> // rapidjson::MemoryStream
3836
#include <stddef.h>
3937
#include <algorithm> // any_of
40-
#include <memory> // unique_ptr
4138
#include <string>
39+
#include <string_view>
40+
41+
#include <rapidjson/encodings.h>
42+
#include <rapidjson/error/error.h>
43+
#include <rapidjson/memorystream.h>
44+
#include <rapidjson/reader.h>
4245

4346
#include "my_inttypes.h"
4447
#include "mysql/strings/m_ctype.h"
@@ -47,7 +50,6 @@
4750
#include "sql/sql_const.h" // STRING_BUFFER_USUAL_SIZE
4851
#include "sql_string.h" // String
4952
#include "string_with_len.h"
50-
#include "template_utils.h" // down_cast
5153

5254
namespace {
5355

@@ -64,15 +66,15 @@ class Stream;
6466

6567
} // namespace
6668

67-
static bool is_ecmascript_identifier(const std::string &name);
69+
static bool is_ecmascript_identifier(const std::string_view &name);
6870
static bool is_digit(unsigned codepoint);
6971
static bool is_whitespace(char);
7072

71-
static bool parse_path(Stream *, Json_path *, const JsonErrorHandler &);
72-
static bool parse_path_leg(Stream *, Json_path *, const JsonErrorHandler &);
73+
static bool parse_path(Stream *, Json_path *);
74+
static bool parse_path_leg(Stream *, Json_path *);
7375
static bool parse_ellipsis_leg(Stream *, Json_path *);
7476
static bool parse_array_leg(Stream *, Json_path *);
75-
static bool parse_member_leg(Stream *, Json_path *, const JsonErrorHandler &);
77+
static bool parse_member_leg(Stream *, Json_path *);
7678

7779
static bool append_array_index(String *buf, size_t index, bool from_end) {
7880
if (!from_end) return buf->append_ulonglong(index);
@@ -253,10 +255,9 @@ class Stream {
253255

254256
/** Top level parsing factory method */
255257
bool parse_path(size_t path_length, const char *path_expression,
256-
Json_path *path, size_t *bad_index,
257-
const JsonErrorHandler &depth_handler) {
258+
Json_path *path, size_t *bad_index) {
258259
Stream stream(path_expression, path_length);
259-
if (parse_path(&stream, path, depth_handler)) {
260+
if (parse_path(&stream, path)) {
260261
*bad_index = stream.position() - path_expression;
261262
return true;
262263
}
@@ -275,13 +276,10 @@ static inline bool is_whitespace(char ch) {
275276
276277
@param[in,out] stream The stream to read the path expression from.
277278
@param[in,out] path The Json_path object to fill.
278-
@param[in] depth_handler Pointer to a function that should handle error
279-
occurred when depth is exceeded.
280279
281280
@return true on error, false on success
282281
*/
283-
static bool parse_path(Stream *stream, Json_path *path,
284-
const JsonErrorHandler &depth_handler) {
282+
static bool parse_path(Stream *stream, Json_path *path) {
285283
path->clear();
286284

287285
// the first non-whitespace character must be $
@@ -291,7 +289,7 @@ static bool parse_path(Stream *stream, Json_path *path,
291289
// now add the legs
292290
stream->skip_whitespace();
293291
while (!stream->exhausted()) {
294-
if (parse_path_leg(stream, path, depth_handler)) return true;
292+
if (parse_path_leg(stream, path)) return true;
295293
stream->skip_whitespace();
296294
}
297295

@@ -308,18 +306,15 @@ static bool parse_path(Stream *stream, Json_path *path,
308306
309307
@param[in,out] stream The stream to read the path expression from.
310308
@param[in,out] path The Json_path object to fill.
311-
@param[in] depth_handler Pointer to a function that should handle error
312-
occurred when depth is exceeded.
313309
314310
@return true on error, false on success
315311
*/
316-
static bool parse_path_leg(Stream *stream, Json_path *path,
317-
const JsonErrorHandler &depth_handler) {
312+
static bool parse_path_leg(Stream *stream, Json_path *path) {
318313
switch (stream->peek()) {
319314
case BEGIN_ARRAY:
320315
return parse_array_leg(stream, path);
321316
case BEGIN_MEMBER:
322-
return parse_member_leg(stream, path, depth_handler);
317+
return parse_member_leg(stream, path);
323318
case WILDCARD:
324319
return parse_ellipsis_leg(stream, path);
325320
default:
@@ -539,6 +534,28 @@ static const char *find_end_of_member_name(const char *start, const char *end) {
539534
return std::find_if(str, end, is_terminator);
540535
}
541536

537+
namespace {
538+
/// A RapidJSON handler which accepts a scalar string and nothing else.
539+
class MemberNameHandler
540+
: public rapidjson::BaseReaderHandler<rapidjson::UTF8<>,
541+
MemberNameHandler> {
542+
public:
543+
explicit MemberNameHandler(::String *name) : m_name(name) {}
544+
545+
bool String(const char *str, size_t length, bool) {
546+
return !m_name->copy(str, length, &my_charset_utf8mb4_bin);
547+
}
548+
549+
bool Default() {
550+
assert(false);
551+
return false;
552+
}
553+
554+
private:
555+
::String *m_name;
556+
};
557+
} // namespace
558+
542559
/**
543560
Parse a quoted member name using the rapidjson parser, so that we
544561
get the name without the enclosing quotes and with any escape
@@ -549,34 +566,25 @@ static const char *find_end_of_member_name(const char *start, const char *end) {
549566
550567
@param str the input string
551568
@param len the length of the input string
552-
@param depth_handler Pointer to a function that should handle error
553-
occurred when depth is exceeded.
554-
@return a Json_string that represents the member name, or NULL if
555-
the input string is not a valid name
569+
@param[out] name the member name
570+
@return false on success, true on error
556571
*/
557-
static std::unique_ptr<Json_string> parse_name_with_rapidjson(
558-
const char *str, size_t len, const JsonErrorHandler &depth_handler) {
559-
Json_dom_ptr dom = Json_dom::parse(
560-
str, len, [](const char *, size_t) {}, depth_handler);
561-
562-
if (dom == nullptr || dom->json_type() != enum_json_type::J_STRING)
563-
return nullptr;
564-
565-
return std::unique_ptr<Json_string>(down_cast<Json_string *>(dom.release()));
572+
bool parse_name_with_rapidjson(const char *str, size_t len, String *name) {
573+
MemberNameHandler handler(name);
574+
rapidjson::MemoryStream stream(str, len);
575+
rapidjson::Reader reader;
576+
return !reader.Parse<rapidjson::kParseDefaultFlags>(stream, handler);
566577
}
567578

568579
/**
569580
Parses a single member leg and appends it to a Json_path object.
570581
571582
@param[in,out] stream The stream to read the path expression from.
572583
@param[in,out] path The Json_path object to fill.
573-
@param[in] depth_handler Pointer to a function that should handle error
574-
occurred when depth is exceeded.
575584
576585
@return true on error, false on success
577586
*/
578-
static bool parse_member_leg(Stream *stream, Json_path *path,
579-
const JsonErrorHandler &depth_handler) {
587+
static bool parse_member_leg(Stream *stream, Json_path *path) {
580588
// advance past the .
581589
assert(stream->peek() == BEGIN_MEMBER);
582590
stream->skip(1);
@@ -596,15 +604,16 @@ static bool parse_member_leg(Stream *stream, Json_path *path,
596604
const bool was_quoted = (*key_start == DOUBLE_QUOTE);
597605
stream->skip(key_end - key_start);
598606

599-
std::unique_ptr<Json_string> jstr;
607+
StringBuffer<STRING_BUFFER_USUAL_SIZE> name;
600608

601609
if (was_quoted) {
602610
/*
603611
Send the quoted name through the parser to unquote and
604612
unescape it.
605613
*/
606-
jstr = parse_name_with_rapidjson(key_start, key_end - key_start,
607-
depth_handler);
614+
if (parse_name_with_rapidjson(key_start, key_end - key_start, &name)) {
615+
return true;
616+
}
608617
} else {
609618
/*
610619
An unquoted name may contain escape sequences. Wrap it in
@@ -616,17 +625,18 @@ static bool parse_member_leg(Stream *stream, Json_path *path,
616625
strbuff.append(key_start, key_end - key_start) ||
617626
strbuff.append(DOUBLE_QUOTE))
618627
return true; /* purecov: inspected */
619-
jstr = parse_name_with_rapidjson(strbuff.ptr(), strbuff.length(),
620-
depth_handler);
628+
if (parse_name_with_rapidjson(strbuff.ptr(), strbuff.length(), &name)) {
629+
return true;
630+
}
621631
}
622632

623-
if (jstr == nullptr) return true;
624-
625633
// unquoted names must be valid ECMAScript identifiers
626-
if (!was_quoted && !is_ecmascript_identifier(jstr->value())) return true;
634+
if (!was_quoted && !is_ecmascript_identifier({name.ptr(), name.length()})) {
635+
return true;
636+
}
627637

628638
// Looking good.
629-
if (path->append(Json_path_leg(jstr->value())))
639+
if (path->append(Json_path_leg(name.ptr(), name.length())))
630640
return true; /* purecov: inspected */
631641
}
632642

@@ -711,7 +721,7 @@ static bool is_connector_punctuation(unsigned codepoint) {
711721
712722
@return True if the name is a valid ECMAScript identifier. False otherwise.
713723
*/
714-
static bool is_ecmascript_identifier(const std::string &name) {
724+
static bool is_ecmascript_identifier(const std::string_view &name) {
715725
// An empty string is not a valid identifier.
716726
if (name.empty()) return false;
717727

sql-common/json_path.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,13 @@
3434
#include <assert.h>
3535
#include <stddef.h>
3636
#include <algorithm>
37-
#include <functional>
3837
#include <new>
3938
#include <string>
4039
#include <utility>
4140

4241
#include "my_alloc.h" // MEM_ROOT
43-
// assert
44-
#include "my_inttypes.h"
45-
#include "my_sys.h"
42+
#include "mysql/psi/psi_memory.h"
4643
#include "prealloced_array.h" // Prealloced_array
47-
#include "sql-common/json_error_handler.h"
4844

4945
class String;
5046

@@ -476,13 +472,10 @@ class Json_path_clone final : public Json_seekable_path {
476472
@param[in] path_expression The string form of the path expression.
477473
@param[out] path The Json_path object to be initialized.
478474
@param[out] bad_index If null is returned, the parsing failed around here.
479-
@param[in] depth_handler Pointer to a function that should handle error
480-
occurred when depth is exceeded.
481475
@return false on success, true on error
482476
*/
483477
bool parse_path(size_t path_length, const char *path_expression,
484-
Json_path *path, size_t *bad_index,
485-
const JsonErrorHandler &depth_handler);
478+
Json_path *path, size_t *bad_index);
486479

487480
/**
488481
A helper function that uses the above one as workhorse. Entry point for

sql/item_json_func.cc

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,22 +25,20 @@
2525
#include "sql/item_json_func.h"
2626

2727
#include <assert.h>
28-
#include <stdint.h>
29-
#include <string.h>
3028
#include <algorithm> // std::fill
31-
#include <cassert>
29+
#include <cstring>
3230
#include <limits>
3331
#include <memory>
3432
#include <new>
3533
#include <string>
3634
#include <utility>
35+
#include <vector>
3736

3837
#include "decimal.h"
3938
#include "field_types.h" // enum_field_types
4039
#include "lex_string.h"
41-
#include "m_string.h"
4240
#include "my_alloc.h"
43-
41+
#include "my_dbug.h"
4442
#include "my_sys.h"
4543
#include "mysql/mysql_lex_string.h"
4644
#include "mysql/strings/m_ctype.h"
@@ -53,11 +51,12 @@
5351
#include "sql-common/json_syntax_check.h"
5452
#include "sql-common/my_decimal.h"
5553
#include "sql/current_thd.h" // current_thd
54+
#include "sql/debug_sync.h"
5655
#include "sql/error_handler.h"
5756
#include "sql/field.h"
57+
#include "sql/field_common_properties.h"
5858
#include "sql/item_cmpfunc.h" // Item_func_like
5959
#include "sql/item_create.h"
60-
#include "sql/item_subselect.h"
6160
#include "sql/json_schema.h"
6261
#include "sql/parser_yystype.h"
6362
#include "sql/psi_memory_key.h" // key_memory_JSON
@@ -550,8 +549,7 @@ bool parse_path(const String &path_value, bool forbid_wildcards,
550549

551550
// OK, we have a string encoded in utf-8. Does it parse?
552551
size_t bad_idx = 0;
553-
if (parse_path(path_length, path_chars, json_path, &bad_idx,
554-
JsonDepthErrorHandler)) {
552+
if (parse_path(path_length, path_chars, json_path, &bad_idx)) {
555553
/*
556554
Issue an error message. The last argument is no longer used, but kept to
557555
avoid changing error message format.

sql/item_json_func.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,14 @@
2626
#include <assert.h>
2727
#include <sys/types.h>
2828

29-
#include <cstddef>
3029
#include <cstdint>
3130
#include <memory>
3231
#include <utility> // std::forward
3332

33+
#include "field_types.h"
34+
#include "my_alloc.h"
3435
#include "my_inttypes.h"
36+
#include "my_table_map.h"
3537
#include "my_time.h"
3638
#include "mysql/strings/m_ctype.h"
3739
#include "mysql/udf_registration_types.h"
@@ -53,7 +55,9 @@
5355

5456
class Json_schema_validator;
5557
class Json_array;
58+
class Json_diff_vector;
5659
class Json_dom;
60+
class Json_object;
5761
class Json_scalar_holder;
5862
class Json_wrapper;
5963
class PT_item_list;

0 commit comments

Comments
 (0)