Skip to content

Commit aa35800

Browse files
author
Nimita Joshi
committed
Merge branch 'mysql-8.4' into mysql-trunk
Change-Id: If097734470881a8b1f0fbaa74e4fee0ca2a31888
2 parents b19cf63 + 17e355b commit aa35800

File tree

13 files changed

+181
-46
lines changed

13 files changed

+181
-46
lines changed

mysql-test/suite/json/r/array_index.result

+60-1
Original file line numberDiff line numberDiff line change
@@ -3426,10 +3426,69 @@ test.t analyze status OK
34263426
EXPLAIN FORMAT = tree SELECT * FROM t WHERE doc_id >= 3 and doc_id < 4;
34273427
EXPLAIN
34283428
-> Filter: ((t.doc_id >= 3) and (t.doc_id < 4)) (rows=1)
3429-
-> Index range scan (Multi-Range Read) on t using mv_doc_id over (3 <= doc_id < 4) (rows=1)
3429+
-> Index range scan on t using mv_doc_id over (3 <= doc_id < 4) (rows=1)
34303430

34313431
SELECT * FROM t WHERE doc_id >= 3 and doc_id < 4;
34323432
id doc_id doc
34333433
6 3 {"val": [1]}
34343434
DROP TABLE t;
34353435
# restart:
3436+
#
3437+
# Bug#33334911 - Multi-valued index performance is too slow
3438+
# (Access too many rows than required)
3439+
#
3440+
CREATE TABLE t1 (
3441+
id INT NOT NULL,
3442+
f1 enum('T1','T2','T3') NOT NULL,
3443+
f2 INT NOT NULL,
3444+
json_value json DEFAULT (json_array()),
3445+
PRIMARY KEY (id),
3446+
KEY idx (f1 ,(CAST(json_extract(json_value,_utf8mb4'$') AS UNSIGNED ARRAY)), f2)
3447+
);
3448+
INSERT INTO t1 VALUES (1, 'T2', 15, '[1, 3, 5, 7]');
3449+
INSERT INTO t1 VALUES (2, 'T2', 25, '[10, 20, 30, 40]');
3450+
INSERT INTO t1 VALUES (3, 'T2', 35, '[15, 25, 35, 45]');
3451+
INSERT INTO t1 VALUES (4, 'T2', 45, '[100, 200, 300, 400]');
3452+
INSERT INTO t1 VALUES (5, 'T2', 55, '[120, 220, 320, 420]');
3453+
INSERT INTO t1 VALUES (6, 'T2', 65, '[140, 240, 340, 440]');
3454+
INSERT INTO t1 VALUES (7, 'T2', 9, '[100, 200, 300, 400]');
3455+
INSERT INTO t1 VALUES (8, 'T2', 9, '[100, 200, 300, 400]');
3456+
INSERT INTO t1 VALUES (9, 'T2', 9, '[100, 200, 300, 400]');
3457+
INSERT INTO t1 VALUES (10, 'T2', 9, '[100, 200, 300, 400]');
3458+
ANALYZE TABLE t1;
3459+
Table Op Msg_type Msg_text
3460+
test.t1 analyze status OK
3461+
EXPLAIN SELECT COUNT(*) FROM t1
3462+
WHERE f1 IN ('T2') AND JSON_OVERLAPS(json_value->'$', CAST('[100]' AS JSON)) AND f2>=10;
3463+
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
3464+
1 SIMPLE t1 NULL range idx idx 14 NULL 1 100.00 Using where
3465+
Warnings:
3466+
Note 1003 /* select#1 */ select count(0) AS `COUNT(*)` from `test`.`t1` where ((`test`.`t1`.`f1` = 'T2') and json_overlaps(cast(json_extract(`json_value`,_utf8mb4'$') as unsigned array),json'[100]') and (`test`.`t1`.`f2` >= 10))
3467+
FLUSH STATUS;
3468+
SELECT COUNT(*) FROM t1
3469+
WHERE f1 IN ('T2') AND JSON_OVERLAPS(json_value->'$', CAST('[100]' AS JSON)) AND f2>=10;
3470+
COUNT(*)
3471+
1
3472+
SHOW STATUS LIKE 'Handler_read%';
3473+
Variable_name Value
3474+
Handler_read_first 0
3475+
Handler_read_key 1
3476+
Handler_read_last 0
3477+
Handler_read_next 1
3478+
Handler_read_prev 0
3479+
Handler_read_rnd 0
3480+
Handler_read_rnd_next 0
3481+
SET @save_opt=@@optimizer_switch;
3482+
SET @@optimizer_switch="mrr_cost_based=off";
3483+
EXPLAIN SELECT COUNT(*) FROM t1
3484+
WHERE f1 IN ('T2') AND JSON_OVERLAPS(json_value->'$', CAST('[100]' AS JSON)) AND f2>=10;
3485+
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
3486+
1 SIMPLE t1 NULL range idx idx 14 NULL 1 100.00 Using where
3487+
Warnings:
3488+
Note 1003 /* select#1 */ select count(0) AS `COUNT(*)` from `test`.`t1` where ((`test`.`t1`.`f1` = 'T2') and json_overlaps(cast(json_extract(`json_value`,_utf8mb4'$') as unsigned array),json'[100]') and (`test`.`t1`.`f2` >= 10))
3489+
SELECT COUNT(*) FROM t1
3490+
WHERE f1 IN ('T2') AND JSON_OVERLAPS(json_value->'$', CAST('[100]' AS JSON)) AND f2>=10;
3491+
COUNT(*)
3492+
1
3493+
SET @@optimizer_switch=@save_opt;
3494+
DROP TABLE t1;

mysql-test/suite/json/t/array_index.test

+45
Original file line numberDiff line numberDiff line change
@@ -2033,3 +2033,48 @@ DROP TABLE t;
20332033
# at the very end of the test.
20342034
--let $restart_parameters= restart:
20352035
--source include/restart_mysqld.inc
2036+
2037+
--echo #
2038+
--echo # Bug#33334911 - Multi-valued index performance is too slow
2039+
--echo # (Access too many rows than required)
2040+
--echo #
2041+
2042+
CREATE TABLE t1 (
2043+
id INT NOT NULL,
2044+
f1 enum('T1','T2','T3') NOT NULL,
2045+
f2 INT NOT NULL,
2046+
json_value json DEFAULT (json_array()),
2047+
PRIMARY KEY (id),
2048+
KEY idx (f1 ,(CAST(json_extract(json_value,_utf8mb4'$') AS UNSIGNED ARRAY)), f2)
2049+
);
2050+
2051+
INSERT INTO t1 VALUES (1, 'T2', 15, '[1, 3, 5, 7]');
2052+
INSERT INTO t1 VALUES (2, 'T2', 25, '[10, 20, 30, 40]');
2053+
INSERT INTO t1 VALUES (3, 'T2', 35, '[15, 25, 35, 45]');
2054+
INSERT INTO t1 VALUES (4, 'T2', 45, '[100, 200, 300, 400]');
2055+
INSERT INTO t1 VALUES (5, 'T2', 55, '[120, 220, 320, 420]');
2056+
INSERT INTO t1 VALUES (6, 'T2', 65, '[140, 240, 340, 440]');
2057+
INSERT INTO t1 VALUES (7, 'T2', 9, '[100, 200, 300, 400]');
2058+
INSERT INTO t1 VALUES (8, 'T2', 9, '[100, 200, 300, 400]');
2059+
INSERT INTO t1 VALUES (9, 'T2', 9, '[100, 200, 300, 400]');
2060+
INSERT INTO t1 VALUES (10, 'T2', 9, '[100, 200, 300, 400]');
2061+
2062+
ANALYZE TABLE t1;
2063+
2064+
let $query = SELECT COUNT(*) FROM t1
2065+
WHERE f1 IN ('T2') AND JSON_OVERLAPS(json_value->'$', CAST('[100]' AS JSON)) AND f2>=10;
2066+
2067+
--eval EXPLAIN $query
2068+
2069+
FLUSH STATUS;
2070+
--eval $query
2071+
SHOW STATUS LIKE 'Handler_read%';
2072+
2073+
SET @save_opt=@@optimizer_switch;
2074+
SET @@optimizer_switch="mrr_cost_based=off";
2075+
2076+
--eval EXPLAIN $query
2077+
--eval $query
2078+
2079+
SET @@optimizer_switch=@save_opt;
2080+
DROP TABLE t1;

sql/field.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -10311,12 +10311,12 @@ int Field_typed_array::key_cmp(const uchar *key_ptr, uint key_length) const {
1031110311
m_conv_item->field->set_key_image(key_ptr, key_length);
1031210312
if (sql_scalar_to_json(m_conv_item, "<Field_typed_array::key_cmp>", &value,
1031310313
&tmp, &key, nullptr, true)) {
10314-
return -1;
10314+
return 1;
1031510315
}
1031610316

1031710317
// Compare colum value with the key.
1031810318
if (val_json(&col_val)) {
10319-
return -1;
10319+
return 1;
1032010320
}
1032110321
assert(col_val.type() == enum_json_type::J_ARRAY);
1032210322
for (uint i = 0; i < col_val.length(); i++) {
@@ -10325,7 +10325,7 @@ int Field_typed_array::key_cmp(const uchar *key_ptr, uint key_length) const {
1032510325
return 0;
1032610326
}
1032710327
}
10328-
return -1;
10328+
return 1;
1032910329
}
1033010330

1033110331
void Field_typed_array::init(TABLE *table_arg) {

sql/field.h

+12-10
Original file line numberDiff line numberDiff line change
@@ -4307,21 +4307,23 @@ class Field_typed_array final : public Field_json {
43074307
int key_cmp(const uchar *, const uchar *) const override { return -1; }
43084308
/**
43094309
* @brief This function will behave similarly to MEMBER OF json operation,
4310-
* unlike regular key_cmp. The key value will be checked against
4311-
* members of the array and the presence of the key will be considered
4312-
* as the record matching the given key. This particular definition is
4313-
* used in descending ref index scans. Descending index scan uses
4314-
* handler::ha_index_prev() function to read from the storage engine
4315-
* which does not compare the index key with the search key [unlike
4316-
* handler::ha_index_next_same()]. Hence each retrieved record needs
4317-
* to be validated to find a stop point. Refer key_cmp_if_same() and
4318-
* RefIterator<true>::Read() for more details.
4310+
* unlike regular key_cmp. Since scans on multi-valued indexes always
4311+
* go in the ascending direction, and always start on the first entry
4312+
* that is not less than the key, a record not matching the MEMBER OF
4313+
* condition is assumed to be greater than the key, so the function
4314+
* always returns 1, indicating greater than, for not found.
4315+
* This definition is used in descending ref index scans.
4316+
* Descending index scan uses handler::ha_index_prev() function to read
4317+
* from the storage engine which does not compare the index key with
4318+
* the search key [unlike handler::ha_index_next_same()]. Hence each
4319+
* retrieved record needs to be validated to find a stop point. Refer
4320+
* key_cmp_if_same() and RefIterator<true>::Read() for more details.
43194321
*
43204322
* @param key_ptr Pointer to the key
43214323
* @param key_length Key length
43224324
* @return
43234325
* 0 Key found in the record
4324-
* -1 Key not found in the record
4326+
* 1 Key not found in the record
43254327
*/
43264328
int key_cmp(const uchar *key_ptr, uint key_length) const override;
43274329
/**

sql/handler.cc

+34-11
Original file line numberDiff line numberDiff line change
@@ -6269,6 +6269,7 @@ static bool key_uses_partial_cols(TABLE *table, uint keyno) {
62696269
OUT: Size of the buffer that is expected to be actually
62706270
used, or 0 if buffer is not needed.
62716271
@param [in,out] flags A combination of HA_MRR_* flags
6272+
@param [out] force_default_mrr Force default MRR implementation
62726273
@param [out] cost Estimated cost of MRR access
62736274
62746275
@note
@@ -6289,12 +6290,14 @@ ha_rows handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
62896290
void *seq_init_param,
62906291
uint n_ranges_arg [[maybe_unused]],
62916292
uint *bufsz, uint *flags,
6293+
bool *force_default_mrr,
62926294
Cost_estimate *cost) {
62936295
KEY_MULTI_RANGE range;
62946296
range_seq_t seq_it;
62956297
ha_rows rows, total_rows = 0;
62966298
uint n_ranges = 0;
62976299
THD *thd = current_thd;
6300+
*force_default_mrr = false;
62986301

62996302
/* Default MRR implementation doesn't need buffer */
63006303
*bufsz = 0;
@@ -6315,6 +6318,17 @@ ha_rows handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
63156318
max_endp = range.end_key.length ? &range.end_key : nullptr;
63166319
}
63176320

6321+
/*
6322+
Allow multi-valued index for DS-MRR only for equality ranges.
6323+
For non-equality ranges, the storage engine might need to call
6324+
Field_typed_array::key_cmp(), which is not safe when doing an
6325+
index-only scan.
6326+
*/
6327+
if (!*force_default_mrr &&
6328+
(table->key_info[keyno].flags & HA_MULTI_VALUED_KEY) &&
6329+
!(range.range_flag & EQ_RANGE)) {
6330+
*force_default_mrr = true;
6331+
}
63186332
/*
63196333
Return HA_POS_ERROR if the specified keyno is not capable of
63206334
serving the specified range request. The cases checked for are:
@@ -7016,10 +7030,12 @@ ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
70167030
ha_rows rows;
70177031
uint def_flags = *flags;
70187032
uint def_bufsz = *bufsz;
7033+
bool force_default_mrr = false;
70197034

70207035
/* Get cost/flags/mem_usage of default MRR implementation */
70217036
rows = h->handler::multi_range_read_info_const(
7022-
keyno, seq, seq_init_param, n_ranges, &def_bufsz, &def_flags, cost);
7037+
keyno, seq, seq_init_param, n_ranges, &def_bufsz, &def_flags,
7038+
&force_default_mrr, cost);
70237039
if (rows == HA_POS_ERROR) {
70247040
/* Default implementation can't perform MRR scan => we can't either */
70257041
return rows;
@@ -7028,10 +7044,12 @@ ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
70287044
/*
70297045
If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
70307046
use the default MRR implementation (we need it for UPDATE/DELETE).
7031-
Otherwise, make a choice based on cost and mrr* flags of
7032-
@@optimizer_switch.
7047+
Also, if multi_range_read_info_const() detected that "DS_MRR" cannot
7048+
be used (E.g. Using a multi-valued index for non-equality ranges), we
7049+
are mandated to use the default implementation. Else, make a choice
7050+
based on cost and mrr* flags of @@optimizer_switch.
70337051
*/
7034-
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
7052+
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || force_default_mrr ||
70357053
choose_mrr_impl(keyno, rows, flags, bufsz, cost)) {
70367054
DBUG_PRINT("info", ("Default MRR implementation choosen"));
70377055
*flags = def_flags;
@@ -7583,14 +7601,19 @@ void handler::set_end_range(const key_range *range,
75837601
int handler::compare_key(key_range *range) {
75847602
int cmp = -1;
75857603
if (!range || in_range_check_pushed_down) return 0; // No max range
7586-
/*
7587-
Virtual fields are not updated during multi-valued index read in MRR.
7588-
Hence key comparison is skipped for MV index.
7589-
TODO: Disable MRR on MV index or implement a comparison logic.
7590-
*/
7591-
if (!(table->key_info[active_index].flags & HA_MULTI_VALUED_KEY)) {
7592-
cmp = key_cmp(range_key_part, range->key, range->length);
7604+
7605+
if ((table->key_info[active_index].flags & HA_MULTI_VALUED_KEY) &&
7606+
table->key_read) {
7607+
// For multi-valued indexes, key_cmp() needs to read the virtual column
7608+
// backing the index. See Field_typed_array::key_cmp(). The virtual column
7609+
// is not available during index-only scans (typically used by DS-MRR), so
7610+
// skip the end of range scan in that case, and let the SQL layer do the
7611+
// filtering. Assuming the scan is ascending, returning -1 (less than range)
7612+
// makes the scan return the row to the next layer.
7613+
assert(range_scan_direction == RANGE_SCAN_ASC);
7614+
return -1;
75937615
}
7616+
cmp = key_cmp(range_key_part, range->key, range->length);
75947617
if (!cmp) cmp = key_compare_result_on_equal;
75957618
return cmp;
75967619
}

sql/handler.h

+3-4
Original file line numberDiff line numberDiff line change
@@ -5454,10 +5454,9 @@ class handler {
54545454
double estimate_in_memory_buffer(ulonglong table_index_size) const;
54555455

54565456
public:
5457-
virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
5458-
void *seq_init_param,
5459-
uint n_ranges, uint *bufsz,
5460-
uint *flags, Cost_estimate *cost);
5457+
virtual ha_rows multi_range_read_info_const(
5458+
uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges,
5459+
uint *bufsz, uint *flags, bool *force_default_mrr, Cost_estimate *cost);
54615460
virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
54625461
uint *bufsz, uint *flags,
54635462
Cost_estimate *cost);

sql/range_optimizer/index_range_scan_plan.cc

+3-1
Original file line numberDiff line numberDiff line change
@@ -624,8 +624,10 @@ ha_rows check_quick_select(THD *thd, RANGE_OPT_PARAM *param, uint idx,
624624

625625
*bufsize = thd->variables.read_rnd_buff_size;
626626
// Sets is_ror_scan to false for some queries, e.g. multi-ranges
627+
bool force_default_mrr = false;
627628
rows = file->multi_range_read_info_const(keynr, &seq_if, (void *)&seq, 0,
628-
bufsize, mrr_flags, cost);
629+
bufsize, mrr_flags,
630+
&force_default_mrr, cost);
629631
if (rows != HA_POS_ERROR) {
630632
param->table->quick_rows[keynr] = rows;
631633
if (update_tbl_stats) {

storage/innobase/handler/ha_innodb.cc

+4-5
Original file line numberDiff line numberDiff line change
@@ -23744,11 +23744,10 @@ int ha_innobase::multi_range_read_next(char **range_info) {
2374423744
return (m_ds_mrr.dsmrr_next(range_info));
2374523745
}
2374623746

23747-
ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
23748-
void *seq_init_param,
23749-
uint n_ranges, uint *bufsz,
23750-
uint *flags,
23751-
Cost_estimate *cost) {
23747+
ha_rows ha_innobase::multi_range_read_info_const(
23748+
uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges,
23749+
uint *bufsz, uint *flags, bool *force_default_mrr [[maybe_unused]],
23750+
Cost_estimate *cost) {
2375223751
/* See comments in ha_myisam::multi_range_read_info_const */
2375323752
m_ds_mrr.init(table);
2375423753

storage/innobase/handler/ha_innodb.h

+1
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,7 @@ class ha_innobase : public handler {
564564
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
565565
void *seq_init_param, uint n_ranges,
566566
uint *bufsz, uint *flags,
567+
bool *force_default_mrr,
567568
Cost_estimate *cost) override;
568569

569570
/** Initialize multi range read and get information.

storage/myisam/ha_myisam.cc

+4-5
Original file line numberDiff line numberDiff line change
@@ -2025,11 +2025,10 @@ int ha_myisam::multi_range_read_next(char **range_info) {
20252025
return ds_mrr.dsmrr_next(range_info);
20262026
}
20272027

2028-
ha_rows ha_myisam::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
2029-
void *seq_init_param,
2030-
uint n_ranges, uint *bufsz,
2031-
uint *flags,
2032-
Cost_estimate *cost) {
2028+
ha_rows ha_myisam::multi_range_read_info_const(
2029+
uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges,
2030+
uint *bufsz, uint *flags, bool *force_default_mrr [[maybe_unused]],
2031+
Cost_estimate *cost) {
20332032
/*
20342033
This call is here because there is no location where this->table would
20352034
already be known.

storage/myisam/ha_myisam.h

+1
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ class ha_myisam : public handler {
196196
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
197197
void *seq_init_param, uint n_ranges,
198198
uint *bufsz, uint *flags,
199+
bool *force_default_mrr,
199200
Cost_estimate *cost) override;
200201
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
201202
uint *bufsz, uint *flags,

storage/ndb/plugin/ha_ndbcluster.cc

+10-6
Original file line numberDiff line numberDiff line change
@@ -13304,27 +13304,31 @@ static bool read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type,
1330413304

1330513305
ha_rows ha_ndbcluster::multi_range_read_info_const(
1330613306
uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges,
13307-
uint *bufsz, uint *flags, Cost_estimate *cost) {
13307+
uint *bufsz, uint *flags, bool *force_default_mrr, Cost_estimate *cost) {
1330813308
ha_rows rows;
1330913309
uint def_flags = *flags;
1331013310
uint def_bufsz = *bufsz;
1331113311

1331213312
DBUG_TRACE;
1331313313

1331413314
/* Get cost/flags/mem_usage of default MRR implementation */
13315-
rows = handler::multi_range_read_info_const(
13316-
keyno, seq, seq_init_param, n_ranges, &def_bufsz, &def_flags, cost);
13315+
rows = handler::multi_range_read_info_const(keyno, seq, seq_init_param,
13316+
n_ranges, &def_bufsz, &def_flags,
13317+
force_default_mrr, cost);
1331713318
if (unlikely(rows == HA_POS_ERROR)) {
1331813319
return rows;
1331913320
}
1332013321

1332113322
/*
1332213323
If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is
1332313324
an order to use the default MRR implementation.
13324-
Otherwise, make a choice based on requested *flags, handler
13325-
capabilities, cost and mrr* flags of @@optimizer_switch.
13325+
Also, if multi_range_read_info_const() detected that "DS_MRR" cannot
13326+
be used (E.g. Using a multi-valued index for non-equality ranges), we
13327+
are mandated to use the default implementation. Else, make a choice
13328+
based on requested *flags, handler capabilities, cost and mrr* flags
13329+
of @@optimizer_switch.
1332613330
*/
13327-
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
13331+
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || *force_default_mrr ||
1332813332
choose_mrr_impl(keyno, n_ranges, rows, bufsz, flags, cost)) {
1332913333
DBUG_PRINT("info", ("Default MRR implementation choosen"));
1333013334
*flags = def_flags;

storage/ndb/plugin/ha_ndbcluster.h

+1
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ class ha_ndbcluster : public handler, public Partition_handler {
187187
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
188188
void *seq_init_param, uint n_ranges,
189189
uint *bufsz, uint *flags,
190+
bool *force_default_mrr,
190191
Cost_estimate *cost) override;
191192
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
192193
uint *bufsz, uint *flags,

0 commit comments

Comments
 (0)