Skip to content

Commit d2ab06a

Browse files
author
Nimita Joshi
committed
Bug#33334911 - Multi-valued index performance is too slow
(Access too many rows than required) Issue: The query which has "greater or less than (> or <)" condition on multi-valued index is too slow. Analysis: In case of query which has range conditions on the multi-valued index, the storage engine kept reading the rows because key comparison to check the end of the range was not getting executed. Because of this it was reading more rows than expected leading to slow execution of the query. Fix: Enable key comparison to check the end of the range in case of multi-valued index. Disable multi-valued index for DS-MRR, except for equality ranges, as virtual fields are not updated during the index read in case of DS-MRR to do the key comparison. Change-Id: I1e3019ab15b29615815c9279c85a3d7425701db0
1 parent eceeabd commit d2ab06a

File tree

13 files changed

+180
-45
lines changed

13 files changed

+180
-45
lines changed

Diff for: mysql-test/suite/json/r/array_index.result

+59
Original file line numberDiff line numberDiff line change
@@ -3360,3 +3360,62 @@ id j
33603360
4 ["7", "15", "3", "27"]
33613361
DROP TABLE test;
33623362
# restart:
3363+
#
3364+
# Bug#33334911 - Multi-valued index performance is too slow
3365+
# (Access too many rows than required)
3366+
#
3367+
CREATE TABLE t1 (
3368+
id INT NOT NULL,
3369+
f1 enum('T1','T2','T3') NOT NULL,
3370+
f2 INT NOT NULL,
3371+
json_value json DEFAULT (json_array()),
3372+
PRIMARY KEY (id),
3373+
KEY idx (f1 ,(CAST(json_extract(json_value,_utf8mb4'$') AS UNSIGNED ARRAY)), f2)
3374+
);
3375+
INSERT INTO t1 VALUES (1, 'T2', 15, '[1, 3, 5, 7]');
3376+
INSERT INTO t1 VALUES (2, 'T2', 25, '[10, 20, 30, 40]');
3377+
INSERT INTO t1 VALUES (3, 'T2', 35, '[15, 25, 35, 45]');
3378+
INSERT INTO t1 VALUES (4, 'T2', 45, '[100, 200, 300, 400]');
3379+
INSERT INTO t1 VALUES (5, 'T2', 55, '[120, 220, 320, 420]');
3380+
INSERT INTO t1 VALUES (6, 'T2', 65, '[140, 240, 340, 440]');
3381+
INSERT INTO t1 VALUES (7, 'T2', 9, '[100, 200, 300, 400]');
3382+
INSERT INTO t1 VALUES (8, 'T2', 9, '[100, 200, 300, 400]');
3383+
INSERT INTO t1 VALUES (9, 'T2', 9, '[100, 200, 300, 400]');
3384+
INSERT INTO t1 VALUES (10, 'T2', 9, '[100, 200, 300, 400]');
3385+
ANALYZE TABLE t1;
3386+
Table Op Msg_type Msg_text
3387+
test.t1 analyze status OK
3388+
EXPLAIN SELECT COUNT(*) FROM t1
3389+
WHERE f1 IN ('T2') AND JSON_OVERLAPS(json_value->'$', CAST('[100]' AS JSON)) AND f2>=10;
3390+
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
3391+
1 SIMPLE t1 NULL range idx idx 14 NULL 1 100.00 Using where
3392+
Warnings:
3393+
Note 1003 /* select#1 */ select count(0) AS `COUNT(*)` from `test`.`t1` where ((`test`.`t1`.`f1` = 'T2') and json_overlaps(cast(json_extract(`json_value`,_utf8mb4'$') as unsigned array),json'[100]') and (`test`.`t1`.`f2` >= 10))
3394+
FLUSH STATUS;
3395+
SELECT COUNT(*) FROM t1
3396+
WHERE f1 IN ('T2') AND JSON_OVERLAPS(json_value->'$', CAST('[100]' AS JSON)) AND f2>=10;
3397+
COUNT(*)
3398+
1
3399+
SHOW STATUS LIKE 'Handler_read%';
3400+
Variable_name Value
3401+
Handler_read_first 0
3402+
Handler_read_key 1
3403+
Handler_read_last 0
3404+
Handler_read_next 1
3405+
Handler_read_prev 0
3406+
Handler_read_rnd 0
3407+
Handler_read_rnd_next 0
3408+
SET @save_opt=@@optimizer_switch;
3409+
SET @@optimizer_switch="mrr_cost_based=off";
3410+
EXPLAIN SELECT COUNT(*) FROM t1
3411+
WHERE f1 IN ('T2') AND JSON_OVERLAPS(json_value->'$', CAST('[100]' AS JSON)) AND f2>=10;
3412+
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
3413+
1 SIMPLE t1 NULL range idx idx 14 NULL 1 100.00 Using where
3414+
Warnings:
3415+
Note 1003 /* select#1 */ select count(0) AS `COUNT(*)` from `test`.`t1` where ((`test`.`t1`.`f1` = 'T2') and json_overlaps(cast(json_extract(`json_value`,_utf8mb4'$') as unsigned array),json'[100]') and (`test`.`t1`.`f2` >= 10))
3416+
SELECT COUNT(*) FROM t1
3417+
WHERE f1 IN ('T2') AND JSON_OVERLAPS(json_value->'$', CAST('[100]' AS JSON)) AND f2>=10;
3418+
COUNT(*)
3419+
1
3420+
SET @@optimizer_switch=@save_opt;
3421+
DROP TABLE t1;

Diff for: mysql-test/suite/json/t/array_index.test

+45
Original file line numberDiff line numberDiff line change
@@ -1951,3 +1951,48 @@ DROP TABLE test;
19511951
# at the very end of the test.
19521952
--let $restart_parameters= restart:
19531953
--source include/restart_mysqld.inc
1954+
1955+
--echo #
1956+
--echo # Bug#33334911 - Multi-valued index performance is too slow
1957+
--echo # (Access too many rows than required)
1958+
--echo #
1959+
1960+
CREATE TABLE t1 (
1961+
id INT NOT NULL,
1962+
f1 enum('T1','T2','T3') NOT NULL,
1963+
f2 INT NOT NULL,
1964+
json_value json DEFAULT (json_array()),
1965+
PRIMARY KEY (id),
1966+
KEY idx (f1 ,(CAST(json_extract(json_value,_utf8mb4'$') AS UNSIGNED ARRAY)), f2)
1967+
);
1968+
1969+
INSERT INTO t1 VALUES (1, 'T2', 15, '[1, 3, 5, 7]');
1970+
INSERT INTO t1 VALUES (2, 'T2', 25, '[10, 20, 30, 40]');
1971+
INSERT INTO t1 VALUES (3, 'T2', 35, '[15, 25, 35, 45]');
1972+
INSERT INTO t1 VALUES (4, 'T2', 45, '[100, 200, 300, 400]');
1973+
INSERT INTO t1 VALUES (5, 'T2', 55, '[120, 220, 320, 420]');
1974+
INSERT INTO t1 VALUES (6, 'T2', 65, '[140, 240, 340, 440]');
1975+
INSERT INTO t1 VALUES (7, 'T2', 9, '[100, 200, 300, 400]');
1976+
INSERT INTO t1 VALUES (8, 'T2', 9, '[100, 200, 300, 400]');
1977+
INSERT INTO t1 VALUES (9, 'T2', 9, '[100, 200, 300, 400]');
1978+
INSERT INTO t1 VALUES (10, 'T2', 9, '[100, 200, 300, 400]');
1979+
1980+
ANALYZE TABLE t1;
1981+
1982+
let $query = SELECT COUNT(*) FROM t1
1983+
WHERE f1 IN ('T2') AND JSON_OVERLAPS(json_value->'$', CAST('[100]' AS JSON)) AND f2>=10;
1984+
1985+
--eval EXPLAIN $query
1986+
1987+
FLUSH STATUS;
1988+
--eval $query
1989+
SHOW STATUS LIKE 'Handler_read%';
1990+
1991+
SET @save_opt=@@optimizer_switch;
1992+
SET @@optimizer_switch="mrr_cost_based=off";
1993+
1994+
--eval EXPLAIN $query
1995+
--eval $query
1996+
1997+
SET @@optimizer_switch=@save_opt;
1998+
DROP TABLE t1;

Diff for: sql/field.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -9991,12 +9991,12 @@ int Field_typed_array::key_cmp(const uchar *key_ptr, uint key_length) const {
99919991
m_conv_item->field->set_key_image(key_ptr, key_length);
99929992
if (sql_scalar_to_json(m_conv_item, "<Field_typed_array::key_cmp>", &value,
99939993
&tmp, &key, nullptr, true)) {
9994-
return -1;
9994+
return 1;
99959995
}
99969996

99979997
// Compare colum value with the key.
99989998
if (val_json(&col_val)) {
9999-
return -1;
9999+
return 1;
1000010000
}
1000110001
assert(col_val.type() == enum_json_type::J_ARRAY);
1000210002
for (uint i = 0; i < col_val.length(); i++) {
@@ -10005,7 +10005,7 @@ int Field_typed_array::key_cmp(const uchar *key_ptr, uint key_length) const {
1000510005
return 0;
1000610006
}
1000710007
}
10008-
return -1;
10008+
return 1;
1000910009
}
1001010010

1001110011
void Field_typed_array::init(TABLE *table_arg) {

Diff for: sql/field.h

+12-10
Original file line numberDiff line numberDiff line change
@@ -4232,21 +4232,23 @@ class Field_typed_array final : public Field_json {
42324232
int key_cmp(const uchar *, const uchar *) const override { return -1; }
42334233
/**
42344234
* @brief This function will behave similarly to MEMBER OF json operation,
4235-
* unlike regular key_cmp. The key value will be checked against
4236-
* members of the array and the presence of the key will be considered
4237-
* as the record matching the given key. This particular definition is
4238-
* used in descending ref index scans. Descending index scan uses
4239-
* handler::ha_index_prev() function to read from the storage engine
4240-
* which does not compare the index key with the search key [unlike
4241-
* handler::ha_index_next_same()]. Hence each retrieved record needs
4242-
* to be validated to find a stop point. Refer key_cmp_if_same() and
4243-
* RefIterator<true>::Read() for more details.
4235+
* unlike regular key_cmp. Since scans on multi-valued indexes always
4236+
* go in the ascending direction, and always start on the first entry
4237+
* that is not less than the key, a record not matching the MEMBER OF
4238+
* condition is assumed to be greater than the key, so the function
4239+
* always returns 1, indicating greater than, for not found.
4240+
* This definition is used in descending ref index scans.
4241+
* Descending index scan uses handler::ha_index_prev() function to read
4242+
* from the storage engine which does not compare the index key with
4243+
* the search key [unlike handler::ha_index_next_same()]. Hence each
4244+
* retrieved record needs to be validated to find a stop point. Refer
4245+
* key_cmp_if_same() and RefIterator<true>::Read() for more details.
42444246
*
42454247
* @param key_ptr Pointer to the key
42464248
* @param key_length Key length
42474249
* @return
42484250
* 0 Key found in the record
4249-
* -1 Key not found in the record
4251+
* 1 Key not found in the record
42504252
*/
42514253
int key_cmp(const uchar *key_ptr, uint key_length) const override;
42524254
/**

Diff for: sql/handler.cc

+34-11
Original file line numberDiff line numberDiff line change
@@ -6145,6 +6145,7 @@ static bool key_uses_partial_cols(TABLE *table, uint keyno) {
61456145
OUT: Size of the buffer that is expected to be actually
61466146
used, or 0 if buffer is not needed.
61476147
@param [in,out] flags A combination of HA_MRR_* flags
6148+
@param [out] force_default_mrr Force default MRR implementation
61486149
@param [out] cost Estimated cost of MRR access
61496150
61506151
@note
@@ -6165,12 +6166,14 @@ ha_rows handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
61656166
void *seq_init_param,
61666167
uint n_ranges_arg [[maybe_unused]],
61676168
uint *bufsz, uint *flags,
6169+
bool *force_default_mrr,
61686170
Cost_estimate *cost) {
61696171
KEY_MULTI_RANGE range;
61706172
range_seq_t seq_it;
61716173
ha_rows rows, total_rows = 0;
61726174
uint n_ranges = 0;
61736175
THD *thd = current_thd;
6176+
*force_default_mrr = false;
61746177

61756178
/* Default MRR implementation doesn't need buffer */
61766179
*bufsz = 0;
@@ -6191,6 +6194,17 @@ ha_rows handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
61916194
max_endp = range.end_key.length ? &range.end_key : nullptr;
61926195
}
61936196

6197+
/*
6198+
Allow multi-valued index for DS-MRR only for equality ranges.
6199+
For non-equality ranges, the storage engine might need to call
6200+
Field_typed_array::key_cmp(), which is not safe when doing an
6201+
index-only scan.
6202+
*/
6203+
if (!*force_default_mrr &&
6204+
(table->key_info[keyno].flags & HA_MULTI_VALUED_KEY) &&
6205+
!(range.range_flag & EQ_RANGE)) {
6206+
*force_default_mrr = true;
6207+
}
61946208
/*
61956209
Return HA_POS_ERROR if the specified keyno is not capable of
61966210
serving the specified range request. The cases checked for are:
@@ -6888,9 +6902,11 @@ ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
68886902
ha_rows rows;
68896903
uint def_flags = *flags;
68906904
uint def_bufsz = *bufsz;
6905+
bool force_default_mrr = false;
68916906
/* Get cost/flags/mem_usage of default MRR implementation */
68926907
rows = h->handler::multi_range_read_info_const(
6893-
keyno, seq, seq_init_param, n_ranges, &def_bufsz, &def_flags, cost);
6908+
keyno, seq, seq_init_param, n_ranges, &def_bufsz, &def_flags,
6909+
&force_default_mrr, cost);
68946910
if (rows == HA_POS_ERROR) {
68956911
/* Default implementation can't perform MRR scan => we can't either */
68966912
return rows;
@@ -6899,10 +6915,12 @@ ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
68996915
/*
69006916
If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
69016917
use the default MRR implementation (we need it for UPDATE/DELETE).
6902-
Otherwise, make a choice based on cost and mrr* flags of
6903-
@@optimizer_switch.
6918+
Also, if multi_range_read_info_const() detected that "DS_MRR" cannot
6919+
be used (E.g. Using a multi-valued index for non-equality ranges), we
6920+
are mandated to use the default implementation. Else, make a choice
6921+
based on cost and mrr* flags of @@optimizer_switch.
69046922
*/
6905-
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6923+
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || force_default_mrr ||
69066924
choose_mrr_impl(keyno, rows, flags, bufsz, cost)) {
69076925
DBUG_PRINT("info", ("Default MRR implementation choosen"));
69086926
*flags = def_flags;
@@ -7454,14 +7472,19 @@ void handler::set_end_range(const key_range *range,
74547472
int handler::compare_key(key_range *range) {
74557473
int cmp = -1;
74567474
if (!range || in_range_check_pushed_down) return 0; // No max range
7457-
/*
7458-
Virtual fields are not updated during multi-valued index read in MRR.
7459-
Hence key comparison is skipped for MV index.
7460-
TODO: Disable MRR on MV index or implement a comparison logic.
7461-
*/
7462-
if (!(table->key_info[active_index].flags & HA_MULTI_VALUED_KEY)) {
7463-
cmp = key_cmp(range_key_part, range->key, range->length);
7475+
7476+
if ((table->key_info[active_index].flags & HA_MULTI_VALUED_KEY) &&
7477+
table->key_read) {
7478+
// For multi-valued indexes, key_cmp() needs to read the virtual column
7479+
// backing the index. See Field_typed_array::key_cmp(). The virtual column
7480+
// is not available during index-only scans (typically used by DS-MRR), so
7481+
// skip the end of range scan in that case, and let the SQL layer do the
7482+
// filtering. Assuming the scan is ascending, returning -1 (less than range)
7483+
// makes the scan return the row to the next layer.
7484+
assert(range_scan_direction == RANGE_SCAN_ASC);
7485+
return -1;
74647486
}
7487+
cmp = key_cmp(range_key_part, range->key, range->length);
74657488
if (!cmp) cmp = key_compare_result_on_equal;
74667489
return cmp;
74677490
}

Diff for: sql/handler.h

+3-4
Original file line numberDiff line numberDiff line change
@@ -5176,10 +5176,9 @@ class handler {
51765176
double estimate_in_memory_buffer(ulonglong table_index_size) const;
51775177

51785178
public:
5179-
virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
5180-
void *seq_init_param,
5181-
uint n_ranges, uint *bufsz,
5182-
uint *flags, Cost_estimate *cost);
5179+
virtual ha_rows multi_range_read_info_const(
5180+
uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges,
5181+
uint *bufsz, uint *flags, bool *force_default_mrr, Cost_estimate *cost);
51835182
virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
51845183
uint *bufsz, uint *flags,
51855184
Cost_estimate *cost);

Diff for: sql/range_optimizer/index_range_scan_plan.cc

+3-1
Original file line numberDiff line numberDiff line change
@@ -624,8 +624,10 @@ ha_rows check_quick_select(THD *thd, RANGE_OPT_PARAM *param, uint idx,
624624

625625
*bufsize = thd->variables.read_rnd_buff_size;
626626
// Sets is_ror_scan to false for some queries, e.g. multi-ranges
627+
bool force_default_mrr = false;
627628
rows = file->multi_range_read_info_const(keynr, &seq_if, (void *)&seq, 0,
628-
bufsize, mrr_flags, cost);
629+
bufsize, mrr_flags,
630+
&force_default_mrr, cost);
629631
if (rows != HA_POS_ERROR) {
630632
param->table->quick_rows[keynr] = rows;
631633
if (update_tbl_stats) {

Diff for: storage/innobase/handler/ha_innodb.cc

+4-5
Original file line numberDiff line numberDiff line change
@@ -23427,11 +23427,10 @@ int ha_innobase::multi_range_read_next(char **range_info) {
2342723427
return (m_ds_mrr.dsmrr_next(range_info));
2342823428
}
2342923429

23430-
ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
23431-
void *seq_init_param,
23432-
uint n_ranges, uint *bufsz,
23433-
uint *flags,
23434-
Cost_estimate *cost) {
23430+
ha_rows ha_innobase::multi_range_read_info_const(
23431+
uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges,
23432+
uint *bufsz, uint *flags, bool *force_default_mrr [[maybe_unused]],
23433+
Cost_estimate *cost) {
2343523434
/* See comments in ha_myisam::multi_range_read_info_const */
2343623435
m_ds_mrr.init(table);
2343723436

Diff for: storage/innobase/handler/ha_innodb.h

+1
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ class ha_innobase : public handler {
486486
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
487487
void *seq_init_param, uint n_ranges,
488488
uint *bufsz, uint *flags,
489+
bool *force_default_mrr,
489490
Cost_estimate *cost) override;
490491

491492
/** Initialize multi range read and get information.

Diff for: storage/myisam/ha_myisam.cc

+4-5
Original file line numberDiff line numberDiff line change
@@ -2020,11 +2020,10 @@ int ha_myisam::multi_range_read_next(char **range_info) {
20202020
return ds_mrr.dsmrr_next(range_info);
20212021
}
20222022

2023-
ha_rows ha_myisam::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
2024-
void *seq_init_param,
2025-
uint n_ranges, uint *bufsz,
2026-
uint *flags,
2027-
Cost_estimate *cost) {
2023+
ha_rows ha_myisam::multi_range_read_info_const(
2024+
uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges,
2025+
uint *bufsz, uint *flags, bool *force_default_mrr [[maybe_unused]],
2026+
Cost_estimate *cost) {
20282027
/*
20292028
This call is here because there is no location where this->table would
20302029
already be known.

Diff for: storage/myisam/ha_myisam.h

+1
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ class ha_myisam : public handler {
196196
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
197197
void *seq_init_param, uint n_ranges,
198198
uint *bufsz, uint *flags,
199+
bool *force_default_mrr,
199200
Cost_estimate *cost) override;
200201
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
201202
uint *bufsz, uint *flags,

Diff for: storage/ndb/plugin/ha_ndbcluster.cc

+10-6
Original file line numberDiff line numberDiff line change
@@ -13287,27 +13287,31 @@ static bool read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type,
1328713287

1328813288
ha_rows ha_ndbcluster::multi_range_read_info_const(
1328913289
uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges,
13290-
uint *bufsz, uint *flags, Cost_estimate *cost) {
13290+
uint *bufsz, uint *flags, bool *force_default_mrr, Cost_estimate *cost) {
1329113291
ha_rows rows;
1329213292
uint def_flags = *flags;
1329313293
uint def_bufsz = *bufsz;
1329413294

1329513295
DBUG_TRACE;
1329613296

1329713297
/* Get cost/flags/mem_usage of default MRR implementation */
13298-
rows = handler::multi_range_read_info_const(
13299-
keyno, seq, seq_init_param, n_ranges, &def_bufsz, &def_flags, cost);
13298+
rows = handler::multi_range_read_info_const(keyno, seq, seq_init_param,
13299+
n_ranges, &def_bufsz, &def_flags,
13300+
force_default_mrr, cost);
1330013301
if (unlikely(rows == HA_POS_ERROR)) {
1330113302
return rows;
1330213303
}
1330313304

1330413305
/*
1330513306
If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is
1330613307
an order to use the default MRR implementation.
13307-
Otherwise, make a choice based on requested *flags, handler
13308-
capabilities, cost and mrr* flags of @@optimizer_switch.
13308+
Also, if multi_range_read_info_const() detected that "DS_MRR" cannot
13309+
be used (E.g. Using a multi-valued index for non-equality ranges), we
13310+
are mandated to use the default implementation. Else, make a choice
13311+
based on requested *flags, handler capabilities, cost and mrr* flags
13312+
of @@optimizer_switch.
1330913313
*/
13310-
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
13314+
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || *force_default_mrr ||
1331113315
choose_mrr_impl(keyno, n_ranges, rows, bufsz, flags, cost)) {
1331213316
DBUG_PRINT("info", ("Default MRR implementation choosen"));
1331313317
*flags = def_flags;

Diff for: storage/ndb/plugin/ha_ndbcluster.h

+1
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ class ha_ndbcluster : public handler, public Partition_handler {
186186
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
187187
void *seq_init_param, uint n_ranges,
188188
uint *bufsz, uint *flags,
189+
bool *force_default_mrr,
189190
Cost_estimate *cost) override;
190191
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
191192
uint *bufsz, uint *flags,

0 commit comments

Comments
 (0)