Skip to content

Commit 5373499

Browse files
committed
Bug#35813134: Optimize table value constructors in the hypergraph optimizer
Run table value constructors through the hypergraph optimizer instead of using the hard coded path for them in the old optimizer. This fixes bug#31949057 (TABLE VALUE CONSTRUCTOR FOLLOWED BY ORDER BY IS NOT SORTED CORRECTLY) in the hypergraph optimizer. The bug is still present in the old optimizer. The patch changes JOIN::optimize() so that it passes table value constructor query blocks to FindBestQueryPlan() in the hypergraph optimizer. Such query blocks go through the fast path for single-table queries. The fast path was updated so that it also accepted table-less queries. When adding a SORT access path for ORDER BY, we now also check if the query is table-less, and add a STREAM access path before the sorting if it is. This is needed because Filesort requires a record buffer, and we need a temporary table for that when there are no other tables. Change-Id: Ia1a0ed5ce6711c23e7a3825f602cdb3aa75478b8
1 parent 1f182ce commit 5373499

17 files changed

+1611
-676
lines changed

mysql-test/include/table_value_constructor.inc

+587
Large diffs are not rendered by default.

mysql-test/r/table_value_constructor_hypergraph.result

+814
Large diffs are not rendered by default.

mysql-test/t/table_value_constructor.test

+2-570
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
--source include/have_hypergraph.inc
2+
--source include/table_value_constructor.inc
3+
4+
--echo #
5+
--echo # Bug#31949057: TABLE VALUE CONSTRUCTOR FOLLOWED BY ORDER BY
6+
--echo # IS NOT SORTED CORRECTLY
7+
--echo #
8+
9+
VALUES ROW(1,9), ROW(2,4) ORDER BY column_1;
10+
11+
(VALUES ROW(1,2), ROW(3,4), ROW(-1,2) ORDER BY column_0) ORDER BY column_0;
12+
13+
VALUES ROW(1, 1), ROW(10, 2), ROW(2, 3), ROW(3, 4), ROW(5, 5)
14+
ORDER BY 1 LIMIT 2 OFFSET 1;
15+
16+
VALUES ROW(0, 1), ROW(4, 6), ROW(7, 5), ROW(5, 7), ROW(9, 8),
17+
ROW(7, 2), ROW(7, 6), ROW(4, 5), ROW(3, 2), ROW(9, 7)
18+
ORDER BY 1, 2;

mysql-test/t/unary_query_term.test

+5
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,15 @@ eval $query;
9999
--skip_if_hypergraph
100100
eval EXPLAIN FORMAT = tree $query;
101101

102+
# This query returns wrong results with the old optimizer. The
103+
# hypergraph optimizer returns correct results.
102104
let $query = ( ( (VALUES ROW(1,1), ROW(2,2), ROW(3,3), ROW(4,4), ROW(-1,-1))
103105
ORDER BY column_0 LIMIT 4)
104106
LIMIT 3)
105107
ORDER BY 1;
108+
--skip_if_hypergraph # Hypergraph returns correct results.
106109
eval $query;
110+
--skip_if_hypergraph # Different plan (actually sorts result before LIMIT).
107111
--replace_regex $elide_costs
108112
eval EXPLAIN FORMAT = tree $query;
109113

@@ -187,6 +191,7 @@ ORDER BY -a LIMIT 2;
187191
--echo # Check explicit table and table value constructor
188192
--echo #
189193
(TABLE r ORDER BY a LIMIT 5) ORDER BY -a LIMIT 4;
194+
--skip_if_hypergraph # Wrong result with the old optimizer. Correct with hypergraph.
190195
VALUES ROW(1,2),ROW(3,4),ROW(-1,2) ORDER BY 1;
191196
(VALUES ROW(1,2),ROW(3,4),ROW(-1,2) ORDER BY 1) ORDER BY 1 LIMIT 2;
192197

sql/item.cc

+2-8
Original file line numberDiff line numberDiff line change
@@ -10762,16 +10762,10 @@ Item_values_column::Item_values_column(THD *thd, Item *ref) : super(thd, ref) {
1076210762
fixed = true;
1076310763
}
1076410764

10765-
/* purecov: begin deadcode */
10766-
10767-
bool Item_values_column::eq(const Item *item, bool binary_cmp) const {
10768-
assert(false);
10769-
const Item *it = item->real_item();
10770-
return m_value_ref && m_value_ref->eq(it, binary_cmp);
10765+
bool Item_values_column::eq(const Item *item, bool) const {
10766+
return item->type() == VALUES_COLUMN_ITEM && item_name.eq(item->item_name);
1077110767
}
1077210768

10773-
/* purecov: end */
10774-
1077510769
double Item_values_column::val_real() {
1077610770
assert(fixed);
1077710771
const double tmp = m_value_ref->val_real();

sql/iterators/basic_row_iterators.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
class Filesort_info;
4242
class Item;
43+
class Item_values_column;
4344
class JOIN;
4445
class Sort_result;
4546
class THD;
@@ -500,7 +501,7 @@ class TableValueConstructorIterator final : public RowIterator {
500501
TableValueConstructorIterator(
501502
THD *thd, ha_rows *examined_rows,
502503
const mem_root_deque<mem_root_deque<Item *> *> &row_value_list,
503-
mem_root_deque<Item *> *join_fields);
504+
Mem_root_array<Item_values_column *> *output_refs);
504505

505506
bool Init() override;
506507
int Read() override;
@@ -521,7 +522,8 @@ class TableValueConstructorIterator final : public RowIterator {
521522
/// References to the row we currently want to output. When multiple rows must
522523
/// be output, this contains Item_values_column objects. In this case, each
523524
/// call to Read() will replace its current reference with the next row.
524-
mem_root_deque<Item *> *const m_output_refs;
525+
/// It is nullptr if there is only one row.
526+
Mem_root_array<Item_values_column *> *m_output_refs;
525527
};
526528

527529
/**

sql/iterators/ref_row_iterators.cc

+5-7
Original file line numberDiff line numberDiff line change
@@ -905,11 +905,11 @@ int ZeroRowsAggregatedIterator::Read() {
905905
TableValueConstructorIterator::TableValueConstructorIterator(
906906
THD *thd, ha_rows *examined_rows,
907907
const mem_root_deque<mem_root_deque<Item *> *> &row_value_list,
908-
mem_root_deque<Item *> *join_fields)
908+
Mem_root_array<Item_values_column *> *output_refs)
909909
: RowIterator(thd),
910910
m_examined_rows(examined_rows),
911911
m_row_value_list(row_value_list),
912-
m_output_refs(join_fields) {
912+
m_output_refs(output_refs) {
913913
assert(examined_rows != nullptr);
914914
}
915915

@@ -924,12 +924,10 @@ int TableValueConstructorIterator::Read() {
924924
// If the TVC has a single row, we don't create Item_values_column reference
925925
// objects during resolving. We will instead use the single row directly from
926926
// Query_block::item_list, such that we don't have to change references here.
927-
if (m_row_value_list.size() != 1) {
928-
auto output_refs_it = VisibleFields(*m_output_refs).begin();
927+
if (m_output_refs != nullptr) {
928+
Item_values_column **output_refs_it = m_output_refs->begin();
929929
for (const Item *value : **m_row_it) {
930-
Item_values_column *ref =
931-
down_cast<Item_values_column *>(*output_refs_it);
932-
++output_refs_it;
930+
Item_values_column *ref = *output_refs_it++;
933931

934932
// Ideally we would not be casting away constness here. However, as the
935933
// evaluation of Item objects during execution is not const (i.e. none of

sql/join_optimizer/access_path.cc

+53-2
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,28 @@
2323
#include "sql/join_optimizer/access_path.h"
2424

2525
#include <algorithm>
26+
#include <cmath>
27+
#include <memory>
2628
#include <vector>
2729

30+
#include "mem_root_deque.h"
2831
#include "my_base.h"
32+
#include "my_dbug.h"
33+
#include "mysql/components/services/bits/psi_bits.h"
34+
#include "prealloced_array.h"
35+
#include "sql/field.h"
2936
#include "sql/filesort.h"
37+
#include "sql/handler.h"
3038
#include "sql/item_cmpfunc.h"
3139
#include "sql/item_func.h"
32-
#include "sql/item_sum.h"
40+
#include "sql/item_subselect.h"
3341
#include "sql/iterators/basic_row_iterators.h"
3442
#include "sql/iterators/bka_iterator.h"
3543
#include "sql/iterators/composite_iterators.h"
3644
#include "sql/iterators/delete_rows_iterator.h"
3745
#include "sql/iterators/hash_join_iterator.h"
3846
#include "sql/iterators/ref_row_iterators.h"
47+
#include "sql/iterators/row_iterator.h"
3948
#include "sql/iterators/sorting_iterator.h"
4049
#include "sql/iterators/timing_iterator.h"
4150
#include "sql/iterators/window_iterators.h"
@@ -46,6 +55,7 @@
4655
#include "sql/join_optimizer/relational_expression.h"
4756
#include "sql/join_optimizer/walk_access_paths.h"
4857
#include "sql/mem_root_array.h"
58+
#include "sql/pack_rows.h"
4959
#include "sql/range_optimizer/geometry_index_range_scan.h"
5060
#include "sql/range_optimizer/group_index_skip_scan.h"
5161
#include "sql/range_optimizer/group_index_skip_scan_plan.h"
@@ -56,9 +66,18 @@
5666
#include "sql/range_optimizer/range_optimizer.h"
5767
#include "sql/range_optimizer/reverse_index_range_scan.h"
5868
#include "sql/range_optimizer/rowid_ordered_retrieval.h"
69+
#include "sql/sql_array.h"
70+
#include "sql/sql_const.h"
71+
#include "sql/sql_executor.h"
72+
#include "sql/sql_lex.h"
73+
#include "sql/sql_list.h"
74+
#include "sql/sql_opt_exec_shared.h"
5975
#include "sql/sql_optimizer.h"
6076
#include "sql/sql_update.h"
77+
#include "sql/system_variables.h"
6178
#include "sql/table.h"
79+
#include "sql/visible_fields.h"
80+
#include "template_utils.h"
6281

6382
using pack_rows::TableCollection;
6483
using std::all_of;
@@ -123,6 +142,38 @@ AccessPath *NewUpdateRowsAccessPath(THD *thd, AccessPath *child,
123142
return path;
124143
}
125144

145+
static Mem_root_array<Item_values_column *> *GetTableValueConstructorOutputRefs(
146+
MEM_ROOT *mem_root, const JOIN *join) {
147+
// If the table value constructor has a single row, the values are contained
148+
// directly in join->fields, and there are no Item_values_column output refs.
149+
if (join->query_block->row_value_list->size() == 1) {
150+
return nullptr;
151+
}
152+
153+
auto columns = new (mem_root) Mem_root_array<Item_values_column *>(mem_root);
154+
if (columns == nullptr) return nullptr;
155+
156+
for (Item *column : VisibleFields(*join->fields)) {
157+
if (columns->push_back(down_cast<Item_values_column *>(column))) {
158+
return nullptr;
159+
}
160+
}
161+
162+
return columns;
163+
}
164+
165+
AccessPath *NewTableValueConstructorAccessPath(const THD *thd,
166+
const JOIN *join) {
167+
AccessPath *path = new (thd->mem_root) AccessPath;
168+
path->type = AccessPath::TABLE_VALUE_CONSTRUCTOR;
169+
// The iterator keeps track of which row it is at in examined_rows,
170+
// so we always need to give it the pointer.
171+
path->count_examined_rows = true;
172+
path->table_value_constructor().output_refs =
173+
GetTableValueConstructorOutputRefs(thd->mem_root, join);
174+
return path;
175+
}
176+
126177
static AccessPath *FindSingleAccessPathOfType(AccessPath *path,
127178
AccessPath::Type type) {
128179
AccessPath *found_path = nullptr;
@@ -721,7 +772,7 @@ unique_ptr_destroy_only<RowIterator> CreateIteratorFromAccessPath(
721772
Query_block *query_block = join->query_block;
722773
iterator = NewIterator<TableValueConstructorIterator>(
723774
thd, mem_root, examined_rows, *query_block->row_value_list,
724-
query_block->join->fields);
775+
path->table_value_constructor().output_refs);
725776
break;
726777
}
727778
case AccessPath::FAKE_SINGLE_ROW:

sql/join_optimizer/access_path.h

+14-20
Original file line numberDiff line numberDiff line change
@@ -25,33 +25,35 @@
2525

2626
#include <assert.h>
2727
#include <stdint.h>
28-
29-
#include <string>
28+
#include <cstddef>
3029
#include <type_traits>
30+
#include <utility>
3131
#include <vector>
3232

33-
#include "sql/iterators/row_iterator.h"
33+
#include "my_alloc.h"
34+
#include "my_base.h"
35+
#include "my_table_map.h"
36+
#include "sql/item.h"
37+
// IWYU suggests removing row_iterator.h, but then the inlined short form of
38+
// CreateIteratorFromAccessPath() fails to compile. So use a pragma to keep it.
39+
#include "sql/iterators/row_iterator.h" // IWYU pragma: keep
3440
#include "sql/join_optimizer/interesting_orders_defs.h"
3541
#include "sql/join_optimizer/materialize_path_parameters.h"
3642
#include "sql/join_optimizer/node_map.h"
3743
#include "sql/join_optimizer/overflow_bitset.h"
38-
#include "sql/join_optimizer/relational_expression.h"
3944
#include "sql/join_type.h"
4045
#include "sql/mem_root_array.h"
4146
#include "sql/olap.h"
42-
#include "sql/sql_array.h"
4347
#include "sql/sql_class.h"
4448
#include "sql/table.h"
4549

46-
template <class T>
47-
class Bounds_checked_array;
48-
class Common_table_expr;
50+
class Cost_model_server;
4951
class Filesort;
5052
class HashJoinCondition;
51-
class Item;
5253
class Item_func_match;
5354
class JOIN;
5455
class KEY;
56+
class Query_expression;
5557
class QEP_TAB;
5658
class QUICK_RANGE;
5759
class SJ_TMP_TABLE;
@@ -63,10 +65,8 @@ struct GroupIndexSkipScanParameters;
6365
struct IndexSkipScanParameters;
6466
struct Index_lookup;
6567
struct KEY_PART;
66-
struct ORDER;
6768
struct POSITION;
6869
struct RelationalExpression;
69-
struct TABLE;
7070

7171
/**
7272
A specification that two specific relational expressions
@@ -1059,7 +1059,7 @@ struct AccessPath {
10591059
} unqualified_count;
10601060

10611061
struct {
1062-
// No members (implicit from the JOIN).
1062+
Mem_root_array<Item_values_column *> *output_refs;
10631063
} table_value_constructor;
10641064
struct {
10651065
// No members.
@@ -1424,14 +1424,8 @@ inline AccessPath *NewUnqualifiedCountAccessPath(THD *thd) {
14241424
return path;
14251425
}
14261426

1427-
inline AccessPath *NewTableValueConstructorAccessPath(THD *thd) {
1428-
AccessPath *path = new (thd->mem_root) AccessPath;
1429-
path->type = AccessPath::TABLE_VALUE_CONSTRUCTOR;
1430-
// The iterator keeps track of which row it is at in examined_rows,
1431-
// so we always need to give it the pointer.
1432-
path->count_examined_rows = true;
1433-
return path;
1434-
}
1427+
AccessPath *NewTableValueConstructorAccessPath(const THD *thd,
1428+
const JOIN *join);
14351429

14361430
inline AccessPath *NewNestedLoopSemiJoinWithDuplicateRemovalAccessPath(
14371431
THD *thd, AccessPath *outer, AccessPath *inner, const TABLE *table,

0 commit comments

Comments
 (0)