Skip to content

Commit 7be814e

Browse files
author
Dag Wanvik
committed
Bug#36652610 UNIONs are O(N^2) [revised - 2/2]
[ revised - the original versions of these two patches were rolled back
  due to excessive space usage for mtr: caused OOM killer ]

In this patch, we flatten equal set operations at the parsing level,
before contextualization, to avoid deep ASTs if possible[1].
Contextualization of equal set operations (with the exception of
INTERSECT ALL, which due to its implementation method can't have more
than one right side operand) now happens on a shallow AST: a large
number of e.g. UNIONs have a parse tree looking like

                 PT_union
                /   \ .....\
               /     \      \
          query-1  query-2  query-N

leading directly to the Query_term_set_op structure

          Query_term_union
                 |
            -----------
            |  | ... |
            1  2     N

[1] It works on left deep structures, e.g.

        (a UNION b) UNION c    # type a
    or
        a UNION b UNION c      # type b

    but not on

        a UNION (b UNION c)    # type c

The latter, type c, will still be represented as a deep AST and eat
stack during contextualization, but will be flattened eventually in
merge_descendants. Luckily, the left deep form of type b is the common
one for very long set operations.

We successfully tried 100000 operands, as in the test report, without
running out of space (~6.4 GB virtual space usage on Ubuntu, single
thread).

Change-Id: I15c1ecb42fdd9449e6d2b42827439363776f82b1
1 parent 1950dd5 commit 7be814e

File tree

5 files changed

+174
-20
lines changed

5 files changed

+174
-20
lines changed

mysql-test/r/parser.result

+36
Original file line numberDiff line numberDiff line change
@@ -2627,6 +2627,14 @@ SELECT 1 UNION SELECT 1 FROM DUAL INTO @var;
26272627
(SELECT 1 UNION SELECT 1 FROM DUAL INTO @var);
26282628
SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var;
26292629
(SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var);
2630+
# Check that this also works with flatten_equal_set_ops
2631+
# (minimum three operands to check this)
2632+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var;
2633+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var);
2634+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var;
2635+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var);
2636+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var;
2637+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var);
26302638
#
26312639
# Deprecation warning expected:
26322640
#
@@ -2648,6 +2656,34 @@ Warning 3962 The INTO clause is deprecated inside query blocks of query expressi
26482656
(SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE);
26492657
Warnings:
26502658
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2659+
# Check that warning also works with flatten_equal_set_ops
2660+
# (minimum three operands to check this)
2661+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FROM DUAL;
2662+
Warnings:
2663+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2664+
SELECT 1 UNION SELECT 1 UNION (SELECT 1 INTO @var FROM DUAL);
2665+
Warnings:
2666+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2667+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var FOR UPDATE;
2668+
Warnings:
2669+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2670+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var FOR UPDATE);
2671+
Warnings:
2672+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2673+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE;
2674+
Warnings:
2675+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2676+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE);
2677+
Warnings:
2678+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2679+
# Check that PT_set_operation::has_into_clause works correctly
2680+
# for more than two operands after flatten_equal_set_ops
2681+
SELECT 1 UNION (SELECT 1 INTO @var FROM DUAL) UNION SELECT 1;
2682+
ERROR HY000: Misplaced INTO clause, INTO is not allowed inside subqueries, and must be placed at end of UNION clauses.
2683+
SELECT 1 UNION SELECT 1 UNION SELECT * FROM (SELECT 2 UNION SELECT 1 INTO @var FROM DUAL) t;
2684+
ERROR HY000: Misplaced INTO clause, INTO is not allowed inside subqueries, and must be placed at end of UNION clauses.
2685+
SELECT 1 UNION SELECT 1 UNION SELECT * FROM (SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FROM DUAL) t;
2686+
ERROR HY000: Misplaced INTO clause, INTO is not allowed inside subqueries, and must be placed at end of UNION clauses.
26512687
#
26522688
# Syntax error expected:
26532689
#

mysql-test/t/parser.test

+31
Original file line numberDiff line numberDiff line change
@@ -2520,6 +2520,17 @@ SELECT 1 UNION SELECT 1 FROM DUAL INTO @var;
25202520
SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var;
25212521
(SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var);
25222522

2523+
--echo # Check that this also works with flatten_equal_set_ops
2524+
--echo # (minimum three operands to check this)
2525+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var;
2526+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var);
2527+
2528+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var;
2529+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var);
2530+
2531+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var;
2532+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var);
2533+
25232534
--echo #
25242535
--echo # Deprecation warning expected:
25252536
--echo #
@@ -2533,6 +2544,26 @@ SELECT 1 UNION SELECT 1 FROM DUAL INTO @var FOR UPDATE;
25332544
SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE;
25342545
(SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE);
25352546

2547+
--echo # Check that warning also works with flatten_equal_set_ops
2548+
--echo # (minimum three operands to check this)
2549+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FROM DUAL;
2550+
SELECT 1 UNION SELECT 1 UNION (SELECT 1 INTO @var FROM DUAL);
2551+
2552+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var FOR UPDATE;
2553+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var FOR UPDATE);
2554+
2555+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE;
2556+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE);
2557+
2558+
--echo # Check that PT_set_operation::has_into_clause works correctly
2559+
--echo # for more than two operands after flatten_equal_set_ops
2560+
--error ER_MISPLACED_INTO
2561+
SELECT 1 UNION (SELECT 1 INTO @var FROM DUAL) UNION SELECT 1;
2562+
--error ER_MISPLACED_INTO
2563+
SELECT 1 UNION SELECT 1 UNION SELECT * FROM (SELECT 2 UNION SELECT 1 INTO @var FROM DUAL) t;
2564+
--error ER_MISPLACED_INTO
2565+
SELECT 1 UNION SELECT 1 UNION SELECT * FROM (SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FROM DUAL) t;
2566+
25362567
--echo #
25372568
--echo # Syntax error expected:
25382569
--echo #

sql/parse_tree_nodes.cc

+9-6
Original file line numberDiff line numberDiff line change
@@ -1768,13 +1768,16 @@ bool PT_set_operation::contextualize_setop(Parse_context *pc,
17681768
pc->m_stack.push_back(QueryLevel(pc->mem_root, context));
17691769
if (super::do_contextualize(pc)) return true;
17701770

1771-
if (m_lhs->contextualize(pc)) return true;
1771+
if (m_list[0]->contextualize(pc)) return true;
17721772

1773-
pc->select = pc->thd->lex->new_set_operation_query(pc->select);
1774-
1775-
if (pc->select == nullptr || m_rhs->contextualize(pc)) return true;
1776-
1777-
pc->thd->lex->pop_context();
1773+
List_iterator<PT_query_expression_body> it(m_list);
1774+
PT_query_expression_body *elt;
1775+
it++; // skip first
1776+
while ((elt = it++)) {
1777+
pc->select = pc->thd->lex->new_set_operation_query(pc->select);
1778+
if (pc->select == nullptr || elt->contextualize(pc)) return true;
1779+
pc->thd->lex->pop_context();
1780+
}
17781781

17791782
QueryLevel ql = pc->m_stack.back();
17801783
pc->m_stack.pop_back();

sql/parse_tree_nodes.h

+86-11
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "my_base.h"
3838

3939
#include "my_inttypes.h" // TODO: replace with cstdint
40+
#include "my_list.h"
4041
#include "my_sqlcommand.h"
4142
#include "my_sys.h"
4243
#include "my_thread_local.h"
@@ -821,9 +822,9 @@ class PT_locking_clause_list : public Parse_tree_node {
821822
class PT_query_expression_body : public Parse_tree_node {
822823
public:
823824
explicit PT_query_expression_body(const POS &pos) : Parse_tree_node(pos) {}
824-
825+
enum Setop_type { NONE, UNION, INTERSECT, EXCEPT };
826+
virtual Setop_type type() const { return NONE; }
825827
virtual bool is_set_operation() const = 0;
826-
827828
/**
828829
True if this query expression can absorb an extraneous order by/limit
829830
clause. The `ORDER BY`/`LIMIT` syntax is mostly consistent, i.e. a
@@ -1825,37 +1826,43 @@ class PT_set_operation : public PT_query_expression_body {
18251826
bool is_distinct, PT_query_expression_body *rhs,
18261827
bool is_rhs_in_parentheses = false)
18271828
: super(pos),
1828-
m_lhs(lhs),
18291829
m_is_distinct(is_distinct),
1830-
m_rhs(rhs),
1831-
m_is_rhs_in_parentheses{is_rhs_in_parentheses} {}
1830+
m_is_rhs_in_parentheses{is_rhs_in_parentheses} {
1831+
m_list.push_back(lhs);
1832+
m_list.push_back(rhs);
1833+
}
18321834

18331835
void merge_descendants(Parse_context *pc, Query_term_set_op *setop,
18341836
QueryLevel &ql);
18351837
bool is_set_operation() const override { return true; }
18361838

18371839
bool has_into_clause() const override {
1838-
return m_lhs->has_into_clause() || m_rhs->has_into_clause();
1840+
return std::any_of(m_list.cbegin(), m_list.cend(),
1841+
[](const PT_query_expression_body &body) {
1842+
return body.has_into_clause();
1843+
});
18391844
}
18401845
bool has_trailing_into_clause() const override {
1841-
return !m_is_rhs_in_parentheses && m_rhs->has_trailing_into_clause();
1846+
return !m_is_rhs_in_parentheses &&
1847+
m_list[m_list.elements - 1]->has_trailing_into_clause();
18421848
}
18431849

18441850
bool can_absorb_order_and_limit(bool, bool) const override { return false; }
18451851

18461852
bool is_table_value_constructor() const override { return false; }
18471853
PT_insert_values_list *get_row_value_list() const override { return nullptr; }
1854+
bool is_distinct() const { return m_is_distinct; }
1855+
1856+
List<PT_query_expression_body> m_list;
1857+
void set_is_rhs_in_parentheses(bool v) { m_is_rhs_in_parentheses = v; }
18481858

18491859
protected:
18501860
bool contextualize_setop(Parse_context *pc, Query_term_type setop_type,
18511861
Surrounding_context context);
18521862
void merge_children(Query_term_set_op *setop, Query_term_set_op *lower);
1853-
PT_query_expression_body *m_lhs;
18541863
bool m_is_distinct;
1855-
PT_query_expression_body *m_rhs;
18561864
PT_into_destination *m_into{nullptr};
1857-
const bool m_is_rhs_in_parentheses;
1858-
1865+
bool m_is_rhs_in_parentheses;
18591866
void add_json_info(Json_object *obj) override {
18601867
obj->add_alias("distinct", create_dom_ptr<Json_boolean>(m_is_distinct));
18611868
obj->add_alias("rhs_in_parentheses",
@@ -1869,6 +1876,7 @@ class PT_union : public PT_set_operation {
18691876
public:
18701877
using PT_set_operation::PT_set_operation;
18711878
bool do_contextualize(Parse_context *pc) override;
1879+
enum Setop_type type() const override { return UNION; }
18721880
};
18731881

18741882
class PT_except : public PT_set_operation {
@@ -1877,6 +1885,7 @@ class PT_except : public PT_set_operation {
18771885
public:
18781886
using PT_set_operation::PT_set_operation;
18791887
bool do_contextualize(Parse_context *pc) override;
1888+
enum Setop_type type() const override { return EXCEPT; }
18801889
};
18811890

18821891
class PT_intersect : public PT_set_operation {
@@ -1885,6 +1894,7 @@ class PT_intersect : public PT_set_operation {
18851894
public:
18861895
using PT_set_operation::PT_set_operation;
18871896
bool do_contextualize(Parse_context *pc) override;
1897+
enum Setop_type type() const override { return INTERSECT; }
18881898
};
18891899

18901900
class PT_select_stmt : public Parse_tree_root {
@@ -5595,4 +5605,69 @@ PT_column_attr_base *make_column_secondary_engine_attribute(MEM_ROOT *,
55955605
PT_base_index_option *make_index_engine_attribute(MEM_ROOT *, LEX_CSTRING);
55965606
PT_base_index_option *make_index_secondary_engine_attribute(MEM_ROOT *,
55975607
LEX_CSTRING);
5608+
5609+
/**
5610+
Helper function to imitate \c dynamic_cast for \c PT_set_operation hierarchy.
5611+
5612+
Template parameter @p To is the destination type (@c PT_union, \c PT_except or
5613+
\c PT_intersect). For \c PT_intersect we return nullptr if ALL due to impl.
5614+
restriction: we cannot merge INTERSECT ALL.
5615+
5616+
@param from source item
5617+
@param is_distinct true if distinct
5618+
@return typecast item to the type To or NULL
5619+
*/
5620+
template <class To, PT_set_operation::Setop_type Tag>
5621+
To *setop_cast(PT_query_expression_body *from, bool is_distinct) {
5622+
return (from->type() == Tag &&
5623+
down_cast<PT_set_operation *>(from)->is_distinct() == is_distinct &&
5624+
(Tag != PT_query_expression_body::INTERSECT || is_distinct))
5625+
? static_cast<To *>(from)
5626+
: nullptr;
5627+
}
5628+
5629+
/**
5630+
Flatten set operators at parse time
5631+
5632+
This function flattens UNION ALL/DISTINCT, EXCEPT ALL/DISTINCT
5633+
and INTERSECT DISTINCT (not ALL due to implementation restrictions) operators
5634+
at parse time if applicable, otherwise it creates
5635+
a new \c PT_<setop> node from the two input operands.
5636+
5637+
Template parameter @p Class is @c PT_union, @c PT_except or @c PT_intersect
5638+
Template parameter @p Tag is @c PT_query_expression_body::UNION,
5639+
@c ::EXCEPT or @c ::INTERSECT
5640+
5641+
@param mem_root MEM_ROOT
5642+
@param pos parse location
5643+
@param left left argument of the operator
5644+
@param is_distinct true if DISTINCT
5645+
@param right right argument of the operator
5646+
@param is_right_in_parentheses
5647+
true if right hand side is parenthesized
5648+
@return resulting parse tree Item
5649+
*/
5650+
template <class Class, PT_set_operation::Setop_type Tag>
5651+
PT_set_operation *flatten_equal_set_ops(MEM_ROOT *mem_root, const POS &pos,
5652+
PT_query_expression_body *left,
5653+
bool is_distinct,
5654+
PT_query_expression_body *right,
5655+
bool is_right_in_parentheses) {
5656+
if (left == nullptr || right == nullptr) return nullptr;
5657+
Class *left_setop = setop_cast<Class, Tag>(left, is_distinct);
5658+
Class *right_setop [[maybe_unused]] =
5659+
setop_cast<Class, Tag>(right, is_distinct);
5660+
assert(right_setop == nullptr); // doesn't happen
5661+
if (left_setop != nullptr) {
5662+
// X1 op X2 op Y ==> op (X1, X2, Y)
5663+
left_setop->m_list.push_back(right);
5664+
left_setop->set_is_rhs_in_parentheses(is_right_in_parentheses);
5665+
return left_setop;
5666+
} else {
5667+
/* X op Y */
5668+
return new (mem_root)
5669+
Class(pos, left, is_distinct, right, is_right_in_parentheses);
5670+
}
5671+
}
5672+
55985673
#endif /* PARSE_TREE_NODES_INCLUDED */

sql/sql_yacc.yy

+12-3
Original file line numberDiff line numberDiff line change
@@ -9803,17 +9803,26 @@ query_expression_body:
98039803
}
98049804
| query_expression_body UNION_SYM union_option query_expression_body
98059805
{
9806-
$$ = {NEW_PTN PT_union(@$, $1.body, $3, $4.body, $4.is_parenthesized),
9806+
$$ = {flatten_equal_set_ops<PT_union,
9807+
PT_set_operation::UNION>(
9808+
YYMEM_ROOT, @$, $1.body, $3, $4.body,
9809+
$4.is_parenthesized),
98079810
false};
98089811
}
98099812
| query_expression_body EXCEPT_SYM union_option query_expression_body
98109813
{
9811-
$$ = {NEW_PTN PT_except(@$, $1.body, $3, $4.body, $4.is_parenthesized),
9814+
$$ = {flatten_equal_set_ops<PT_except,
9815+
PT_set_operation::EXCEPT>(
9816+
YYMEM_ROOT, @$, $1.body, $3, $4.body,
9817+
$4.is_parenthesized),
98129818
false};
98139819
}
98149820
| query_expression_body INTERSECT_SYM union_option query_expression_body
98159821
{
9816-
$$ = {NEW_PTN PT_intersect(@$, $1.body, $3, $4.body, $4.is_parenthesized),
9822+
$$ = {flatten_equal_set_ops<PT_intersect,
9823+
PT_set_operation::INTERSECT>(
9824+
YYMEM_ROOT, @$, $1.body, $3, $4.body,
9825+
$4.is_parenthesized),
98179826
false};
98189827
}
98199828
;

0 commit comments

Comments
 (0)