Skip to content

Commit 3f61e7c

Browse files
author
Dag Wanvik
committed
Bug#36652610 UNIONs are O(N^2) [2/2]
In this patch, we flatten equal set operations at the parsing level, before contextualization, to avoid deep ASTs if possible [1]. Contextualization of equal set operations (with the exception of INTERSECT ALL, which due to its implementation method can't have more than one right side operand) now happens on a shallow AST: a large number of e.g. UNION operands have a parse tree looking like

           PT_union
           /  \ .....\
          /    \      \
    query-1  query-2   query-N

leading directly to the Query_term_set_op structure

       Query_term_union
              |
         -----------
         |  |  ... |
         1  2      N

[1] It works on left-deep structures, e.g.

    (a UNION b) UNION c    # type a
or
    a UNION b UNION c      # type b

but not on

    a UNION (b UNION c)    # type c

The latter, type c, will still be represented as a deep AST and eat stack during contextualization, but will be flattened eventually in merge_descendants. Luckily, the left-deep form of type b is the common one for very long set operations. We have manually tried 100000 operands, as in the test report, successfully without running out of space.

Change-Id: Ia06efbc35cbceb01c63510428d1f9b4089bb861c
1 parent 5ccd6d4 commit 3f61e7c

File tree

5 files changed

+174
-20
lines changed

5 files changed

+174
-20
lines changed

mysql-test/r/parser.result

+36
Original file line numberDiff line numberDiff line change
@@ -2627,6 +2627,14 @@ SELECT 1 UNION SELECT 1 FROM DUAL INTO @var;
26272627
(SELECT 1 UNION SELECT 1 FROM DUAL INTO @var);
26282628
SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var;
26292629
(SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var);
2630+
# Check that this also works with flatten_equal_set_ops
2631+
# (minimum three operands to check this)
2632+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var;
2633+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var);
2634+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var;
2635+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var);
2636+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var;
2637+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var);
26302638
#
26312639
# Deprecation warning expected:
26322640
#
@@ -2648,6 +2656,34 @@ Warning 3962 The INTO clause is deprecated inside query blocks of query expressi
26482656
(SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE);
26492657
Warnings:
26502658
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2659+
# Check that warning also works with flatten_equal_set_ops
2660+
# (minimum three operands to check this)
2661+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FROM DUAL;
2662+
Warnings:
2663+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2664+
SELECT 1 UNION SELECT 1 UNION (SELECT 1 INTO @var FROM DUAL);
2665+
Warnings:
2666+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2667+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var FOR UPDATE;
2668+
Warnings:
2669+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2670+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var FOR UPDATE);
2671+
Warnings:
2672+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2673+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE;
2674+
Warnings:
2675+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2676+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE);
2677+
Warnings:
2678+
Warning 3962 The INTO clause is deprecated inside query blocks of query expressions and will be removed in a future release. Please move the INTO clause to the end of statement instead.
2679+
# Check that PT_set_operation::has_into_clause works correctly
2680+
# for more than two operands after flatten_equal_set_ops
2681+
SELECT 1 UNION (SELECT 1 INTO @var FROM DUAL) UNION SELECT 1;
2682+
ERROR HY000: Misplaced INTO clause, INTO is not allowed inside subqueries, and must be placed at end of UNION clauses.
2683+
SELECT 1 UNION SELECT 1 UNION SELECT * FROM (SELECT 2 UNION SELECT 1 INTO @var FROM DUAL) t;
2684+
ERROR HY000: Misplaced INTO clause, INTO is not allowed inside subqueries, and must be placed at end of UNION clauses.
2685+
SELECT 1 UNION SELECT 1 UNION SELECT * FROM (SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FROM DUAL) t;
2686+
ERROR HY000: Misplaced INTO clause, INTO is not allowed inside subqueries, and must be placed at end of UNION clauses.
26512687
#
26522688
# Syntax error expected:
26532689
#

mysql-test/t/parser.test

+31
Original file line numberDiff line numberDiff line change
@@ -2520,6 +2520,17 @@ SELECT 1 UNION SELECT 1 FROM DUAL INTO @var;
25202520
SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var;
25212521
(SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var);
25222522

2523+
--echo # Check that this also works with flatten_equal_set_ops
2524+
--echo # (minimum three operands to check this)
2525+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var;
2526+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var);
2527+
2528+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var;
2529+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var);
2530+
2531+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var;
2532+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL FOR UPDATE INTO @var);
2533+
25232534
--echo #
25242535
--echo # Deprecation warning expected:
25252536
--echo #
@@ -2533,6 +2544,26 @@ SELECT 1 UNION SELECT 1 FROM DUAL INTO @var FOR UPDATE;
25332544
SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE;
25342545
(SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE);
25352546

2547+
--echo # Check that warning also works with flatten_equal_set_ops
2548+
--echo # (minimum three operands to check this)
2549+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FROM DUAL;
2550+
SELECT 1 UNION SELECT 1 UNION (SELECT 1 INTO @var FROM DUAL);
2551+
2552+
SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var FOR UPDATE;
2553+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 FROM DUAL INTO @var FOR UPDATE);
2554+
2555+
SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE;
2556+
(SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FOR UPDATE);
2557+
2558+
--echo # Check that PT_set_operation::has_into_clause works correctly
2559+
--echo # for more than two operands after flatten_equal_set_ops
2560+
--error ER_MISPLACED_INTO
2561+
SELECT 1 UNION (SELECT 1 INTO @var FROM DUAL) UNION SELECT 1;
2562+
--error ER_MISPLACED_INTO
2563+
SELECT 1 UNION SELECT 1 UNION SELECT * FROM (SELECT 2 UNION SELECT 1 INTO @var FROM DUAL) t;
2564+
--error ER_MISPLACED_INTO
2565+
SELECT 1 UNION SELECT 1 UNION SELECT * FROM (SELECT 1 UNION SELECT 1 UNION SELECT 1 INTO @var FROM DUAL) t;
2566+
25362567
--echo #
25372568
--echo # Syntax error expected:
25382569
--echo #

sql/parse_tree_nodes.cc

+9-6
Original file line numberDiff line numberDiff line change
@@ -1766,13 +1766,16 @@ bool PT_set_operation::contextualize_setop(Parse_context *pc,
17661766
pc->m_stack.push_back(QueryLevel(pc->mem_root, context));
17671767
if (super::do_contextualize(pc)) return true;
17681768

1769-
if (m_lhs->contextualize(pc)) return true;
1769+
if (m_list[0]->contextualize(pc)) return true;
17701770

1771-
pc->select = pc->thd->lex->new_set_operation_query(pc->select);
1772-
1773-
if (pc->select == nullptr || m_rhs->contextualize(pc)) return true;
1774-
1775-
pc->thd->lex->pop_context();
1771+
List_iterator<PT_query_expression_body> it(m_list);
1772+
PT_query_expression_body *elt;
1773+
it++; // skip first
1774+
while ((elt = it++)) {
1775+
pc->select = pc->thd->lex->new_set_operation_query(pc->select);
1776+
if (pc->select == nullptr || elt->contextualize(pc)) return true;
1777+
pc->thd->lex->pop_context();
1778+
}
17761779

17771780
QueryLevel ql = pc->m_stack.back();
17781781
pc->m_stack.pop_back();

sql/parse_tree_nodes.h

+86-11
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "my_base.h"
3838

3939
#include "my_inttypes.h" // TODO: replace with cstdint
40+
#include "my_list.h"
4041
#include "my_sqlcommand.h"
4142
#include "my_sys.h"
4243
#include "my_thread_local.h"
@@ -821,9 +822,9 @@ class PT_locking_clause_list : public Parse_tree_node {
821822
class PT_query_expression_body : public Parse_tree_node {
822823
public:
823824
explicit PT_query_expression_body(const POS &pos) : Parse_tree_node(pos) {}
824-
825+
enum Setop_type { NONE, UNION, INTERSECT, EXCEPT };
826+
virtual Setop_type type() const { return NONE; }
825827
virtual bool is_set_operation() const = 0;
826-
827828
/**
828829
True if this query expression can absorb an extraneous order by/limit
829830
clause. The `ORDER BY`/`LIMIT` syntax is mostly consistent, i.e. a
@@ -1798,37 +1799,43 @@ class PT_set_operation : public PT_query_expression_body {
17981799
bool is_distinct, PT_query_expression_body *rhs,
17991800
bool is_rhs_in_parentheses = false)
18001801
: super(pos),
1801-
m_lhs(lhs),
18021802
m_is_distinct(is_distinct),
1803-
m_rhs(rhs),
1804-
m_is_rhs_in_parentheses{is_rhs_in_parentheses} {}
1803+
m_is_rhs_in_parentheses{is_rhs_in_parentheses} {
1804+
m_list.push_back(lhs);
1805+
m_list.push_back(rhs);
1806+
}
18051807

18061808
void merge_descendants(Parse_context *pc, Query_term_set_op *setop,
18071809
QueryLevel &ql);
18081810
bool is_set_operation() const override { return true; }
18091811

18101812
bool has_into_clause() const override {
1811-
return m_lhs->has_into_clause() || m_rhs->has_into_clause();
1813+
return std::any_of(m_list.cbegin(), m_list.cend(),
1814+
[](const PT_query_expression_body &body) {
1815+
return body.has_into_clause();
1816+
});
18121817
}
18131818
bool has_trailing_into_clause() const override {
1814-
return !m_is_rhs_in_parentheses && m_rhs->has_trailing_into_clause();
1819+
return !m_is_rhs_in_parentheses &&
1820+
m_list[m_list.elements - 1]->has_trailing_into_clause();
18151821
}
18161822

18171823
bool can_absorb_order_and_limit(bool, bool) const override { return false; }
18181824

18191825
bool is_table_value_constructor() const override { return false; }
18201826
PT_insert_values_list *get_row_value_list() const override { return nullptr; }
1827+
bool is_distinct() const { return m_is_distinct; }
1828+
1829+
List<PT_query_expression_body> m_list;
1830+
void set_is_rhs_in_parentheses(bool v) { m_is_rhs_in_parentheses = v; }
18211831

18221832
protected:
18231833
bool contextualize_setop(Parse_context *pc, Query_term_type setop_type,
18241834
Surrounding_context context);
18251835
void merge_children(Query_term_set_op *setop, Query_term_set_op *lower);
1826-
PT_query_expression_body *m_lhs;
18271836
bool m_is_distinct;
1828-
PT_query_expression_body *m_rhs;
18291837
PT_into_destination *m_into{nullptr};
1830-
const bool m_is_rhs_in_parentheses;
1831-
1838+
bool m_is_rhs_in_parentheses;
18321839
void add_json_info(Json_object *obj) override {
18331840
obj->add_alias("distinct", create_dom_ptr<Json_boolean>(m_is_distinct));
18341841
obj->add_alias("rhs_in_parentheses",
@@ -1842,6 +1849,7 @@ class PT_union : public PT_set_operation {
18421849
public:
18431850
using PT_set_operation::PT_set_operation;
18441851
bool do_contextualize(Parse_context *pc) override;
1852+
enum Setop_type type() const override { return UNION; }
18451853
};
18461854

18471855
class PT_except : public PT_set_operation {
@@ -1850,6 +1858,7 @@ class PT_except : public PT_set_operation {
18501858
public:
18511859
using PT_set_operation::PT_set_operation;
18521860
bool do_contextualize(Parse_context *pc) override;
1861+
enum Setop_type type() const override { return EXCEPT; }
18531862
};
18541863

18551864
class PT_intersect : public PT_set_operation {
@@ -1858,6 +1867,7 @@ class PT_intersect : public PT_set_operation {
18581867
public:
18591868
using PT_set_operation::PT_set_operation;
18601869
bool do_contextualize(Parse_context *pc) override;
1870+
enum Setop_type type() const override { return INTERSECT; }
18611871
};
18621872

18631873
class PT_select_stmt : public Parse_tree_root {
@@ -5568,4 +5578,69 @@ PT_column_attr_base *make_column_secondary_engine_attribute(MEM_ROOT *,
55685578
PT_base_index_option *make_index_engine_attribute(MEM_ROOT *, LEX_CSTRING);
55695579
PT_base_index_option *make_index_secondary_engine_attribute(MEM_ROOT *,
55705580
LEX_CSTRING);
5581+
5582+
/**
5583+
Helper function to imitate \c dynamic_cast for \c PT_set_operation hierarchy.
5584+
5585+
Template parameter @p To is the destination type (@c PT_union, \c PT_except or
5586+
\c PT_intersect). For \c PT_intersect we return nullptr if ALL, due to an
5587+
implementation restriction: we cannot merge INTERSECT ALL.
Template parameter @p Tag is the \c Setop_type value that from->type() must
report for the cast to succeed.
5588+
5589+
@param from source item
5590+
@param is_distinct required DISTINCT-ness; @p from must report the same value
5591+
@return @p from cast to the type @p To, or nullptr if it does not match
5592+
*/
5593+
template <class To, PT_set_operation::Setop_type Tag>
5594+
To *setop_cast(PT_query_expression_body *from, bool is_distinct) {
5595+
return (from->type() == Tag &&
5596+
down_cast<PT_set_operation *>(from)->is_distinct() == is_distinct &&
5597+
(Tag != PT_query_expression_body::INTERSECT || is_distinct))
5598+
? static_cast<To *>(from)
5599+
: nullptr;
5600+
}
5601+
5602+
/**
5603+
Flatten set operators at parse time
5604+
5605+
This function flattens UNION ALL/DISTINCT, EXCEPT ALL/DISTINCT
5606+
and INTERSECT DISTINCT (not ALL due to implementation restrictions) operators
5607+
at parse time if applicable, otherwise it creates
5608+
a new \c PT_<setop> node from the two input operands.
5609+
5610+
Template parameter @p Class is @c PT_union, @c PT_except or @c PT_intersect
5611+
Template parameter @p Tag is @c PT_query_expression_body::UNION,
5612+
@c ::EXCEPT or @c ::INTERSECT
5613+
5614+
@param mem_root MEM_ROOT
5615+
@param pos parse location
5616+
@param left left argument of the operator
5617+
@param is_distinct true if DISTINCT
5618+
@param right right argument of the operator
5619+
@param is_right_in_parentheses
5620+
true if right hand side is parenthesized
5621+
@return resulting parse tree node, or nullptr if either operand is nullptr
5622+
*/
5623+
template <class Class, PT_set_operation::Setop_type Tag>
5624+
PT_set_operation *flatten_equal_set_ops(MEM_ROOT *mem_root, const POS &pos,
5625+
PT_query_expression_body *left,
5626+
bool is_distinct,
5627+
PT_query_expression_body *right,
5628+
bool is_right_in_parentheses) {
5629+
if (left == nullptr || right == nullptr) return nullptr;
5630+
Class *left_setop = setop_cast<Class, Tag>(left, is_distinct);
5631+
Class *right_setop [[maybe_unused]] =
5632+
setop_cast<Class, Tag>(right, is_distinct);
5633+
assert(right_setop == nullptr); // doesn't happen
5634+
if (left_setop != nullptr) {
5635+
// X1 op X2 op Y ==> op (X1, X2, Y)
5636+
left_setop->m_list.push_back(right);
5637+
left_setop->set_is_rhs_in_parentheses(is_right_in_parentheses);
5638+
return left_setop;
5639+
} else {
5640+
/* X op Y */
5641+
return new (mem_root)
5642+
Class(pos, left, is_distinct, right, is_right_in_parentheses);
5643+
}
5644+
}
5645+
55715646
#endif /* PARSE_TREE_NODES_INCLUDED */

sql/sql_yacc.yy

+12-3
Original file line numberDiff line numberDiff line change
@@ -9796,17 +9796,26 @@ query_expression_body:
97969796
}
97979797
| query_expression_body UNION_SYM union_option query_expression_body
97989798
{
9799-
$$ = {NEW_PTN PT_union(@$, $1.body, $3, $4.body, $4.is_parenthesized),
9799+
$$ = {flatten_equal_set_ops<PT_union,
9800+
PT_set_operation::UNION>(
9801+
YYMEM_ROOT, @$, $1.body, $3, $4.body,
9802+
$4.is_parenthesized),
98009803
false};
98019804
}
98029805
| query_expression_body EXCEPT_SYM union_option query_expression_body
98039806
{
9804-
$$ = {NEW_PTN PT_except(@$, $1.body, $3, $4.body, $4.is_parenthesized),
9807+
$$ = {flatten_equal_set_ops<PT_except,
9808+
PT_set_operation::EXCEPT>(
9809+
YYMEM_ROOT, @$, $1.body, $3, $4.body,
9810+
$4.is_parenthesized),
98059811
false};
98069812
}
98079813
| query_expression_body INTERSECT_SYM union_option query_expression_body
98089814
{
9809-
$$ = {NEW_PTN PT_intersect(@$, $1.body, $3, $4.body, $4.is_parenthesized),
9815+
$$ = {flatten_equal_set_ops<PT_intersect,
9816+
PT_set_operation::INTERSECT>(
9817+
YYMEM_ROOT, @$, $1.body, $3, $4.body,
9818+
$4.is_parenthesized),
98109819
false};
98119820
}
98129821
;

0 commit comments

Comments
 (0)