Skip to content

Commit cb760fd

Browse files
author
Steinar H. Gunderson
committed
Bug #32169846: HYPERGRAPH: ASSERTION `M_OPENED_TABLE != NULLPTR' FAILED.
If we are sorting by row ID, make sure not to use streaming for any derived tables, as it cannot deliver old rows (by definition).

Change-Id: Ib4be21201e8eadaa45444b5a926fd684b72d7cac
1 parent 7592fec commit cb760fd

File tree

7 files changed

+111
-36
lines changed

7 files changed

+111
-36
lines changed

mysql-test/r/derived.result

+14
Original file line numberDiff line numberDiff line change
@@ -4595,3 +4595,17 @@ Warnings:
45954595
Warning 1292 Incorrect datetime value: '1'
45964596
SET sql_mode=DEFAULT;
45974597
DROP TABLE t1;
4598+
#
4599+
# Bug #32169846: HYPERGRAPH: ASSERTION `M_OPENED_TABLE != NULLPTR' FAILED.
4600+
#
4601+
CREATE TABLE t1 (a LONGTEXT);
4602+
INSERT INTO t1 VALUES ('');
4603+
CREATE TABLE t2 (b INTEGER);
4604+
INSERT INTO t2 VALUES (0);
4605+
SELECT 1 FROM t2, (
4606+
SELECT a, rand() FROM t1 GROUP BY a
4607+
) d1
4608+
GROUP BY b;
4609+
1
4610+
1
4611+
DROP TABLE t1, t2;

mysql-test/t/derived.test

+21
Original file line numberDiff line numberDiff line change
@@ -3280,3 +3280,24 @@ SELECT (
32803280
) FROM t1;
32813281
SET sql_mode=DEFAULT;
32823282
DROP TABLE t1;
3283+
3284+
--echo #
3285+
--echo # Bug #32169846: HYPERGRAPH: ASSERTION `M_OPENED_TABLE != NULLPTR' FAILED.
3286+
--echo #
3287+
3288+
CREATE TABLE t1 (a LONGTEXT);
3289+
INSERT INTO t1 VALUES ('');
3290+
CREATE TABLE t2 (b INTEGER);
3291+
INSERT INTO t2 VALUES (0);
3292+
3293+
# The sort for GROUP BY needs to be on row ID due to the long blob in t1.
3294+
# This is incompatible with streaming of d1, so we want to check that it
3295+
# uses materialization instead. The rand() is there to force rematerialization
3296+
# every time (otherwise, streaming is not considered). The inner GROUP BY
3297+
# is there to preclude merging.
3298+
SELECT 1 FROM t2, (
3299+
SELECT a, rand() FROM t1 GROUP BY a
3300+
) d1
3301+
GROUP BY b;
3302+
3303+
DROP TABLE t1, t2;

sql/filesort.cc

+36-25
Original file line numberDiff line numberDiff line change
@@ -2096,6 +2096,37 @@ uint sortlength(THD *thd, st_sort_field *sortorder, uint s_length) {
20962096
return total_length;
20972097
}
20982098

2099+
// Returns true if a filesort over this table would have to sort row IDs
// instead of carrying the column values along as addon fields.
//
// Keeping large blobs in addon fields could be very inefficient, but small
// blobs are fine (where "small" is fuzzy, and relative to the size of the
// sort buffer). Two kinds of blobs count as small:
//
//  - Those with an explicit small bound: tinyblob (255 bytes) and
//    blob (65535 bytes).
//  - Those that are merely _typically_ small, such as JSON and geometry
//    values. Since we never document that these are blobs under the hood,
//    it is not unreasonable to expect the user to size their sort buffers
//    for a few rows of them. (A user sorting multi-megabyte JSON rows
//    would usually have plenty of RAM anyway, as they would need it just
//    to hold and process the result set.)
//
// What remains is mediumblob and longblob. A user who declares a column
// with one of those types may reasonably expect sorting not to pull many
// such values into memory at once, so for them we fall back to sorting
// row IDs.
bool SortWillBeOnRowId(TABLE *table) {
  for (Field **field_ptr = table->field; *field_ptr != nullptr; ++field_ptr) {
    Field *field = *field_ptr;
    // Only fields actually read by this query matter.
    if (!bitmap_is_set(table->read_set, field->field_index())) {
      continue;
    }
    // 70000 > 65535, so plain blob (and anything smaller) passes,
    // while mediumblob/longblob trigger row-ID sorting.
    if (field->type() == MYSQL_TYPE_BLOB &&
        field->max_packed_col_length() > 70000u) {
      return true;
    }
  }
  return false;
}
2129+
20992130
/**
21002131
Get descriptors of fields appended to sorted fields and
21012132
calculate their total length.
@@ -2146,35 +2177,15 @@ Addon_fields *Filesort::get_addon_fields(
21462177
if (table->is_nullable()) {
21472178
null_fields++;
21482179
}
2180+
if (SortWillBeOnRowId(table)) {
2181+
DBUG_ASSERT(m_sort_param.addon_fields == nullptr);
2182+
*addon_fields_status = Addon_fields_status::row_contains_blob;
2183+
return nullptr;
2184+
}
21492185
for (Field **pfield = table->field; *pfield != nullptr; ++pfield) {
21502186
Field *field = *pfield;
21512187
if (!bitmap_is_set(table->read_set, field->field_index())) continue;
21522188

2153-
// Having large blobs in addon fields could be very inefficient,
2154-
// but small blobs are OK (where “small” is a bit fuzzy, and relative
2155-
// to the size of the sort buffer). There are two types of small blobs:
2156-
//
2157-
// - Those explicitly bounded to small lengths, namely tinyblob
2158-
// (255 bytes) and blob (65535 bytes).
2159-
// - Those that are _typically_ fairly small, which includes JSON and
2160-
// geometries. We don't actually declare anywhere that they are
2161-
// implemented using blobs under the hood, so it's not unreasonable to
2162-
// demand that the user have large enough sort buffers for a few rows.
2163-
// (If a user has multi-megabyte JSON rows and wishes to sort them,
2164-
// they would usually have a fair bit of RAM anyway, since they'd need
2165-
// that to hold the result set and process it in a reasonable fashion.)
2166-
//
2167-
// That leaves only mediumblob and longblob. If a user declares a field as
2168-
// one of those, it's reasonable for them to expect that sorting doesn't
2169-
// need to pull many of them up in memory, so we should stick to sorting
2170-
// row IDs.
2171-
if (field->type() == MYSQL_TYPE_BLOB &&
2172-
field->max_packed_col_length() > 70000u) {
2173-
DBUG_ASSERT(m_sort_param.addon_fields == nullptr);
2174-
*addon_fields_status = Addon_fields_status::row_contains_blob;
2175-
return nullptr;
2176-
}
2177-
21782189
const uint field_length = field->max_packed_col_length();
21792190
AddWithSaturate(field_length, &total_length);
21802191

sql/filesort.h

+4
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,10 @@ template <bool Is_big_endian>
118118
void copy_integer(uchar *to, size_t to_length, const uchar *from,
119119
size_t from_length, bool is_unsigned);
120120

121+
// Returns whether a sort involving this table would necessarily be on row ID,
122+
// even if not forced by other means.
123+
bool SortWillBeOnRowId(TABLE *table);
124+
121125
static inline void copy_native_longlong(uchar *to, size_t to_length,
122126
longlong val, bool is_unsigned) {
123127
#ifdef WORDS_BIGENDIAN

sql/join_optimizer/join_optimizer.cc

+28-5
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,13 @@ constexpr double kMaterializeOneRowCost = 0.1;
112112
class CostingReceiver {
113113
public:
114114
CostingReceiver(
115-
THD *thd, const JoinHypergraph &graph,
115+
THD *thd, const JoinHypergraph &graph, bool need_rowid,
116116
uint64_t supported_access_path_types,
117117
secondary_engine_modify_access_path_cost_t secondary_engine_cost_hook,
118118
string *trace)
119119
: m_thd(thd),
120120
m_graph(graph),
121+
m_need_rowid(need_rowid),
121122
m_supported_access_path_types(supported_access_path_types),
122123
m_secondary_engine_cost_hook(secondary_engine_cost_hook),
123124
m_trace(trace) {
@@ -162,6 +163,14 @@ class CostingReceiver {
162163
/// The graph we are running over.
163164
const JoinHypergraph &m_graph;
164165

166+
/// Whether we will be needing row IDs from our tables, typically for
167+
/// a later sort. If this happens, derived tables cannot use streaming,
168+
/// but need an actual materialization, since filesort expects to be
169+
/// able to go back and ask for a given row. (This is different from
170+
/// when we need row IDs for weedout, which doesn't preclude streaming.
171+
/// The hypergraph optimizer does not use weedout.)
172+
bool m_need_rowid;
173+
165174
/// The supported access path types. Access paths of types not in
166175
/// this set should not be created. It is currently only used to
167176
/// limit which join types to use, so any bit that does not
@@ -588,9 +597,9 @@ bool CostingReceiver::ProposeTableScan(TABLE *table, int node_idx) {
588597
// Handled in clear_corr_something_something, not here
589598
rematerialize = false;
590599
}
591-
materialize_path =
592-
GetAccessPathForDerivedTable(m_thd, tl, table, rematerialize,
593-
/*invalidators=*/nullptr, path);
600+
materialize_path = GetAccessPathForDerivedTable(
601+
m_thd, tl, table, rematerialize,
602+
/*invalidators=*/nullptr, m_need_rowid, path);
594603
}
595604

596605
// TODO(sgunders): Take rematerialization cost into account,
@@ -1588,6 +1597,19 @@ AccessPath *FindBestQueryPlan(THD *thd, SELECT_LEX *select_lex, string *trace) {
15881597
}
15891598
}
15901599

1600+
// Figure out if any later sort will need row IDs.
1601+
bool need_rowid = false;
1602+
if (select_lex->is_explicitly_grouped() || select_lex->is_ordered() ||
1603+
join->select_distinct) {
1604+
for (TABLE_LIST *tl = select_lex->leaf_tables; tl != nullptr;
1605+
tl = tl->next_leaf) {
1606+
if (SortWillBeOnRowId(tl->table)) {
1607+
need_rowid = true;
1608+
break;
1609+
}
1610+
}
1611+
}
1612+
15911613
// Run the actual join optimizer algorithm. This creates an access path
15921614
// for the join as a whole (with lowest possible cost, and thus also
15931615
// hopefully optimal execution time), with all pushable predicates applied.
@@ -1599,7 +1621,8 @@ AccessPath *FindBestQueryPlan(THD *thd, SELECT_LEX *select_lex, string *trace) {
15991621
}
16001622
const secondary_engine_modify_access_path_cost_t secondary_engine_cost_hook =
16011623
SecondaryEngineCostHook(thd);
1602-
CostingReceiver receiver(thd, graph, SupportedAccessPathTypes(thd),
1624+
CostingReceiver receiver(thd, graph, need_rowid,
1625+
SupportedAccessPathTypes(thd),
16031626
secondary_engine_cost_hook, trace);
16041627
if (EnumerateAllConnectedPartitions(graph.graph, &receiver) &&
16051628
!thd->is_error()) {

sql/sql_executor.cc

+6-5
Original file line numberDiff line numberDiff line change
@@ -1450,14 +1450,15 @@ static bool IsTableScan(AccessPath *path) {
14501450

14511451
AccessPath *GetAccessPathForDerivedTable(THD *thd, QEP_TAB *qep_tab,
14521452
AccessPath *table_path) {
1453-
return GetAccessPathForDerivedTable(thd, qep_tab->table_ref, qep_tab->table(),
1454-
qep_tab->rematerialize,
1455-
qep_tab->invalidators, table_path);
1453+
return GetAccessPathForDerivedTable(
1454+
thd, qep_tab->table_ref, qep_tab->table(), qep_tab->rematerialize,
1455+
qep_tab->invalidators, /*need_rowid=*/false, table_path);
14561456
}
14571457

14581458
AccessPath *GetAccessPathForDerivedTable(
14591459
THD *thd, TABLE_LIST *table_ref, TABLE *table, bool rematerialize,
1460-
Mem_root_array<const AccessPath *> *invalidators, AccessPath *table_path) {
1460+
Mem_root_array<const AccessPath *> *invalidators, bool need_rowid,
1461+
AccessPath *table_path) {
14611462
SELECT_LEX_UNIT *unit = table_ref->derived_unit();
14621463
JOIN *subjoin = nullptr;
14631464
Temp_table_param *tmp_table_param;
@@ -1509,7 +1510,7 @@ AccessPath *GetAccessPathForDerivedTable(
15091510
/*send_records_override=*/nullptr);
15101511
}
15111512
} else if (table_ref->common_table_expr() == nullptr && rematerialize &&
1512-
IsTableScan(table_path)) {
1513+
IsTableScan(table_path) && !need_rowid) {
15131514
// We don't actually need the materialization for anything (we would
15141515
// just be reading the rows straight out from the table, never to be used
15151516
// again), so we can just stream records directly over to the next

sql/sql_executor.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,8 @@ AccessPath *GetAccessPathForDerivedTable(THD *thd, QEP_TAB *qep_tab,
566566
AccessPath *table_path);
567567
AccessPath *GetAccessPathForDerivedTable(
568568
THD *thd, TABLE_LIST *table_ref, TABLE *table, bool rematerialize,
569-
Mem_root_array<const AccessPath *> *invalidators, AccessPath *table_path);
569+
Mem_root_array<const AccessPath *> *invalidators, bool need_rowid,
570+
AccessPath *table_path);
570571

571572
void ConvertItemsToCopy(const mem_root_deque<Item *> &items, Field **fields,
572573
Temp_table_param *param);

0 commit comments

Comments
 (0)