Skip to content

Commit d5bdabc

Browse files
author
Steinar H. Gunderson
committed
Bug #32644631: PRELIMINARY FIXES FOR WL #14419 [simplify order/distinct, noclose]
Make the logic in ApplyDistinctAndOrder() much simpler; we decide beforehand whether we need to sort by row IDs or not, so that we don't need to create the filesort objects early (which would otherwise give us that answer). This is possible largely because WL #14325 removed the main edge case of sorting by constant expressions only, which would earlier require row IDs. It is also greatly helped by the new and more generic CollectTables() introduced in the previous patch, relieving us from having to track the table list ourselves as we add temporary tables. In theory, we could be slightly more aggressive about avoiding row ID sorts; e.g., if we have a materialization at some point that removes all remaining blobs (perhaps they're only used in some expression), we don't really need row IDs for sortahead sorts, but this is marginal. So at this point, we enshrine fairly early on that all sorts are either on row IDs or they are not. Change-Id: I9ef89ad4cbdf943de2eb7b5e8eeb06262f29c80b
1 parent e9e88a8 commit d5bdabc

File tree

2 files changed

+65
-129
lines changed

2 files changed

+65
-129
lines changed

sql/filesort.cc

+17-8
Original file line numberDiff line numberDiff line change
@@ -189,14 +189,7 @@ void Sort_param::decide_addon_fields(Filesort *file_sort,
189189
for (TABLE *table : tables) {
190190
if (table->pos_in_table_list &&
191191
table->pos_in_table_list->is_fulltext_searched()) {
192-
// MATCH() (except in “boolean mode”) doesn't use the actual value,
193-
// it just goes and asks the handler directly for the current row.
194-
// Thus, we need row IDs, so that the row is positioned correctly.
195-
//
196-
// When sorting a join, table->fulltext_searched will be false,
197-
// but items (like Item_func_match) are materialized
198-
// (by StreamingIterator or MaterializeIterator) before the sort,
199-
// so this is moot.
192+
// See comment in SortWillBeOnRowId().
200193
m_addon_fields_status = Addon_fields_status::fulltext_searched;
201194
return;
202195
}
@@ -2106,6 +2099,19 @@ uint sortlength(THD *thd, st_sort_field *sortorder, uint s_length) {
21062099
}
21072100

21082101
bool SortWillBeOnRowId(TABLE *table) {
2102+
if (table->pos_in_table_list &&
2103+
table->pos_in_table_list->is_fulltext_searched()) {
2104+
// MATCH() (except in “boolean mode”) doesn't use the actual value,
2105+
// it just goes and asks the handler directly for the current row.
2106+
// Thus, we need row IDs, so that the row is positioned correctly.
2107+
//
2108+
// When sorting a join, table->fulltext_searched will be false,
2109+
// but items (like Item_func_match) are materialized
2110+
// (by StreamingIterator or MaterializeIterator) before the sort,
2111+
// so this is moot.
2112+
return true;
2113+
}
2114+
21092115
for (Field **pfield = table->field; *pfield != nullptr; ++pfield) {
21102116
Field *field = *pfield;
21112117
if (!bitmap_is_set(table->read_set, field->field_index())) continue;
@@ -2188,6 +2194,9 @@ Addon_fields *Filesort::get_addon_fields(
21882194
}
21892195
if (SortWillBeOnRowId(table)) {
21902196
assert(m_sort_param.addon_fields == nullptr);
2197+
// If the reason was FTS and not that the table contained blobs, we would
2198+
// already have made that decision earlier in decide_addon_fields(),
2199+
// so the only possible reason is due to a blob.
21912200
*addon_fields_status = Addon_fields_status::row_contains_blob;
21922201
return nullptr;
21932202
}

sql/join_optimizer/join_optimizer.cc

+48-121
Original file line numberDiff line numberDiff line change
@@ -2074,9 +2074,8 @@ bool CheckSupportedQuery(THD *thd, JOIN *join) {
20742074
that are not aggregate functions. This is a tiny performance loss,
20752075
but makes things simpler.
20762076
2077-
Note that we cannot set up an access path for this temporary table yet;
2078-
that needs to wait until we know whether the sort decided to use row IDs
2079-
or not, and Filesort cannot be set up until it knows what tables to sort.
2077+
Note that we don't set up an access path for this temporary table yet,
2078+
since there may be multiple access paths sharing this temporary table.
20802079
Thus, that job is deferred to CreateMaterializationPathForSortingAggregates().
20812080
*/
20822081
TABLE *CreateTemporaryTableForSortingAggregates(
@@ -2243,8 +2242,9 @@ void ReplaceOrderItemsWithTempTableFields(THD *thd, ORDER *order) {
22432242
*/
22442243
AccessPath *CreateMaterializationPathForSortingAggregates(
22452244
THD *thd, JOIN *join, AccessPath *path, TABLE *temp_table,
2246-
Temp_table_param *temp_table_param, bool using_addon_fields) {
2247-
if (using_addon_fields) {
2245+
Temp_table_param *temp_table_param) {
2246+
// See if later sorts will need row IDs from us or not.
2247+
if (!SortWillBeOnRowId(temp_table)) {
22482248
// The common case; we can use streaming.
22492249
AccessPath *stream_path =
22502250
NewStreamingAccessPath(thd, path, join, temp_table_param, temp_table,
@@ -2443,12 +2443,10 @@ JoinHypergraph::Node *FindNodeWithTable(JoinHypergraph *graph, TABLE *table) {
24432443
Prealloced_array<AccessPath *, 4> ApplyDistinctAndOrder(
24442444
THD *thd, const CostingReceiver &receiver,
24452445
const LogicalOrderings &orderings, bool aggregation_is_unordered,
2446-
int order_by_ordering_idx, int distinct_ordering_idx, ORDER *distinct_order,
2446+
int order_by_ordering_idx, int distinct_ordering_idx,
24472447
const Mem_root_array<SortAheadOrdering> &sort_ahead_orderings,
2448-
FunctionalDependencySet fd_set, Query_block *query_block,
2449-
bool need_rowid_from_tables,
2450-
Prealloced_array<AccessPath *, 4> root_candidates, string *trace,
2451-
TABLE **temp_table) {
2448+
FunctionalDependencySet fd_set, Query_block *query_block, bool need_rowid,
2449+
Prealloced_array<AccessPath *, 4> root_candidates, string *trace) {
24522450
JOIN *join = query_block->join;
24532451
assert(join->select_distinct || query_block->is_ordered());
24542452

@@ -2458,11 +2456,7 @@ Prealloced_array<AccessPath *, 4> ApplyDistinctAndOrder(
24582456
return root_candidates;
24592457
}
24602458

2461-
*temp_table = nullptr;
2462-
Temp_table_param *temp_table_param = nullptr;
2463-
2464-
Mem_root_array<TABLE *> tables =
2465-
CollectTables(thd, root_candidates[0]); // Should be same for all paths.
2459+
bool inserted_temp_table = false;
24662460

24672461
// If we have grouping followed by a sort, we need to bounce via
24682462
// the buffers of a temporary table. See the comments on
@@ -2475,93 +2469,23 @@ Prealloced_array<AccessPath *, 4> ApplyDistinctAndOrder(
24752469
// doesn't preserve them (doing so would probably not be worth it for
24762470
// something that's fairly niche).
24772471
//
2478-
// NOTE: We could need row IDs later without need_rowid_from_tables being set,
2479-
// but only in certain edge cases; for instance, if we sort only constants
2480-
// (although filesort should arguably be fixed not to request row IDs
2481-
// in that case). The test here is really about data being pulled from
2482-
// individual tables, and should be safe.
2483-
if (query_block->is_explicitly_grouped() &&
2484-
(*join->sum_funcs != nullptr ||
2485-
join->rollup_state != JOIN::RollupState::NONE ||
2486-
need_rowid_from_tables)) {
2487-
*temp_table = CreateTemporaryTableForSortingAggregates(thd, query_block,
2488-
&temp_table_param);
2489-
// Filesort now only needs to worry about one input -- this temporary
2490-
// table. This holds whether we are actually materializing or just
2491-
// using streaming.
2492-
tables.clear();
2493-
tables.push_back(*temp_table);
2494-
}
2495-
2496-
// Set up the filesort objects. They have some interactions
2497-
// around addon fields vs. sort by row ID, so we need to do this
2498-
// before we decide on iterators.
2499-
bool need_rowid = false;
2500-
Filesort *filesort = nullptr;
2501-
if (query_block->is_ordered()) {
2502-
Mem_root_array<TABLE *> table_copy(thd->mem_root, tables.begin(),
2503-
tables.end());
2504-
filesort = new (thd->mem_root)
2505-
Filesort(thd, std::move(table_copy), /*keep_buffers=*/false,
2506-
query_block->order_list.first,
2507-
/*limit_arg=*/HA_POS_ERROR, /*force_stable_sort=*/false,
2508-
/*remove_duplicates=*/false, /*force_sort_positions=*/false,
2509-
/*unwrap_rollup=*/false);
2510-
join->filesorts_to_cleanup.push_back(filesort);
2511-
need_rowid = !filesort->using_addon_fields();
2512-
}
2513-
Filesort *filesort_for_distinct = nullptr;
2514-
if (join->select_distinct) {
2515-
bool force_sort_positions = false;
2516-
if (filesort != nullptr && !filesort->using_addon_fields()) {
2517-
// We have the rather unusual situation here that we have two sorts
2518-
// directly after each other, with no temporary table in-between,
2519-
// and filesort expects to be able to refer to rows by their position.
2520-
// Usually, the sort for DISTINCT would be a superset of the sort for
2521-
// ORDER BY, but not always (e.g. when sorting by some expression),
2522-
// so we could end up in a situation where the first sort is by addon
2523-
// fields and the second one is by positions.
2524-
//
2525-
// Thus, in this case, we force the first sort to be by positions,
2526-
// so that the result comes from SortFileIndirectIterator or
2527-
// SortBufferIndirectIterator. These will both position the cursor
2528-
// on the underlying temporary table correctly before returning it,
2529-
// so that the successive filesort will save the right position
2530-
// for the row.
2531-
if (trace != nullptr) {
2532-
*trace +=
2533-
"Forcing DISTINCT to sort row IDs, since ORDER BY sort does\n";
2534-
}
2535-
force_sort_positions = true;
2536-
}
2537-
2538-
if (distinct_order != nullptr) {
2539-
filesort_for_distinct = new (thd->mem_root)
2540-
Filesort(thd, std::move(tables),
2541-
/*keep_buffers=*/false, distinct_order, HA_POS_ERROR,
2542-
/*force_stable_sort=*/false,
2543-
/*remove_duplicates=*/true, force_sort_positions,
2544-
/*unwrap_rollup=*/false);
2545-
join->filesorts_to_cleanup.push_back(filesort_for_distinct);
2546-
if (!filesort_for_distinct->using_addon_fields()) {
2547-
need_rowid = true;
2548-
}
2549-
}
2550-
}
2551-
2552-
// Apply streaming between grouping and us, if needed (see above).
25532472
// NOTE: If we elide the sort due to interesting orderings, this might
25542473
// be redundant. It is fairly harmless, though.
2555-
if (*temp_table != nullptr) {
2474+
if (query_block->is_explicitly_grouped() &&
2475+
(*join->sum_funcs != nullptr ||
2476+
join->rollup_state != JOIN::RollupState::NONE || need_rowid)) {
2477+
Temp_table_param *temp_table_param = nullptr;
2478+
TABLE *temp_table = CreateTemporaryTableForSortingAggregates(
2479+
thd, query_block, &temp_table_param);
25562480
Prealloced_array<AccessPath *, 4> new_root_candidates(PSI_NOT_INSTRUMENTED);
25572481
for (AccessPath *root_path : root_candidates) {
25582482
root_path = CreateMaterializationPathForSortingAggregates(
2559-
thd, join, root_path, *temp_table, temp_table_param,
2560-
/*using_addon_fields=*/!need_rowid);
2483+
thd, join, root_path, temp_table, temp_table_param);
25612484
receiver.ProposeAccessPath(root_path, &new_root_candidates,
25622485
/*obsolete_orderings=*/0, "");
25632486
}
25642487
root_candidates = std::move(new_root_candidates);
2488+
inserted_temp_table = true;
25652489
}
25662490

25672491
// Now create iterators for DISTINCT, if applicable.
@@ -2610,40 +2534,38 @@ Prealloced_array<AccessPath *, 4> ApplyDistinctAndOrder(
26102534
if (!orderings.DoesFollowOrder(ordering_state, distinct_ordering_idx)) {
26112535
continue;
26122536
}
2613-
AccessPath *sort_path =
2614-
NewSortAccessPath(thd, root_path, filesort_for_distinct,
2615-
/*count_examined_rows=*/false);
2616-
EstimateSortCost(sort_path);
2537+
AccessPath sort_path;
2538+
sort_path.type = AccessPath::SORT;
2539+
sort_path.count_examined_rows = false;
2540+
sort_path.sort().child = root_path;
2541+
sort_path.sort().filesort = nullptr;
2542+
sort_path.sort().remove_duplicates = true;
2543+
sort_path.sort().unwrap_rollup = false;
2544+
26172545
if (aggregation_is_unordered) {
26182546
// Even though we create a sort node for the distinct operation,
26192547
// the engine does not actually sort the rows. (The deduplication
26202548
// flag is the hint in this case.)
2621-
sort_path->ordering_state = 0;
2549+
sort_path.ordering_state = 0;
26222550
} else {
2623-
sort_path->ordering_state = ordering_state;
2551+
sort_path.ordering_state = ordering_state;
26242552
}
26252553

2626-
// Swap out the ordering for the order we're actually using.
2627-
// filesort_for_distinct was only used for setting row ID status,
2628-
// and we can keep that information, but we need to use the right
2629-
// ordering.
2630-
sort_path->sort().filesort = nullptr;
2631-
sort_path->sort().remove_duplicates = true;
2632-
sort_path->sort().unwrap_rollup = false;
2633-
if (*temp_table != nullptr) {
2554+
if (inserted_temp_table) {
26342555
ORDER *order_copy = BuildSortAheadOrdering(
26352556
thd, &orderings,
26362557
orderings.ordering(sort_ahead_ordering.ordering_idx));
26372558
ReplaceOrderItemsWithTempTableFields(thd, order_copy);
2638-
sort_path->sort().order = order_copy;
2559+
sort_path.sort().order = order_copy;
26392560
} else {
2640-
sort_path->sort().order = sort_ahead_ordering.order;
2561+
sort_path.sort().order = sort_ahead_ordering.order;
26412562
}
26422563

2643-
if (!filesort_for_distinct->using_addon_fields()) {
2644-
FindTablesToGetRowidFor(sort_path);
2564+
if (need_rowid) {
2565+
FindTablesToGetRowidFor(&sort_path);
26452566
}
2646-
receiver.ProposeAccessPath(sort_path, &new_root_candidates,
2567+
EstimateSortCost(&sort_path);
2568+
receiver.ProposeAccessPath(&sort_path, &new_root_candidates,
26472569
/*obsolete_orderings=*/0, "");
26482570
}
26492571
}
@@ -2652,6 +2574,15 @@ Prealloced_array<AccessPath *, 4> ApplyDistinctAndOrder(
26522574

26532575
// Apply ORDER BY, if applicable.
26542576
if (query_block->is_ordered()) {
2577+
Mem_root_array<TABLE *> tables = CollectTables(
2578+
thd, root_candidates[0]); // Should be same for all paths.
2579+
Filesort *filesort = new (thd->mem_root)
2580+
Filesort(thd, std::move(tables), /*keep_buffers=*/false,
2581+
query_block->order_list.first,
2582+
/*limit_arg=*/HA_POS_ERROR, /*force_stable_sort=*/false,
2583+
/*remove_duplicates=*/false, /*force_sort_positions=*/false,
2584+
/*unwrap_rollup=*/false);
2585+
join->filesorts_to_cleanup.push_back(filesort);
26552586
if (trace != nullptr) {
26562587
*trace += "Applying sort for ORDER BY\n";
26572588
}
@@ -3044,8 +2975,8 @@ static void BuildInterestingOrders(
30442975
LogicalOrderings *orderings,
30452976
Mem_root_array<SortAheadOrdering> *sort_ahead_orderings,
30462977
int *order_by_ordering_idx, int *group_by_ordering_idx,
3047-
int *distinct_ordering_idx, ORDER **distinct_order,
3048-
Mem_root_array<ActiveIndexInfo> *active_indexes, string *trace) {
2978+
int *distinct_ordering_idx, Mem_root_array<ActiveIndexInfo> *active_indexes,
2979+
string *trace) {
30492980
// Collect ordering from ORDER BY.
30502981
if (query_block->is_ordered()) {
30512982
table_map used_tables;
@@ -3103,7 +3034,6 @@ static void BuildInterestingOrders(
31033034
/*order_for_filesort=*/order,
31043035
/*used_at_end=*/true, /*homogenize_tables=*/0, used_tables,
31053036
orderings, sort_ahead_orderings);
3106-
*distinct_order = order;
31073037
}
31083038

31093039
// Collect groupings from semijoins (because we might want to do duplicate
@@ -3415,11 +3345,10 @@ AccessPath *FindBestQueryPlan(THD *thd, Query_block *query_block,
34153345
int order_by_ordering_idx = -1;
34163346
int group_by_ordering_idx = -1;
34173347
int distinct_ordering_idx = -1;
3418-
ORDER *distinct_order = nullptr;
34193348
BuildInterestingOrders(thd, &graph, query_block, &orderings,
34203349
&sort_ahead_orderings, &order_by_ordering_idx,
34213350
&group_by_ordering_idx, &distinct_ordering_idx,
3422-
&distinct_order, &active_indexes, trace);
3351+
&active_indexes, trace);
34233352

34243353
// Run the actual join optimizer algorithm. This creates an access path
34253354
// for the join as a whole (with lowest possible cost, and thus also
@@ -3675,13 +3604,11 @@ AccessPath *FindBestQueryPlan(THD *thd, Query_block *query_block,
36753604
root_candidates = std::move(new_root_candidates);
36763605
}
36773606

3678-
TABLE *temp_table = nullptr;
36793607
if (join->select_distinct || query_block->is_ordered()) {
36803608
root_candidates = ApplyDistinctAndOrder(
36813609
thd, receiver, orderings, aggregation_is_unordered,
3682-
order_by_ordering_idx, distinct_ordering_idx, distinct_order,
3683-
sort_ahead_orderings, fd_set, query_block, need_rowid,
3684-
std::move(root_candidates), trace, &temp_table);
3610+
order_by_ordering_idx, distinct_ordering_idx, sort_ahead_orderings,
3611+
fd_set, query_block, need_rowid, std::move(root_candidates), trace);
36853612
}
36863613

36873614
// Apply LIMIT, if applicable.

0 commit comments

Comments
 (0)