Skip to content

Commit 8e14fa8

Browse files
author
Steinar H. Gunderson
committed
Bug #31310238: SUPPORT SORTING MULTIPLE TABLES [patch 3/5, get row IDs] [noclose]
When sorting a join of multiple tables by row ID, make filesort and underlying iterators call prepare_for_position() correctly. Otherwise, some storage engines will not give us proper row IDs when calling position(). Change-Id: I7892a0a0a5f806aed0dd8063bc6d6b39f616bb39
1 parent 443a01a commit 8e14fa8

10 files changed

+112
-27
lines changed

sql/bka_iterator.cc

+2
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ bool BKAIterator::Init() {
103103
return true;
104104
}
105105
}
106+
PrepareForRequestRowId(m_outer_input_tables.tables(),
107+
m_outer_input_tables.tables_to_get_rowid_for());
106108

107109
BeginNewBatch();
108110
m_end_of_outer_rows = false;

sql/composite_iterators.cc

+6
Original file line numberDiff line numberDiff line change
@@ -1324,6 +1324,12 @@ bool WeedoutIterator::Init() {
13241324
!m_sj->tmp_table->file->inited) {
13251325
m_sj->tmp_table->file->ha_index_init(0, false);
13261326
}
1327+
for (SJ_TMP_TABLE_TAB *tab = m_sj->tabs; tab != m_sj->tabs_end; ++tab) {
1328+
TABLE *table = tab->qep_tab->table();
1329+
if (m_tables_to_get_rowid_for & table->pos_in_table_list->map()) {
1330+
table->prepare_for_position();
1331+
}
1332+
}
13271333
return m_source->Init();
13281334
}
13291335

sql/filesort.cc

+31-8
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ struct Mem_compare_queue_key {
143143

144144
static ha_rows read_all_rows(
145145
THD *thd, Sort_param *param, const Prealloced_array<TABLE *, 4> &tables,
146-
Filesort_info *fs_info, IO_CACHE *chunk_file, IO_CACHE *tempfile,
146+
table_map tables_to_get_rowid_for, Filesort_info *fs_info,
147+
IO_CACHE *chunk_file, IO_CACHE *tempfile,
147148
Bounded_queue<uchar *, uchar *, Sort_param, Mem_compare_queue_key> *pq,
148149
RowIterator *source_iterator, ha_rows *found_rows, size_t *longest_key,
149150
size_t *longest_addon);
@@ -355,6 +356,10 @@ static void trace_filesort_information(Opt_trace_context *trace,
355356
@param thd Current thread
356357
@param filesort How to sort the table
357358
@param source_iterator Where to read the rows to be sorted from.
359+
@param tables_to_get_rowid_for
360+
Which tables we are responsible for getting row IDs
361+
for. Tables in this set that are not also in
362+
"tables" are ignored.
358363
@param num_rows_estimate How many rows source_iterator is expected
359364
to produce. Only used for whether we intend
360365
to use the priority queue optimization or not;
@@ -374,8 +379,9 @@ static void trace_filesort_information(Opt_trace_context *trace,
374379
*/
375380

376381
bool filesort(THD *thd, Filesort *filesort, RowIterator *source_iterator,
377-
ha_rows num_rows_estimate, Filesort_info *fs_info,
378-
Sort_result *sort_result, ha_rows *found_rows) {
382+
table_map tables_to_get_rowid_for, ha_rows num_rows_estimate,
383+
Filesort_info *fs_info, Sort_result *sort_result,
384+
ha_rows *found_rows) {
379385
int error;
380386
ulong memory_available = thd->variables.sortbuff_size;
381387
ha_rows num_rows_found = HA_POS_ERROR;
@@ -411,6 +417,15 @@ bool filesort(THD *thd, Filesort *filesort, RowIterator *source_iterator,
411417
my_b_clear(&chunk_file);
412418
error = 1;
413419

420+
if (!param->using_addon_fields()) {
421+
for (TABLE *table : filesort->tables) {
422+
if (table->pos_in_table_list == nullptr ||
423+
(tables_to_get_rowid_for & table->pos_in_table_list->map())) {
424+
table->prepare_for_position();
425+
}
426+
}
427+
}
428+
414429
// Make sure the source iterator is initialized before init_for_filesort(),
415430
// since table->file (and in particular, ref_length) may not be initialized
416431
// before that.
@@ -512,9 +527,9 @@ bool filesort(THD *thd, Filesort *filesort, RowIterator *source_iterator,
512527
{
513528
Opt_trace_array ota(trace, "filesort_execution");
514529
num_rows_found = read_all_rows(
515-
thd, param, filesort->tables, fs_info, &chunk_file, &tempfile,
516-
param->using_pq ? &pq : nullptr, source_iterator, found_rows,
517-
&longest_key, &longest_addons);
530+
thd, param, filesort->tables, tables_to_get_rowid_for, fs_info,
531+
&chunk_file, &tempfile, param->using_pq ? &pq : nullptr,
532+
source_iterator, found_rows, &longest_key, &longest_addons);
518533
if (num_rows_found == HA_POS_ERROR) goto err;
519534
}
520535

@@ -875,6 +890,10 @@ static bool alloc_and_make_sortkey(Sort_param *param, Filesort_info *fs_info,
875890
@param thd Thread handle
876891
@param param Sorting parameter
877892
@param tables List of all tables being sorted.
893+
@param tables_to_get_rowid_for
894+
Which tables we are responsible for getting row IDs
895+
for. Tables in this set that are not also in "tables"
896+
are ignored.
878897
@param fs_info Struct containing sort buffer etc.
879898
@param chunk_file File to write Merge_chunks describing sorted segments
880899
in tempfile.
@@ -930,7 +949,8 @@ static bool alloc_and_make_sortkey(Sort_param *param, Filesort_info *fs_info,
930949

931950
static ha_rows read_all_rows(
932951
THD *thd, Sort_param *param, const Prealloced_array<TABLE *, 4> &tables,
933-
Filesort_info *fs_info, IO_CACHE *chunk_file, IO_CACHE *tempfile,
952+
table_map tables_to_get_rowid_for, Filesort_info *fs_info,
953+
IO_CACHE *chunk_file, IO_CACHE *tempfile,
934954
Bounded_queue<uchar *, uchar *, Sort_param, Mem_compare_queue_key> *pq,
935955
RowIterator *source_iterator, ha_rows *found_rows, size_t *longest_key,
936956
size_t *longest_addons) {
@@ -985,7 +1005,10 @@ static ha_rows read_all_rows(
9851005
// Note where we are, for the case where we are not using addon fields.
9861006
if (!param->using_addon_fields()) {
9871007
for (TABLE *table : tables) {
988-
table->file->position(table->record[0]);
1008+
if (table->pos_in_table_list == nullptr ||
1009+
(table->pos_in_table_list->map() & tables_to_get_rowid_for)) {
1010+
table->file->position(table->record[0]);
1011+
}
9891012
}
9901013
}
9911014
DBUG_EXECUTE_IF("debug_filesort", {

sql/filesort.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "my_base.h" /* ha_rows */
3030
#include "my_dbug.h"
3131
#include "my_inttypes.h"
32+
#include "my_table_map.h"
3233
#include "prealloced_array.h"
3334
#include "sql/sort_param.h"
3435

@@ -96,8 +97,9 @@ class Filesort {
9697
};
9798

9899
bool filesort(THD *thd, Filesort *filesort, RowIterator *source_iterator,
99-
ha_rows num_rows_estimate, Filesort_info *fs_info,
100-
Sort_result *sort_result, ha_rows *found_rows);
100+
table_map tables_to_get_rowid_for, ha_rows num_rows_estimate,
101+
Filesort_info *fs_info, Sort_result *sort_result,
102+
ha_rows *found_rows);
101103
void filesort_free_buffers(TABLE *table, bool full);
102104
void change_double_for_sort(double nr, uchar *to);
103105

sql/hash_join_iterator.cc

+15
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,11 @@ bool HashJoinIterator::Init() {
206206
m_probe_chunk_current_row = 0;
207207
m_current_chunk = -1;
208208

209+
PrepareForRequestRowId(m_probe_input_tables.tables(),
210+
m_tables_to_get_rowid_for);
211+
PrepareForRequestRowId(m_build_input_tables.tables(),
212+
m_tables_to_get_rowid_for);
213+
209214
// Build the hash table
210215
if (BuildHashTable()) {
211216
DBUG_ASSERT(thd()->is_error() ||
@@ -301,6 +306,16 @@ void RequestRowId(const Prealloced_array<hash_join_buffer::Table, 4> &tables,
301306
}
302307
}
303308

309+
void PrepareForRequestRowId(
310+
const Prealloced_array<hash_join_buffer::Table, 4> &tables,
311+
table_map tables_to_get_rowid_for) {
312+
for (const hash_join_buffer::Table &it : tables) {
313+
if (tables_to_get_rowid_for & it.table->pos_in_table_list->map()) {
314+
it.table->prepare_for_position();
315+
}
316+
}
317+
}
318+
304319
// Write all the remaining rows from the given iterator out to chunk files
305320
// on disk. If the function returns true, an unrecoverable error occurred
306321
// (IO error etc.).

sql/hash_join_iterator.h

+4
Original file line numberDiff line numberDiff line change
@@ -652,4 +652,8 @@ class HashJoinIterator final : public RowIterator {
652652
void RequestRowId(const Prealloced_array<hash_join_buffer::Table, 4> &tables,
653653
table_map tables_to_get_rowid_for);
654654

655+
void PrepareForRequestRowId(
656+
const Prealloced_array<hash_join_buffer::Table, 4> &tables,
657+
table_map tables_to_get_rowid_for);
658+
655659
#endif // SQL_HASH_JOIN_ITERATOR_H_

sql/join_optimizer/access_path.cc

+35-2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,32 @@
3838

3939
using std::vector;
4040

41+
AccessPath *NewSortAccessPath(THD *thd, AccessPath *child, Filesort *filesort,
42+
bool count_examined_rows) {
43+
AccessPath *path = new (thd->mem_root) AccessPath;
44+
path->type = AccessPath::SORT;
45+
path->count_examined_rows = count_examined_rows;
46+
path->sort().child = child;
47+
path->sort().filesort = filesort;
48+
49+
if (filesort->using_addon_fields()) {
50+
path->sort().tables_to_get_rowid_for = 0;
51+
} else {
52+
if (filesort->tables.size() == 1 &&
53+
filesort->tables[0]->pos_in_table_list == nullptr) {
54+
// This can happen if we sort a single temporary table
55+
// which is not in the table list (e.g., one that was
56+
// specifically created for us). Filesort has special-casing
57+
// to always get the row ID in this case.
58+
path->sort().tables_to_get_rowid_for = 0;
59+
} else {
60+
FindTablesToGetRowidFor(path);
61+
}
62+
}
63+
64+
return path;
65+
}
66+
4167
template <class Func>
4268
void WalkAccessPaths(AccessPath *path, bool cross_query_blocks, Func &&func) {
4369
if (func(path)) {
@@ -542,8 +568,9 @@ unique_ptr_destroy_only<RowIterator> CreateIteratorFromAccessPath(
542568
? HA_POS_ERROR
543569
: lrint(path->sort().child->num_output_rows);
544570
Filesort *filesort = path->sort().filesort;
545-
iterator = NewIterator<SortingIterator>(thd, filesort, move(child),
546-
num_rows_estimate, examined_rows);
571+
iterator = NewIterator<SortingIterator>(
572+
thd, filesort, move(child), num_rows_estimate,
573+
path->sort().tables_to_get_rowid_for, examined_rows);
547574
if (filesort->m_remove_duplicates) {
548575
filesort->tables[0]->duplicate_removal_iterator =
549576
down_cast<SortingIterator *>(iterator->real_iterator());
@@ -794,6 +821,12 @@ void FindTablesToGetRowidFor(AccessPath *path) {
794821
path->weedout().tables_to_get_rowid_for =
795822
GetUsedTables(path) & ~handled_by_others;
796823
break;
824+
case AccessPath::SORT:
825+
WalkAccessPaths(path, /*cross_query_blocks=*/false,
826+
add_tables_handled_by_others);
827+
path->sort().tables_to_get_rowid_for =
828+
GetUsedTables(path) & ~handled_by_others;
829+
break;
797830
default:
798831
abort();
799832
}

sql/join_optimizer/access_path.h

+6-11
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,7 @@ struct AccessPath {
592592
struct {
593593
AccessPath *child;
594594
Filesort *filesort;
595+
table_map tables_to_get_rowid_for;
595596
} sort;
596597
struct {
597598
AccessPath *child;
@@ -894,16 +895,10 @@ inline AccessPath *NewFilterAccessPath(THD *thd, AccessPath *child,
894895
return path;
895896
}
896897

897-
inline AccessPath *NewSortAccessPath(THD *thd, AccessPath *child,
898-
Filesort *filesort,
899-
bool count_examined_rows) {
900-
AccessPath *path = new (thd->mem_root) AccessPath;
901-
path->type = AccessPath::SORT;
902-
path->count_examined_rows = count_examined_rows;
903-
path->sort().child = child;
904-
path->sort().filesort = filesort;
905-
return path;
906-
}
898+
// Not inline, because it needs access to filesort internals
899+
// (which are forward-declared in this file).
900+
AccessPath *NewSortAccessPath(THD *thd, AccessPath *child, Filesort *filesort,
901+
bool count_examined_rows);
907902

908903
inline AccessPath *NewAggregateAccessPath(THD *thd, AccessPath *child,
909904
Temp_table_param *temp_table_param,
@@ -1138,7 +1133,7 @@ inline AccessPath *NewInvalidatorAccessPath(THD *thd, AccessPath *child,
11381133
return path;
11391134
}
11401135

1141-
void FindTablesToGetRowidFor(AccessPath *weedout_path);
1136+
void FindTablesToGetRowidFor(AccessPath *path);
11421137
unique_ptr_destroy_only<RowIterator> CreateIteratorFromAccessPath(
11431138
THD *thd, AccessPath *path, JOIN *join, bool eligible_for_batch_mode);
11441139
void SetCostOnTableAccessPath(const Cost_model_server &cost_model,

sql/sorting_iterator.cc

+5-3
Original file line numberDiff line numberDiff line change
@@ -397,11 +397,13 @@ void SortBufferIndirectIterator::SetNullRowFlag(bool is_null_row) {
397397
SortingIterator::SortingIterator(THD *thd, Filesort *filesort,
398398
unique_ptr_destroy_only<RowIterator> source,
399399
ha_rows num_rows_estimate,
400+
table_map tables_to_get_rowid_for,
400401
ha_rows *examined_rows)
401402
: RowIterator(thd),
402403
m_filesort(filesort),
403404
m_source_iterator(move(source)),
404405
m_num_rows_estimate(num_rows_estimate),
406+
m_tables_to_get_rowid_for(tables_to_get_rowid_for),
405407
m_examined_rows(examined_rows) {}
406408

407409
SortingIterator::~SortingIterator() {
@@ -510,9 +512,9 @@ int SortingIterator::DoSort() {
510512
MYF(MY_WME | MY_ZEROFILL));
511513

512514
ha_rows found_rows;
513-
bool error =
514-
filesort(thd(), m_filesort, m_source_iterator.get(), m_num_rows_estimate,
515-
&m_fs_info, &m_sort_result, &found_rows);
515+
bool error = filesort(thd(), m_filesort, m_source_iterator.get(),
516+
m_tables_to_get_rowid_for, m_num_rows_estimate,
517+
&m_fs_info, &m_sort_result, &found_rows);
516518
for (TABLE *table : m_filesort->tables) {
517519
table->set_keyread(false); // Restore if we used indexes
518520
}

sql/sorting_iterator.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
#include "my_alloc.h"
3131
#include "my_base.h"
32+
#include "my_table_map.h"
3233
#include "sql/basic_row_iterators.h"
3334
#include "sql/row_iterator.h"
3435
#include "sql/sql_sort.h"
@@ -64,7 +65,8 @@ class SortingIterator final : public RowIterator {
6465
// RAM, we never use the priority queue.
6566
SortingIterator(THD *thd, Filesort *filesort,
6667
unique_ptr_destroy_only<RowIterator> source,
67-
ha_rows num_rows_estimate, ha_rows *examined_rows);
68+
ha_rows num_rows_estimate, table_map tables_to_get_rowid_for,
69+
ha_rows *examined_rows);
6870
~SortingIterator() override;
6971

7072
// Calls Init() on the source iterator, then does the actual sort.
@@ -128,6 +130,7 @@ class SortingIterator final : public RowIterator {
128130
Sort_result m_sort_result;
129131

130132
const ha_rows m_num_rows_estimate;
133+
const table_map m_tables_to_get_rowid_for;
131134
ha_rows *m_examined_rows;
132135

133136
// Holds one out of all RowIterator implementations we need so that it is

0 commit comments

Comments
 (0)