Skip to content

Commit a406de2

Browse files
author
Steinar H. Gunderson
committed
Bug #30254797: REPLACE VECTOR<QEP_TAB *> WITH BITMAPS
There are several places in the translation of pre-iterator executor code (in particular, to hash joins, but also in a few other places) where we should just use a bitmap instead of a vector of QEP_TAB pointers. Do so, and also add a convenience class for looping over such bitmaps using range-based for loops. This saves ~1.3 kB of generated code in an optimized build. Change-Id: I7fa2a8982f51d2dcfc6fe1293156a644fb5ba0ef
1 parent de10ddf commit a406de2

12 files changed

+254
-143
lines changed

Diff for: include/my_table_map.h

+16-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
2+
Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License, version 2.0,
@@ -24,12 +24,22 @@
2424
#ifndef MY_TABLE_MAP_INCLUDED
2525
#define MY_TABLE_MAP_INCLUDED
2626

27+
#include "my_dbug.h"
2728
#include "my_inttypes.h"
2829

29-
/*
30-
TODO Convert these to use Bitmap class.
31-
*/
32-
typedef ulonglong table_map; /* Used for table bits in join */
33-
typedef ulonglong nesting_map; /* Used for flags of nesting constructs */
30+
using table_map = uint64_t; // Used for table bits in join.
31+
using nesting_map = uint64_t; // Used for flags of nesting constructs.
32+
using qep_tab_map = uint64_t; // Used for indexing QEP_TABs in a JOIN.
33+
34+
// Returns a bitmap with exactly the bits of the semi-open interval
// [start, end) set. Requires start <= end <= 64; both TablesBetween(0, 64)
// (all bits set) and TablesBetween(64, 64) (no bits set) are valid.
static inline uint64_t TablesBetween(unsigned start, unsigned end) {
  DBUG_ASSERT(end >= start);
  DBUG_ASSERT(end <= 64);
  // Shifting a 64-bit value by 64 is undefined behavior, so the boundary
  // value 64 must not reach the shift; it simply means "every bit below".
  const uint64_t below_end =
      (end >= 64) ? ~uint64_t{0} : (uint64_t{1} << end) - 1;
  const uint64_t below_start =
      (start >= 64) ? ~uint64_t{0} : (uint64_t{1} << start) - 1;
  return below_end & ~below_start;
}
39+
40+
// Tests whether "map" contains the table with the given index, i.e. whether
// bit number "idx" of the map is set. "idx" must be less than 64.
static inline bool ContainsTable(uint64_t map, unsigned idx) {
  // Shifting by 64 or more bits is undefined behavior, so assert on bad
  // indexes instead of silently testing an arbitrary bit.
  DBUG_ASSERT(idx < 64);
  return (map & (uint64_t{1} << idx)) != 0;
}
3444

3545
#endif // MY_TABLE_MAP_INCLUDED

Diff for: include/tables_contained_in.h

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
3+
4+
This program is free software; you can redistribute it and/or modify
5+
it under the terms of the GNU General Public License, version 2.0,
6+
as published by the Free Software Foundation.
7+
8+
This program is also distributed with certain software (including
9+
but not limited to OpenSSL) that is licensed under separate terms,
10+
as designated in a particular file or component or in included license
11+
documentation. The authors of MySQL hereby grant you an additional
12+
permission to link the program and your derivative works with the
13+
separately licensed software that they have included with MySQL.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU General Public License, version 2.0, for more details.
19+
20+
You should have received a copy of the GNU General Public License
21+
along with this program; if not, write to the Free Software
22+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23+
24+
#ifndef TABLES_CONTAINED_IN_H_
25+
#define TABLES_CONTAINED_IN_H_
26+
27+
#include <string.h>
28+
29+
#include "my_inttypes.h"
30+
#include "sql/sql_optimizer.h"
31+
32+
#ifdef _MSC_VER
33+
#include <intrin.h>
34+
#pragma intrinsic(_BitScanForward64)
35+
#endif
36+
37+
// A utility class to loop over all set bits in the given map.
38+
// Use as:
39+
//
40+
// qep_tab_map = ...;
41+
// for (QEP_TAB *qep_tab : TablesContainedIn(join, qep_tab_map)) {
42+
// ...
43+
// }
44+
class TablesContainedIn {
45+
public:
46+
class iterator {
47+
private:
48+
const JOIN *const m_join;
49+
qep_tab_map m_bits_left;
50+
51+
public:
52+
iterator(const JOIN *join, qep_tab_map map)
53+
: m_join(join), m_bits_left(map) {}
54+
bool operator==(const iterator &other) const {
55+
DBUG_ASSERT(m_join == other.m_join);
56+
return m_bits_left == other.m_bits_left;
57+
}
58+
bool operator!=(const iterator &other) const {
59+
DBUG_ASSERT(m_join == other.m_join);
60+
return m_bits_left != other.m_bits_left;
61+
}
62+
QEP_TAB *operator*() const {
63+
// Find the QEP_TAB that corresponds to the lowest set bit.
64+
DBUG_ASSERT(m_bits_left != 0);
65+
#ifdef _MSC_VER
66+
unsigned long idx;
67+
_BitScanForward64(&idx, m_bits_left);
68+
#else
69+
size_t idx = ffsll(m_bits_left) - 1;
70+
#endif
71+
DBUG_ASSERT(idx < m_join->tables);
72+
return &m_join->qep_tab[idx];
73+
}
74+
iterator &operator++() {
75+
// Clear the lowest set bit.
76+
DBUG_ASSERT(m_bits_left != 0);
77+
m_bits_left &= (m_bits_left - 1);
78+
return *this;
79+
}
80+
};
81+
82+
TablesContainedIn(const JOIN *join, qep_tab_map map)
83+
: m_join(join), m_initial_map(map) {}
84+
85+
iterator begin() const { return {m_join, m_initial_map}; }
86+
iterator end() const { return {m_join, 0}; }
87+
88+
private:
89+
const JOIN *const m_join;
90+
const qep_tab_map m_initial_map;
91+
};
92+
93+
#endif // TABLES_CONTAINED_IN_H_

Diff for: sql/bka_iterator.cc

+9-8
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@
5252
using std::string;
5353
using std::vector;
5454

55-
BKAIterator::BKAIterator(THD *thd,
55+
BKAIterator::BKAIterator(THD *thd, JOIN *join,
5656
unique_ptr_destroy_only<RowIterator> outer_input,
57-
const std::vector<QEP_TAB *> &outer_input_tables,
57+
qep_tab_map outer_input_tables,
5858
unique_ptr_destroy_only<RowIterator> inner_input,
5959
size_t max_memory_available,
6060
size_t mrr_bytes_needed_for_single_inner_row,
@@ -65,7 +65,7 @@ BKAIterator::BKAIterator(THD *thd,
6565
m_inner_input(move(inner_input)),
6666
m_mem_root(key_memory_hash_join, 16384 /* 16 kB */),
6767
m_rows(&m_mem_root),
68-
m_outer_input_tables(outer_input_tables),
68+
m_outer_input_tables(join, outer_input_tables),
6969
m_max_memory_available(max_memory_available),
7070
m_mrr_bytes_needed_for_single_inner_row(
7171
mrr_bytes_needed_for_single_inner_row),
@@ -244,18 +244,19 @@ int BKAIterator::Read() {
244244
}
245245
}
246246

247-
MultiRangeRowIterator::MultiRangeRowIterator(
248-
THD *thd, const std::vector<QEP_TAB *> &outer_input_tables,
249-
Item *cache_idx_cond, TABLE *table, bool keep_current_rowid, TABLE_REF *ref,
250-
int mrr_flags)
247+
MultiRangeRowIterator::MultiRangeRowIterator(THD *thd, JOIN *join,
248+
qep_tab_map outer_input_tables,
249+
Item *cache_idx_cond, TABLE *table,
250+
bool keep_current_rowid,
251+
TABLE_REF *ref, int mrr_flags)
251252
: TableRowIterator(thd, table),
252253
m_cache_idx_cond(cache_idx_cond),
253254
m_keep_current_rowid(keep_current_rowid),
254255
m_table(table),
255256
m_file(table->file),
256257
m_ref(ref),
257258
m_mrr_flags(mrr_flags),
258-
m_outer_input_tables(outer_input_tables) {}
259+
m_outer_input_tables(join, outer_input_tables) {}
259260

260261
bool MultiRangeRowIterator::Init() {
261262
/*

Diff for: sql/bka_iterator.h

+6-4
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ class BKAIterator final : public RowIterator {
8080
public:
8181
/**
8282
@param thd Thread handle.
83+
@param join The JOIN we are part of.
8384
@param outer_input The iterator to read the outer rows from.
8485
@param outer_input_tables Bitmap with one bit set for the QEP_TAB of each outer table involved.
8586
Used to know which fields we are to read into our buffer.
@@ -97,8 +98,9 @@ class BKAIterator final : public RowIterator {
9798
@param mrr_iterator Pointer to the MRR iterator at the bottom of
9899
inner_input. Used to send row ranges and buffers.
99100
*/
100-
BKAIterator(THD *thd, unique_ptr_destroy_only<RowIterator> outer_input,
101-
const std::vector<QEP_TAB *> &outer_input_tables,
101+
BKAIterator(THD *thd, JOIN *join,
102+
unique_ptr_destroy_only<RowIterator> outer_input,
103+
qep_tab_map outer_input_tables,
102104
unique_ptr_destroy_only<RowIterator> inner_input,
103105
size_t max_memory_available,
104106
size_t mrr_bytes_needed_for_single_inner_row,
@@ -233,6 +235,7 @@ class MultiRangeRowIterator final : public TableRowIterator {
233235
public:
234236
/**
235237
@param thd Thread handle.
238+
@param join The JOIN we are part of.
236239
@param outer_input_tables Bitmap with one bit set for the QEP_TAB of each outer table involved.
237240
Used to know which fields we are to read back from the buffer.
238241
@param cache_idx_cond See m_cache_idx_cond.
@@ -243,8 +246,7 @@ class MultiRangeRowIterator final : public TableRowIterator {
243246
@param ref The index condition we are looking up on.
244247
@param mrr_flags Flags passed on to MRR.
245248
*/
246-
MultiRangeRowIterator(THD *thd,
247-
const std::vector<QEP_TAB *> &outer_input_tables,
249+
MultiRangeRowIterator(THD *thd, JOIN *join, qep_tab_map outer_input_tables,
248250
Item *cache_idx_cond, TABLE *table,
249251
bool keep_current_rowid, TABLE_REF *ref, int mrr_flags);
250252

Diff for: sql/hash_join_buffer.cc

+45-42
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@
4141
#include "sql/psi_memory_key.h"
4242
#include "sql/sql_executor.h"
4343
#include "sql/sql_join_buffer.h"
44+
#include "sql/sql_optimizer.h"
4445
#include "sql/table.h"
46+
#include "tables_contained_in.h"
4547
#include "template_utils.h"
4648

4749
namespace hash_join_buffer {
@@ -92,56 +94,57 @@ Table::Table(QEP_TAB *qep_tab)
9294
// with no columns, like t2 in the following query:
9395
//
9496
// SELECT t1.col1 FROM t1, t2; # t2 will be included without any columns.
95-
TableCollection::TableCollection(const std::vector<QEP_TAB *> &tables)
96-
: m_tables_bitmap(0),
97-
m_ref_and_null_bytes_size(0),
98-
m_has_blob_column(false) {
99-
for (QEP_TAB *qep_tab : tables) {
100-
m_tables_bitmap |= qep_tab->table_ref->map();
101-
102-
// When constructing the iterator tree, we might end up adding a
103-
// WeedoutIterator _after_ a HashJoinIterator has been constructed.
104-
// When adding the WeedoutIterator, QEP_TAB::rowid_status will be changed
105-
// indicate that a row ID is needed. A side effect of this is that
106-
// rowid_status might say that no row ID is needed here, while it says
107-
// otherwise while hash join is executing. As such, we may write outside of
108-
// the allocated buffers since we did not take the size of the row ID into
109-
// account here. To overcome this, we always assume that the row ID should
110-
// be kept; reserving some extra bytes in a few buffers should not be an
111-
// issue.
112-
m_ref_and_null_bytes_size += qep_tab->table()->file->ref_length;
113-
114-
if (qep_tab->table()->is_nullable()) {
115-
m_ref_and_null_bytes_size += sizeof(qep_tab->table()->null_row);
116-
}
97+
TableCollection::TableCollection(const JOIN *join, qep_tab_map tables) {
  // Register every table whose bit is set in "tables"; AddTable() does the
  // per-table bookkeeping.
  for (QEP_TAB *tab : TablesContainedIn(join, tables)) AddTable(tab);
}
117102

118-
Table table(qep_tab);
119-
for (const hash_join_buffer::Column &column : table.columns) {
120-
// Field_typed_array will mask away the BLOB_FLAG for all types. Hence,
121-
// we will treat all Field_typed_array as blob columns.
122-
if ((column.field->flags & BLOB_FLAG) > 0 || column.field->is_array()) {
123-
m_has_blob_column = true;
124-
}
103+
void TableCollection::AddTable(QEP_TAB *qep_tab) {
104+
m_tables_bitmap |= qep_tab->table_ref->map();
105+
106+
// When constructing the iterator tree, we might end up adding a
107+
// WeedoutIterator _after_ a HashJoinIterator has been constructed.
108+
// When adding the WeedoutIterator, QEP_TAB::rowid_status will be changed
109+
// to indicate that a row ID is needed. A side effect of this is that
110+
// rowid_status might say that no row ID is needed here, while it says
111+
// otherwise while hash join is executing. As such, we may write outside of
112+
// the allocated buffers since we did not take the size of the row ID into
113+
// account here. To overcome this, we always assume that the row ID should
114+
// be kept; reserving some extra bytes in a few buffers should not be an
115+
// issue.
116+
m_ref_and_null_bytes_size += qep_tab->table()->file->ref_length;
117+
118+
if (qep_tab->table()->is_nullable()) {
119+
m_ref_and_null_bytes_size += sizeof(qep_tab->table()->null_row);
120+
}
125121

126-
// If a column is marked as nullable, we need to copy the NULL flags.
127-
if ((column.field->flags & NOT_NULL_FLAG) == 0) {
128-
table.copy_null_flags = true;
129-
}
122+
Table table(qep_tab);
123+
for (const hash_join_buffer::Column &column : table.columns) {
124+
// Field_typed_array will mask away the BLOB_FLAG for all types. Hence,
125+
// we will treat all Field_typed_array as blob columns.
126+
if ((column.field->flags & BLOB_FLAG) > 0 || column.field->is_array()) {
127+
m_has_blob_column = true;
128+
}
130129

131-
// BIT fields stores some of its data in the NULL flags of the table. So
132-
// if we have a BIT field, we must copy the NULL flags.
133-
if (column.field->type() == MYSQL_TYPE_BIT &&
134-
down_cast<const Field_bit *>(column.field)->bit_len > 0) {
135-
table.copy_null_flags = true;
136-
}
130+
// If a column is marked as nullable, we need to copy the NULL flags.
131+
if ((column.field->flags & NOT_NULL_FLAG) == 0) {
132+
table.copy_null_flags = true;
137133
}
138134

139-
if (table.copy_null_flags) {
140-
m_ref_and_null_bytes_size += qep_tab->table()->s->null_bytes;
135+
// BIT fields store some of their data in the NULL flags of the table. So
136+
// if we have a BIT field, we must copy the NULL flags.
137+
if (column.field->type() == MYSQL_TYPE_BIT &&
138+
down_cast<const Field_bit *>(column.field)->bit_len > 0) {
139+
table.copy_null_flags = true;
141140
}
141+
}
142142

143-
m_tables.push_back(table);
143+
if (table.copy_null_flags) {
144+
m_ref_and_null_bytes_size += qep_tab->table()->s->null_bytes;
144145
}
146+
147+
m_tables.push_back(table);
145148
}
146149

147150
// Calculate how many bytes the data in the column uses. We don't bother

Diff for: sql/hash_join_buffer.h

+11-4
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,12 @@ class TableCollection {
119119
public:
120120
TableCollection() = default;
121121

122-
explicit TableCollection(const std::vector<QEP_TAB *> &tables);
122+
explicit TableCollection(QEP_TAB *qep_tab) {
123+
// Single table.
124+
AddTable(qep_tab);
125+
}
126+
127+
TableCollection(const JOIN *join, qep_tab_map tables); // Multiple tables.
123128

124129
const Prealloced_array<Table, 4> &tables() const { return m_tables; }
125130

@@ -130,21 +135,23 @@ class TableCollection {
130135
bool has_blob_column() const { return m_has_blob_column; }
131136

132137
private:
138+
void AddTable(QEP_TAB *qep_tab);
139+
133140
Prealloced_array<Table, 4> m_tables{PSI_NOT_INSTRUMENTED};
134141

135142
// We frequently use the bitmap to determine which side of the join an Item
136143
// belongs to, so precomputing the bitmap saves quite some time.
137-
table_map m_tables_bitmap;
144+
table_map m_tables_bitmap = 0;
138145

139146
// Sum of the NULL bytes and the row ID for all of the tables.
140-
size_t m_ref_and_null_bytes_size;
147+
size_t m_ref_and_null_bytes_size = 0;
141148

142149
// Whether any of the tables has a BLOB/TEXT column. This is used to determine
143150
// whether we need to estimate the row size every time we store a row to the
144151
// row buffer or to a chunk file on disk. If this is set to false, we can
145152
// pre-allocate any necessary buffers we need during the hash join, and thus
146153
// eliminate the need for recalculating the row size every time.
147-
bool m_has_blob_column;
154+
bool m_has_blob_column = false;
148155
};
149156

150157
/// Count up how many bytes a single row from the given tables will occupy,

Diff for: sql/hash_join_iterator.cc

+4-3
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "sql/row_iterator.h"
4646
#include "sql/sql_class.h"
4747
#include "sql/sql_executor.h"
48+
#include "sql/sql_optimizer.h"
4849
#include "sql/sql_select.h"
4950
#include "sql/table.h"
5051

@@ -65,7 +66,7 @@ static std::vector<HashJoinCondition> ItemToHashJoinConditions(
6566

6667
HashJoinIterator::HashJoinIterator(
6768
THD *thd, unique_ptr_destroy_only<RowIterator> build_input,
68-
const std::vector<QEP_TAB *> &build_input_tables,
69+
qep_tab_map build_input_tables,
6970
unique_ptr_destroy_only<RowIterator> probe_input,
7071
QEP_TAB *probe_input_table, size_t max_memory_available,
7172
const std::vector<Item_func_eq *> &join_conditions,
@@ -74,8 +75,8 @@ HashJoinIterator::HashJoinIterator(
7475
m_state(State::READING_ROW_FROM_PROBE_ITERATOR),
7576
m_build_input(move(build_input)),
7677
m_probe_input(move(probe_input)),
77-
m_probe_input_table({probe_input_table}),
78-
m_build_input_tables(build_input_tables),
78+
m_probe_input_table(probe_input_table),
79+
m_build_input_tables(probe_input_table->join(), build_input_tables),
7980
m_row_buffer(m_build_input_tables,
8081
ItemToHashJoinConditions(join_conditions, thd->mem_root),
8182
max_memory_available),

0 commit comments

Comments
 (0)