Skip to content

Commit 36215a8

Browse files
author
Steinar H. Gunderson
committed
Bug #33521622: Allow developers to force certain subplans in the hypergraph join optimizer
Add a new debug option “subplan_tokens”. If active, each access path gets a digest based on the subplan that it represents. This can be used by developers to force a given subplan, to investigate e.g. whether a given choice is actually faster in practice, force-apply a plan from the old join optimizer (or at least the types of subplans that are ever considered; e.g. aggregation through temporary tables are not) into the hypergraph join optimizer (to see how it's costed), or whether a given plan is even generated. If DEBUG contains force_subplan_0x<token>, subplans with the given token are unconditionally preferred over all others. The token returned is “0x<digest>”, where <digest> is the first 64 bits of the MD5 sum of this string: desc1,desc2,...,[child1_desc:]0xchild1,[child2_desc:]0xchild2,<more children> where desc1, desc2, etc. are the description lines given by EXPLAIN, and 0xchild1 is the token for children. The normal way to generate such tokens is to use SET DEBUG='+d,subplan_tokens' and look at the EXPLAIN FORMAT=tree, but in a pinch, you can also write them by hand and use md5sum or a similar tool. Only the hypergraph join optimizer honors token preferences, but EXPLAIN FORMAT=tree shows computed tokens for both optimizers. Change-Id: Ieaf35698dba5a54529ae86c61ed236999b6a65f0
1 parent d61b377 commit 36215a8

8 files changed

+235
-24
lines changed

mysql-test/r/subplan_tokens.result

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
CREATE TABLE t1 ( a INTEGER, KEY (a) );
2+
INSERT INTO t1 VALUES (1),(2),(3);
3+
ANALYZE TABLE t1;
4+
Table Op Msg_type Msg_text
5+
test.t1 analyze status OK
6+
SET DEBUG='+d,subplan_tokens';
7+
EXPLAIN FORMAT=tree SELECT * FROM t1;
8+
EXPLAIN
9+
-> [0x383f9723be72015d] Table scan on t1 (cost=0.08..0.25 rows=3)
10+
11+
To force this plan, use:
12+
SET DEBUG='+d,subplan_tokens,force_subplan_0x383f9723be72015d';
13+
14+
EXPLAIN FORMAT=tree SELECT * FROM t1 ORDER BY a;
15+
EXPLAIN
16+
-> [0x4f56594d4fd521c9] Index scan on t1 using a (cost=0.08..0.25 rows=3)
17+
18+
To force this plan, use:
19+
SET DEBUG='+d,subplan_tokens,force_subplan_0x4f56594d4fd521c9';
20+
21+
SET DEBUG='+d,subplan_tokens,force_subplan_0x383f9723be72015d';
22+
EXPLAIN FORMAT=tree SELECT * FROM t1 ORDER BY a;
23+
EXPLAIN
24+
-> [0x685afbd3f088dc87] Sort: t1.a (cost=1.03..1.03 rows=3)
25+
-> [0x383f9723be72015d] Table scan on t1 (cost=0.08..0.25 rows=3)
26+
27+
To force this plan, use:
28+
SET DEBUG='+d,subplan_tokens,force_subplan_0x685afbd3f088dc87,force_subplan_0x383f9723be72015d';
29+
30+
SET DEBUG='+d,subplan_tokens,force_subplan_0x90b6f02248424220';
31+
EXPLAIN FORMAT=tree SELECT * FROM t1 JOIN t1 AS t2 ON t1.a=t2.a ORDER BY t1.a;
32+
EXPLAIN
33+
-> [0x9fe1924c5f48742c] Sort: t1.a (cost=2.09..2.09 rows=3)
34+
-> [0x90b6f02248424220] Inner hash join (t1.a = t2.a) (cost=0.80..1.31 rows=3)
35+
-> [0x383f9723be72015d] Table scan on t1 (cost=0.08..0.25 rows=3)
36+
-> Hash
37+
-> [0xedd60db81c76aaf5] Table scan on t2 (cost=0.08..0.25 rows=3)
38+
39+
To force this plan, use:
40+
SET DEBUG='+d,subplan_tokens,force_subplan_0x9fe1924c5f48742c,force_subplan_0x90b6f02248424220,force_subplan_0x383f9723be72015d,force_subplan_0xedd60db81c76aaf5';
41+
42+
SET DEBUG='-d,subplan_tokens';
43+
EXPLAIN FORMAT=tree SELECT * FROM t1 JOIN t1 AS t2 ON t1.a=t2.a ORDER BY t1.a;
44+
EXPLAIN
45+
-> Nested loop inner join (cost=0.33..1.00 rows=3)
46+
-> Index scan on t2 using a (cost=0.08..0.25 rows=3)
47+
-> Index lookup on t1 using a (a=t2.a) (cost=0.25..0.25 rows=1)
48+
49+
DROP TABLE t1;

mysql-test/t/subplan_tokens.test

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
--source include/have_debug.inc
2+
--source include/have_hypergraph.inc
3+
4+
CREATE TABLE t1 ( a INTEGER, KEY (a) );
5+
INSERT INTO t1 VALUES (1),(2),(3);
6+
ANALYZE TABLE t1;
7+
8+
# Simple table scan, with subplan tokens.
9+
SET DEBUG='+d,subplan_tokens';
10+
EXPLAIN FORMAT=tree SELECT * FROM t1;
11+
12+
# Demonstrate that we'd normally use an index to satisfy ORDER BY.
13+
EXPLAIN FORMAT=tree SELECT * FROM t1 ORDER BY a;
14+
15+
# However, if we force the table scan variant from earlier,
16+
# we should get a sort instead.
17+
SET DEBUG='+d,subplan_tokens,force_subplan_0x383f9723be72015d';
18+
EXPLAIN FORMAT=tree SELECT * FROM t1 ORDER BY a;
19+
20+
# Force a hash join; this is the truncated MD5 checksum of:
21+
# Inner hash join (t1.a = t2.a),0x383f9723be72015d,Hash:0xedd60db81c76aaf5
22+
SET DEBUG='+d,subplan_tokens,force_subplan_0x90b6f02248424220';
23+
EXPLAIN FORMAT=tree SELECT * FROM t1 JOIN t1 AS t2 ON t1.a=t2.a ORDER BY t1.a;
24+
25+
# When not forcing anything, we should prefer the nested loop join.
26+
SET DEBUG='-d,subplan_tokens';
27+
EXPLAIN FORMAT=tree SELECT * FROM t1 JOIN t1 AS t2 ON t1.a=t2.a ORDER BY t1.a;
28+
29+
DROP TABLE t1;

sql/join_optimizer/access_path.h

+6
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,12 @@ struct AccessPath {
275275
/// Whether it is safe to get row IDs (for sorting) from this access path.
276276
Safety safe_for_rowid = SAFE;
277277

278+
#ifndef NDEBUG
279+
/// Whether this access path is forced preferred over all others by means
280+
/// of a SET DEBUG force_subplan_0x... statement.
281+
bool forced_by_dbug = false;
282+
#endif
283+
278284
/// Which ordering the rows produced by this path follow, if any
279285
/// (see interesting_orders.h). This is really a LogicalOrderings::StateIndex,
280286
/// but we don't want to add a dependency on interesting_orders.h from

sql/join_optimizer/explain_access_path.cc

+77-15
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@
2020
along with this program; if not, write to the Free Software
2121
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
2222

23-
#include "sql/join_optimizer/access_path.h"
23+
#include "sql/join_optimizer/explain_access_path.h"
2424

25+
#include "my_md5.h"
26+
#include "my_md5_size.h"
2527
#include "sql/filesort.h"
2628
#include "sql/item_sum.h"
2729
#include "sql/iterators/basic_row_iterators.h"
@@ -31,6 +33,7 @@
3133
#include "sql/iterators/ref_row_iterators.h"
3234
#include "sql/iterators/sorting_iterator.h"
3335
#include "sql/iterators/timing_iterator.h"
36+
#include "sql/join_optimizer/access_path.h"
3437
#include "sql/join_optimizer/print_utils.h"
3538
#include "sql/join_optimizer/relational_expression.h"
3639
#include "sql/range_optimizer/group_index_skip_scan_plan.h"
@@ -44,7 +47,6 @@
4447
#include <string>
4548
#include <vector>
4649

47-
using std::function;
4850
using std::string;
4951
using std::vector;
5052

@@ -179,7 +181,8 @@ vector<ExplainData::Child> GetAccessPathsFromSelectList(JOIN *join) {
179181
return ret;
180182
}
181183

182-
ExplainData ExplainAccessPath(const AccessPath *path, JOIN *join);
184+
ExplainData ExplainAccessPath(const AccessPath *path, JOIN *join,
185+
bool include_costs);
183186

184187
// The table iterator could be a slightly more complicated iterator than
185188
// the basic iterators (in particular, ALTERNATIVE), so show the entire
@@ -188,7 +191,8 @@ static void AddTableIteratorDescription(const AccessPath *path, JOIN *join,
188191
vector<string> *description) {
189192
const AccessPath *subpath = path;
190193
for (;;) {
191-
ExplainData explain = ExplainAccessPath(subpath, join);
194+
ExplainData explain =
195+
ExplainAccessPath(subpath, join, /*include_costs=*/true);
192196
for (string str : explain.description) {
193197
if (explain.children.size() > 1) {
194198
// This can happen if we have AlternativeIterator.
@@ -466,7 +470,8 @@ static string PrintRanges(const QUICK_RANGE *const *ranges, unsigned num_ranges,
466470
return ret;
467471
}
468472

469-
ExplainData ExplainAccessPath(const AccessPath *path, JOIN *join) {
473+
ExplainData ExplainAccessPath(const AccessPath *path, JOIN *join,
474+
bool include_costs) {
470475
vector<string> description;
471476
vector<ExplainData::Child> children;
472477
switch (path->type) {
@@ -766,6 +771,17 @@ ExplainData ExplainAccessPath(const AccessPath *path, JOIN *join) {
766771
break;
767772
case AccessPath::SORT: {
768773
string ret;
774+
if (path->sort().filesort == nullptr) {
775+
// This is a hack for when computing digests for forcing subplans (which
776+
// happens on non-finalized plans, which don't have a filesort object
777+
// yet). It means that sorts won't be correctly forced.
778+
// TODO(sgunders): Print based on the flags and order instead of the
779+
// filesort object, when using the hypergraph join optimizer.
780+
description.push_back("Sort");
781+
children.push_back({path->sort().child});
782+
break;
783+
}
784+
769785
if (path->sort().filesort->using_addon_fields()) {
770786
ret = "Sort";
771787
} else {
@@ -838,8 +854,8 @@ ExplainData ExplainAccessPath(const AccessPath *path, JOIN *join) {
838854
// We don't list the table iterator as an explicit child; we mark it in
839855
// our description instead. (Anything else would look confusingly much
840856
// like a join.)
841-
ExplainData table_explain =
842-
ExplainAccessPath(path->temptable_aggregate().table_path, join);
857+
ExplainData table_explain = ExplainAccessPath(
858+
path->temptable_aggregate().table_path, join, include_costs);
843859
description = move(table_explain.description);
844860
description.emplace_back("Aggregate using temporary table");
845861
children.push_back({path->temptable_aggregate().subquery_path});
@@ -1021,7 +1037,7 @@ ExplainData ExplainAccessPath(const AccessPath *path, JOIN *join) {
10211037
break;
10221038
}
10231039
}
1024-
if (path->num_output_rows >= 0.0) {
1040+
if (include_costs && path->num_output_rows >= 0.0) {
10251041
double first_row_cost;
10261042
if (path->num_output_rows <= 1.0) {
10271043
first_row_cost = path->cost;
@@ -1061,7 +1077,8 @@ ExplainData ExplainAccessPath(const AccessPath *path, JOIN *join) {
10611077
}
10621078
description.back() += str;
10631079
}
1064-
if (current_thd->lex->is_explain_analyze && path->iterator != nullptr) {
1080+
if (include_costs && current_thd->lex->is_explain_analyze &&
1081+
path->iterator != nullptr) {
10651082
if (path->num_output_rows < 0.0) {
10661083
// We always want a double space between the iterator name and the costs.
10671084
description.back().push_back(' ');
@@ -1073,21 +1090,31 @@ ExplainData ExplainAccessPath(const AccessPath *path, JOIN *join) {
10731090
}
10741091

10751092
string PrintQueryPlan(int level, AccessPath *path, JOIN *join,
1076-
bool is_root_of_join) {
1093+
bool is_root_of_join,
1094+
vector<string> *tokens_for_force_subplan) {
10771095
string ret;
10781096

10791097
if (path == nullptr) {
10801098
ret.assign(level * 4, ' ');
10811099
return ret + "<not executable by iterator executor>\n";
10821100
}
10831101

1084-
ExplainData explain = ExplainAccessPath(path, join);
1102+
ExplainData explain = ExplainAccessPath(path, join, /*include_costs=*/true);
10851103

10861104
int top_level = level;
10871105

1106+
bool print_token = (tokens_for_force_subplan != nullptr);
10881107
for (const string &str : explain.description) {
10891108
ret.append(level * 4, ' ');
10901109
ret += "-> ";
1110+
if (print_token) {
1111+
string token = GetForceSubplanToken(path, join);
1112+
ret += '[';
1113+
ret += token;
1114+
ret += "] ";
1115+
print_token = false;
1116+
tokens_for_force_subplan->push_back(move(token));
1117+
}
10911118
ret += str;
10921119
ret += "\n";
10931120
++level;
@@ -1113,10 +1140,11 @@ string PrintQueryPlan(int level, AccessPath *path, JOIN *join,
11131140
ret.append("-> ");
11141141
ret.append(child.description);
11151142
ret.append("\n");
1116-
ret +=
1117-
PrintQueryPlan(level + 1, child.path, subjoin, child_is_root_of_join);
1143+
ret += PrintQueryPlan(level + 1, child.path, subjoin,
1144+
child_is_root_of_join, tokens_for_force_subplan);
11181145
} else {
1119-
ret += PrintQueryPlan(level, child.path, subjoin, child_is_root_of_join);
1146+
ret += PrintQueryPlan(level, child.path, subjoin, child_is_root_of_join,
1147+
tokens_for_force_subplan);
11201148
}
11211149
}
11221150
if (is_root_of_join) {
@@ -1134,8 +1162,42 @@ string PrintQueryPlan(int level, AccessPath *path, JOIN *join,
11341162
ret.append(child.description);
11351163
ret.append("\n");
11361164
ret += PrintQueryPlan(top_level + 1, child.path, child.join,
1137-
/*is_root_of_join=*/true);
1165+
/*is_root_of_join=*/true, tokens_for_force_subplan);
1166+
}
1167+
}
1168+
return ret;
1169+
}
1170+
1171+
// 0x
1172+
// truncated_md5(desc1,desc2,...,[child1_desc:]0xchild1,[child2_desc:]0xchild2,...)
1173+
string GetForceSubplanToken(AccessPath *path, JOIN *join) {
1174+
if (path == nullptr) {
1175+
return "";
1176+
}
1177+
ExplainData explain = ExplainAccessPath(path, join, /*include_costs=*/false);
1178+
1179+
string digest = explain.description[0];
1180+
for (size_t desc_idx = 1; desc_idx < explain.description.size(); ++desc_idx) {
1181+
digest += ',';
1182+
digest += explain.description[desc_idx];
1183+
}
1184+
1185+
for (const ExplainData::Child &child : explain.children) {
1186+
digest += ',';
1187+
if (!child.description.empty()) {
1188+
digest += child.description;
1189+
digest += ':';
11381190
}
1191+
digest += GetForceSubplanToken(child.path, join);
11391192
}
1193+
1194+
unsigned char md5sum[MD5_HASH_SIZE];
1195+
compute_md5_hash(pointer_cast<char *>(md5sum), digest.data(), digest.size());
1196+
1197+
char ret[8 * 2 + 2 + 1];
1198+
snprintf(ret, sizeof(ret), "0x%02x%02x%02x%02x%02x%02x%02x%02x", md5sum[0],
1199+
md5sum[1], md5sum[2], md5sum[3], md5sum[4], md5sum[5], md5sum[6],
1200+
md5sum[7]);
1201+
11401202
return ret;
11411203
}

sql/join_optimizer/explain_access_path.h

+32-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#define SQL_JOIN_OPTIMIZER_EXPLAIN_ACCESS_PATH_H
2525

2626
#include <string>
27+
#include <vector>
2728

2829
struct AccessPath;
2930
class JOIN;
@@ -34,7 +35,36 @@ class JOIN;
3435
"join" should be set to the JOIN that "path" is part of (or nullptr
3536
if it is not, e.g. if it's part of executing a UNION).
3637
*/
37-
std::string PrintQueryPlan(int level, AccessPath *path, JOIN *join,
38-
bool is_root_of_join);
38+
std::string PrintQueryPlan(
39+
int level, AccessPath *path, JOIN *join, bool is_root_of_join,
40+
std::vector<std::string> *tokens_for_force_subplan = nullptr);
41+
42+
/**
43+
Generate a digest based on the subplan that the given access path
44+
represents. This can be used by developers to force a given subplan,
45+
to investigate e.g. whether a given choice is actually faster in practice,
46+
force-apply a plan from the old join optimizer (or at least the types of
47+
subplans that are ever considered; e.g. aggregation through temporary
48+
tables are not) into the hypergraph join optimizer (to see how it's costed),
49+
or whether a given plan is even generated. If DEBUG contains
50+
force_subplan_0x<token>, subplans with the given token are unconditionally
51+
preferred over all others.
52+
53+
The token returned is “0x<digest>”, where <digest> is the first 64 bits
54+
of the MD5 sum of this string:
55+
56+
desc1,desc2,...,[child1_desc:]0xchild1,[child2_desc:]0xchild2,<more
57+
children>
58+
59+
where desc1, desc2, etc. are the description lines given by EXPLAIN,
60+
and 0xchild1 is the token for children. The normal way to generate such
61+
tokens is to use SET DEBUG='+d,subplan_tokens' and look at the EXPLAIN
62+
FORMAT=tree, but in a pinch, you can also write them by hand and use
63+
md5sum or a similar tool.
64+
65+
Only the hypergraph join optimizer honors token preferences, but EXPLAIN
66+
FORMAT=tree shows computed tokens for both optimizers.
67+
*/
68+
std::string GetForceSubplanToken(AccessPath *path, JOIN *join);
3969

4070
#endif // SQL_JOIN_OPTIMIZER_EXPLAIN_ACCESS_PATH_H

sql/join_optimizer/join_optimizer.cc

+16
Original file line numberDiff line numberDiff line change
@@ -2262,6 +2262,16 @@ enum class PathComparisonResult {
22622262
static inline PathComparisonResult CompareAccessPaths(
22632263
const LogicalOrderings &orderings, const AccessPath &a, const AccessPath &b,
22642264
OrderingSet obsolete_orderings) {
2265+
#ifndef NDEBUG
2266+
// Manual preference overrides everything else.
2267+
// If they're both preferred, tie-break by ordering.
2268+
if (a.forced_by_dbug) {
2269+
return PathComparisonResult::FIRST_DOMINATES;
2270+
} else if (b.forced_by_dbug) {
2271+
return PathComparisonResult::SECOND_DOMINATES;
2272+
}
2273+
#endif
2274+
22652275
bool a_is_better = false, b_is_better = false;
22662276
if (a.cost < b.cost) {
22672277
a_is_better = true;
@@ -2469,6 +2479,12 @@ AccessPath *CostingReceiver::ProposeAccessPath(
24692479
}
24702480
}
24712481

2482+
DBUG_EXECUTE_IF("subplan_tokens", {
2483+
string token =
2484+
"force_subplan_" + GetForceSubplanToken(path, m_query_block->join);
2485+
DBUG_EXECUTE_IF(token.c_str(), path->forced_by_dbug = true;);
2486+
});
2487+
24722488
if (existing_paths->empty()) {
24732489
if (m_trace != nullptr) {
24742490
*m_trace += " - " + PrintCost(*path, m_graph, description_for_trace) +

0 commit comments

Comments
 (0)