-
Notifications
You must be signed in to change notification settings - Fork 4k
/
Copy pathsql_planner.h
305 lines (261 loc) · 12.8 KB
/
sql_planner.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
#ifndef SQL_PLANNER_INCLUDED
#define SQL_PLANNER_INCLUDED
/* Copyright (c) 2000, 2025, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
/**
@file sql/sql_planner.h
Join planner classes.
*/
#include <sys/types.h>
#include "my_inttypes.h"
#include "my_table_map.h"
#include "sql_optimizer.h"
class Cost_model_server;
class JOIN;
class JOIN_TAB;
class Key_use;
class Opt_trace_object;
class THD;
struct TABLE;
class Table_ref;
struct POSITION;
typedef ulonglong nested_join_map;
/**
Find the lateral dependencies of 'tab'.
*/
inline table_map get_lateral_deps(const JOIN_TAB &tab) {
return (tab.table_ref != nullptr && tab.table_ref->is_derived())
? tab.table_ref->derived_query_expression()->m_lateral_deps
: 0;
}
/**
This class determines the optimal join order for tables within
a basic query block, ie a query specification clause, possibly extended
with semi-joined tables from embedded subqueries.
This class takes as prerequisite a join class where all dependencies among
tables have been sorted out, all possible access paths have been
sorted out, and all statistics information has been filled in.
The class has a sole public function that will calculate the most
optimal plan based on the inputs and the environment, such as prune level
and greedy optimizer search depth. For more information, see the
function headers for the private functions greedy_search(),
best_extension_by_limited_search() and eq_ref_extension_by_limited_search().
*/
class Optimize_table_order {
public:
Optimize_table_order(THD *thd_arg, JOIN *join_arg, Table_ref *sjm_nest_arg);
~Optimize_table_order() = default;
/**
Entry point to table join order optimization.
For further description, see class header and private function headers.
@return false if successful, true if error
*/
bool choose_table_order();
void recalculate_lateral_deps(uint first_tab_no);
void recalculate_lateral_deps_incrementally(uint first_tab_no);
private:
THD *const thd; // Pointer to current THD
JOIN *const join; // Pointer to the current plan being developed
const uint search_depth; // Maximum search depth to apply in greedy search
const uint prune_level; // pruning heuristics to be applied
// (0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
/**
Bitmap of all join nests embedding the last table appended to the current
partial join.
*/
nested_join_map cur_embedding_map;
/**
If non-NULL, we are optimizing a materialized semi-join nest.
If NULL, we are optimizing a complete join plan.
*/
const Table_ref *const emb_sjm_nest;
/**
When calculating a plan for a materialized semi-join nest,
best_access_path() needs to know not only the remaining tables within the
semi-join nest, but also all tables outside of this nest, because there may
be key references between the semi-join nest and the outside tables
that should not be considered when materializing the semi-join nest.
@c excluded_tables tracks these tables.
*/
const table_map excluded_tables;
/*
No need to call advance_sj_state() when
1) there are no semijoin nests or
2) we are optimizing a materialized semijoin nest.
*/
const bool has_sj;
/**
If true, find_best_ref() must go through all keys, no shortcutting
allowed.
*/
bool test_all_ref_keys;
/// True if we found a complete plan using only allowed semijoin strategies.
bool found_plan_with_allowed_sj;
/**
False/true at start/end of choose_table_order().
Helps member functions know if current plan is in join->positions or
join->best_positions.
*/
bool got_final_plan;
/// Set true when we have decided to return with a "good enough" plan.
bool use_best_so_far{false};
inline Key_use *find_best_ref(const JOIN_TAB *tab,
const table_map remaining_tables,
const uint idx, const double prefix_rowcount,
bool *found_condition,
table_map *ref_depends_map,
uint *used_key_parts);
double calculate_scan_cost(const JOIN_TAB *tab, const uint idx,
const Key_use *best_ref,
const double prefix_rowcount,
const bool found_condition,
const bool disable_jbuf,
double *rows_after_filtering,
Opt_trace_object *trace_access_scan);
void best_access_path(JOIN_TAB *tab, const table_map remaining_tables,
const uint idx, bool disable_jbuf,
const double prefix_rowcount, POSITION *pos);
bool semijoin_loosescan_fill_driving_table_position(
const JOIN_TAB *s, table_map remaining_tables, uint idx,
double prefix_rowcount, POSITION *loose_scan_pos);
bool check_interleaving_with_nj(JOIN_TAB *next_tab);
void advance_sj_state(table_map remaining_tables, const JOIN_TAB *tab,
uint idx);
void backout_nj_state(const table_map remaining_tables, const JOIN_TAB *tab);
void optimize_straight_join(table_map join_tables);
bool greedy_search(table_map remaining_tables);
bool best_extension_by_limited_search(table_map remaining_tables, uint idx,
uint current_search_depth);
table_map eq_ref_extension_by_limited_search(table_map remaining_tables,
uint idx,
uint current_search_depth);
bool consider_plan(uint idx, Opt_trace_object *trace_obj);
bool fix_semijoin_strategies();
bool semijoin_firstmatch_loosescan_access_paths(uint first_tab, uint last_tab,
table_map remaining_tables,
bool loosescan,
double *newcount,
double *newcost);
void semijoin_mat_scan_access_paths(uint last_inner_tab, uint last_outer_tab,
table_map remaining_tables,
Table_ref *sjm_nest, double *newcount,
double *newcost);
void semijoin_mat_lookup_access_paths(uint last_inner, Table_ref *sjm_nest,
double *newcount, double *newcost);
void semijoin_dupsweedout_access_paths(uint first_tab, uint last_tab,
double *newcount, double *newcost);
double lateral_derived_cost(const JOIN_TAB *tab, const uint idx,
const double prefix_rowcount,
const Cost_model_server *cost_model);
table_map calculate_lateral_deps_of_final_plan(uint tab_no) const;
bool plan_has_duplicate_tabs() const;
static uint determine_search_depth(uint search_depth, uint table_count);
};
void get_partial_join_cost(JOIN *join, uint n_tables, double *cost_arg,
double *rowcount_arg);
/**
Calculate 'Post read filtering' effect of JOIN::conds for table
'tab'. Only conditions that are not directly involved in the chosen
access method shall be included in the calculation of this 'Post
read filtering' effect.
The function first identifies fields that are directly used by the
access method. This includes columns used by range and ref access types,
and predicates on the identified columns (if any) will not be taken into
account when the filtering effect is calculated.
The function will then calculate the filtering effect of any predicate
that applies to 'tab' and is not depending on the columns used by the
access method. The source of information with highest accuracy is
always preferred and is as follows:
1) Row estimates from the range optimizer
2) Row estimates from index statistics (records per key)
3) Guesstimates
Thus, after identifying columns that are used by the access method,
the function will look for rows estimates made by the range optimizer.
If found, the estimates from the range optimizer are calculated into
the filtering effect.
The function then goes through JOIN::conds to get estimates from any
remaining predicate that applies to 'tab' and does not depend on any
tables that come later in the join sequence. Predicates that depend on
columns that are either used by the access method or used in the row
estimate from the range optimizer will not be considered in this phase.
@param tab The table condition filtering effect is calculated
for
@param keyuse Describes the 'ref' access method (if any) that is
chosen
@param used_tables Tables earlier in the join sequence than 'tab'
@param fanout The number of rows read by the chosen access
method for each row combination of previous tables
@param is_join_buffering Whether or not condition filtering is about
to be calculated for an access method using join
buffering.
@param write_to_trace Whether we should print the filtering effect calculated
by histogram statistics and the final aggregated filtering
effect to optimizer trace.
@param parent_trace The parent trace object where the final aggregated
filtering effect will be printed if "write_to_trace" is
set to true.
@return the 'post read filtering' effect (between 0 and 1) of
JOIN::conds
*/
float calculate_condition_filter(const JOIN_TAB *const tab,
const Key_use *const keyuse,
table_map used_tables, double fanout,
bool is_join_buffering, bool write_to_trace,
Opt_trace_object &parent_trace);
/**
Find the cost for a ref lookup on the given index, assumed to return
“num_rows” rows. The cost will be capped by “worst_seeks”
(see find_worst_seeks()).
*/
double find_cost_for_ref(const THD *thd, TABLE *table, unsigned keyno,
double num_rows, double worst_seeks);
class Join_tab_compare_default {
public:
/**
"Less than" comparison function object used to compare two JOIN_TAB
objects based on a number of factors in this order:
- table before another table that depends on it (straight join,
outer join etc), then
- table before another table that depends on it to use a key
as access method, then
- table with smallest number of records first, then
- the table with lowest-value pointer (i.e., the one located
in the lowest memory address) first.
@param jt1 first JOIN_TAB object
@param jt2 second JOIN_TAB object
@note The order relation implemented by Join_tab_compare_default is not
transitive, i.e. it is possible to choose a, b and c such that
(a @< b) && (b @< c) but (c @< a). This is the case in the
following example:
a: dependent = @<none@> found_records = 3
b: dependent = @<none@> found_records = 4
c: dependent = b found_records = 2
a @< b: because a has fewer records
b @< c: because c depends on b (e.g outer join dependency)
c @< a: because c has fewer records
This implies that the result of a sort using the relation
implemented by Join_tab_compare_default () depends on the order in
which elements are compared, i.e. the result is
implementation-specific.
@return
true if jt1 is smaller than jt2, false otherwise
*/
bool operator()(const JOIN_TAB *jt1, const JOIN_TAB *jt2) const;
};
#endif /* SQL_PLANNER_INCLUDED */