@@ -112,12 +112,13 @@ constexpr double kMaterializeOneRowCost = 0.1;
112
112
class CostingReceiver {
113
113
public:
114
114
CostingReceiver (
115
- THD *thd, const JoinHypergraph &graph,
115
+ THD *thd, const JoinHypergraph &graph, bool need_rowid,
116
116
uint64_t supported_access_path_types,
117
117
secondary_engine_modify_access_path_cost_t secondary_engine_cost_hook,
118
118
string *trace)
119
119
: m_thd(thd),
120
120
m_graph (graph),
121
+ m_need_rowid(need_rowid),
121
122
m_supported_access_path_types(supported_access_path_types),
122
123
m_secondary_engine_cost_hook(secondary_engine_cost_hook),
123
124
m_trace(trace) {
@@ -162,6 +163,14 @@ class CostingReceiver {
162
163
/// The graph we are running over.
163
164
const JoinHypergraph &m_graph;
164
165
166
+ /// Whether we will be needing row IDs from our tables, typically for
167
+ /// a later sort. If this happens, derived tables cannot use streaming,
168
+ /// but need an actual materialization, since filesort expects to be
169
+ /// able to go back and ask for a given row. (This is different from
170
+ /// when we need row IDs for weedout, which doesn't preclude streaming.
171
+ /// The hypergraph optimizer does not use weedout.)
172
+ bool m_need_rowid;
173
+
165
174
/// The supported access path types. Access paths of types not in
166
175
/// this set should not be created. It is currently only used to
167
176
/// limit which join types to use, so any bit that does not
@@ -588,9 +597,9 @@ bool CostingReceiver::ProposeTableScan(TABLE *table, int node_idx) {
588
597
// Handled in clear_corr_something_something, not here
589
598
rematerialize = false ;
590
599
}
591
- materialize_path =
592
- GetAccessPathForDerivedTable ( m_thd, tl, table, rematerialize,
593
- /* invalidators=*/ nullptr , path);
600
+ materialize_path = GetAccessPathForDerivedTable (
601
+ m_thd, tl, table, rematerialize,
602
+ /* invalidators=*/ nullptr , m_need_rowid , path);
594
603
}
595
604
596
605
// TODO(sgunders): Take rematerialization cost into account,
@@ -1588,6 +1597,19 @@ AccessPath *FindBestQueryPlan(THD *thd, SELECT_LEX *select_lex, string *trace) {
1588
1597
}
1589
1598
}
1590
1599
1600
+ // Figure out if any later sort will need row IDs.
1601
+ bool need_rowid = false ;
1602
+ if (select_lex->is_explicitly_grouped () || select_lex->is_ordered () ||
1603
+ join->select_distinct ) {
1604
+ for (TABLE_LIST *tl = select_lex->leaf_tables ; tl != nullptr ;
1605
+ tl = tl->next_leaf ) {
1606
+ if (SortWillBeOnRowId (tl->table )) {
1607
+ need_rowid = true ;
1608
+ break ;
1609
+ }
1610
+ }
1611
+ }
1612
+
1591
1613
// Run the actual join optimizer algorithm. This creates an access path
1592
1614
// for the join as a whole (with lowest possible cost, and thus also
1593
1615
// hopefully optimal execution time), with all pushable predicates applied.
@@ -1599,7 +1621,8 @@ AccessPath *FindBestQueryPlan(THD *thd, SELECT_LEX *select_lex, string *trace) {
1599
1621
}
1600
1622
const secondary_engine_modify_access_path_cost_t secondary_engine_cost_hook =
1601
1623
SecondaryEngineCostHook (thd);
1602
- CostingReceiver receiver (thd, graph, SupportedAccessPathTypes (thd),
1624
+ CostingReceiver receiver (thd, graph, need_rowid,
1625
+ SupportedAccessPathTypes (thd),
1603
1626
secondary_engine_cost_hook, trace);
1604
1627
if (EnumerateAllConnectedPartitions (graph.graph , &receiver) &&
1605
1628
!thd->is_error ()) {
0 commit comments