Skip to content

Commit b8e1fd2

Browse files
author
Darko Makreshanski
committed
WL#15730: Lakehouse full query support
- This provides random scan and parallel scan implementations for Lakehouse tables, so that the MySQL query engine can be used as a fallback for queries not supported by the Rapid secondary engine. - It issues a select query on Rapid every time a scan needs to be performed. - For random scan support (general MySQL queries), a separate thread is spawned for processing the results from the Rapid secondary engine. This enables pipelining between parsing of results and feeding the MySQL query engine with rows. If the MySQL query finishes the random scan, the Rapid query is aborted. - Implementation of the modules for basic accesses to a Lakehouse table by processing the next record or a stored record. - Performance schema statistics per query and per Lakehouse table: for each query offloaded to Lakehouse, show how many scans it performed and any failure message. Similarly, for each loaded Lakehouse table that participates in a query offloaded to Rapid, show the number of started scans, successful scans, failed scans and aborted scans. Change-Id: If54204be287bc689d757dd011f2b5e1bbb124fa4
1 parent 53cc761 commit b8e1fd2

File tree

7 files changed

+73
-55
lines changed

7 files changed

+73
-55
lines changed

sql/handler.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2442,6 +2442,16 @@ using compare_secondary_engine_cost_t = bool (*)(THD *thd, const JOIN &join,
24422442
using secondary_engine_modify_access_path_cost_t = bool (*)(
24432443
THD *thd, const JoinHypergraph &hypergraph, AccessPath *access_path);
24442444

2445+
/**
2446+
Checks whether the tables used in an explain query are loaded in the secondary
2447+
engine.
2448+
@param thd thread context.
2449+
2450+
@retval true if there is a table not loaded to the secondary engine, false
2451+
otherwise
2452+
*/
2453+
using external_engine_explain_check_t = bool (*)(THD *thd);
2454+
24452455
/**
24462456
Looks up and returns a specific secondary engine query offload or exec
24472457
failure reason as a string given a thread context (representing the query)
@@ -2914,6 +2924,12 @@ struct handlerton {
29142924
/// does not support the hypergraph join optimizer.
29152925
SecondaryEngineFlags secondary_engine_flags;
29162926

2927+
/// Pointer to a function that checks if the table is loaded in the
2928+
/// secondary engine in the case of an explain statement.
2929+
///
2930+
/// @see external_engine_explain_check_t for function signature.
2931+
external_engine_explain_check_t external_engine_explain_check;
2932+
29172933
/// Pointer to a function that evaluates the cost of executing an access path
29182934
/// in a secondary storage engine.
29192935
///

sql/sql_base.cc

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6915,14 +6915,6 @@ bool open_tables_for_query(THD *thd, Table_ref *tables, uint flags) {
69156915
&prelocking_strategy))
69166916
goto end;
69176917

6918-
for (Table_ref *tr = tables; tr != nullptr; tr = tr->next_global) {
6919-
if (tr->is_external()) {
6920-
thd->lex->set_execute_only_in_secondary_engine(
6921-
/*execute_only_in_secondary_engine_param=*/true, EXTERNAL);
6922-
break;
6923-
}
6924-
}
6925-
69266918
if (open_secondary_engine_tables(thd, flags)) goto end;
69276919

69286920
return false;

sql/sql_lex.cc

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4344,24 +4344,6 @@ bool LEX::locate_var_assignment(const Name_string &name) {
43444344
return false;
43454345
}
43464346

4347-
/**
4348-
Get the reason why a query cannot be executed in the primary engine.
4349-
4350-
@returns a text representation of the reason the query is not supported in the
4351-
primary engine.
4352-
*/
4353-
const char *LEX::get_not_supported_in_primary_reason() {
4354-
assert(can_execute_only_in_secondary_engine());
4355-
switch (m_execute_only_in_secondary_engine_reason) {
4356-
case CUBE:
4357-
return "CUBE";
4358-
case EXTERNAL:
4359-
return "External engine as primary storage engine";
4360-
default:
4361-
return "UNDEFINED";
4362-
}
4363-
}
4364-
43654347
/**
43664348
Save properties for ORDER clauses so that they can be reconstructed
43674349
for a new optimization of the query block.

sql/sql_lex.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3725,7 +3725,7 @@ class LEX_GRANT_AS {
37253725
"execute_only_in_secondary_reasons" retains the explanations for queries that
37263726
cannot be executed using the primary engine.
37273727
*/
3728-
enum execute_only_in_secondary_reasons { SUPPORTED_IN_PRIMARY, CUBE, EXTERNAL };
3728+
enum execute_only_in_secondary_reasons { SUPPORTED_IN_PRIMARY, CUBE };
37293729

37303730
/*
37313731
Some queries can be executed only using the hypergraph optimizer. The enum
@@ -3873,10 +3873,6 @@ struct LEX : public Query_tables_list {
38733873
/// Leaf table being inserted into (always a base table)
38743874
Table_ref *insert_table_leaf;
38753875

3876-
bool has_external_tables() const {
3877-
return (m_execute_only_in_secondary_engine_reason == EXTERNAL);
3878-
}
3879-
38803876
/** SELECT of CREATE VIEW statement */
38813877
LEX_STRING create_view_query_block;
38823878

@@ -3955,8 +3951,15 @@ struct LEX : public Query_tables_list {
39553951
reason == SUPPORTED_IN_PRIMARY);
39563952
}
39573953

3958-
const char *get_not_supported_in_primary_reason();
3959-
3954+
/// Get the reason why a query cannot be executed in the primary engine.
///
/// Asserts that can_execute_only_in_secondary_engine() holds before looking
/// at the stored reason.
///
/// @returns a text representation of the reason: "CUBE" when the stored
///          reason is CUBE, "UNDEFINED" for any other reason value.
const char *get_not_supported_in_primary_reason() {
3955+
assert(can_execute_only_in_secondary_engine());
3956+
switch (m_execute_only_in_secondary_engine_reason) {
3957+
case CUBE:
3958+
return "CUBE";
3959+
default:
3960+
return "UNDEFINED";
3961+
}
3962+
}
39603963
bool can_execute_only_in_hypergraph_optimizer() const {
39613964
return m_can_execute_only_in_hypergraph_optimizer;
39623965
}

sql/sql_select.cc

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -377,19 +377,6 @@ std::string_view get_secondary_engine_fail_reason(const LEX *lex) {
377377
return {};
378378
}
379379

380-
void set_external_engine_fail_reason(const LEX *lex, const char *reason) {
381-
if (!lex->thd->is_secondary_engine_forced() && reason != nullptr) {
382-
for (Table_ref *ref = lex->query_tables; ref != nullptr;
383-
ref = ref->next_global) {
384-
if (ref->is_external()) {
385-
ref->table->get_primary_handler()->set_external_table_offload_error(
386-
reason);
387-
break;
388-
}
389-
}
390-
}
391-
}
392-
393380
std::string_view find_secondary_engine_fail_reason(const LEX *lex) {
394381
const auto *hton = get_secondary_engine_handlerton(lex);
395382
if (hton != nullptr &&
@@ -673,6 +660,19 @@ bool Sql_cmd_select::prepare_inner(THD *thd) {
673660
return false;
674661
}
675662

663+
/// Checks whether the statement references at least one external table.
///
/// Walks lex->query_tables through the next_global links and returns as soon
/// as one Table_ref reports is_external().
///
/// @param lex the statement
///
/// @returns true if an external table is found; false if none is found or if
///          lex->m_sql_cmd is null.
bool has_external_table(const LEX *lex) {
664+
// Without an attached SQL command, report that no external table is used.
if (lex->m_sql_cmd == nullptr) {
665+
return false;
666+
}
667+
for (Table_ref *ref = lex->query_tables; ref != nullptr;
668+
ref = ref->next_global) {
669+
if (ref->is_external()) {
670+
return true;
671+
}
672+
}
673+
return false;
674+
}
675+
676676
bool Sql_cmd_dml::execute(THD *thd) {
677677
DBUG_TRACE;
678678

@@ -909,8 +909,11 @@ static bool retry_with_secondary_engine(THD *thd) {
909909

910910
// Only attempt to use the secondary engine if the estimated cost of the query
911911
// is higher than the specified cost threshold.
912-
if (thd->m_current_query_cost <=
913-
thd->variables.secondary_engine_cost_threshold) {
912+
// We allow any query to be executed in the secondary_engine when it involves
913+
// external tables.
914+
if (!has_external_table(thd->lex) &&
915+
(thd->m_current_query_cost <=
916+
thd->variables.secondary_engine_cost_threshold)) {
914917
Opt_trace_context *const trace = &thd->opt_trace;
915918
if (trace->is_started()) {
916919
const Opt_trace_object wrapper(trace);
@@ -1000,6 +1003,16 @@ bool Sql_cmd_dml::execute_inner(THD *thd) {
10001003
// We know by now that execution will complete (successful or with error)
10011004
lex->set_exec_completed();
10021005
if (lex->is_explain()) {
1006+
for (Table_ref *ref = lex->query_tables; ref != nullptr;
1007+
ref = ref->next_global) {
1008+
if (ref->table != nullptr && ref->table->file != nullptr) {
1009+
handlerton *hton = ref->table->file->ht;
1010+
if (hton->external_engine_explain_check != nullptr) {
1011+
if (hton->external_engine_explain_check(thd)) return true;
1012+
}
1013+
}
1014+
}
1015+
10031016
if (explain_query(thd, thd, unit)) return true; /* purecov: inspected */
10041017
} else {
10051018
if (unit->execute(thd)) return true;

sql/sql_select.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1060,6 +1060,14 @@ void accumulate_statement_cost(const LEX *lex);
10601060
*/
10611061
const handlerton *get_secondary_engine_handlerton(const LEX *lex);
10621062

1063+
/**
1064+
Checks if any of the tables referenced belong to an external engine.
1065+
If an external table is found, return true, false otherwise.
1066+
1067+
@param lex the statement
1068+
*/
1069+
bool has_external_table(const LEX *lex);
1070+
10631071
/**
10641072
Sets the reason of failure for the statement to the external engine.
10651073

sql/table.cc

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7324,13 +7324,17 @@ uint Table_ref::get_hidden_field_count_for_derived() const {
73247324
}
73257325

73267326
bool Table_ref::is_external() const {
7327-
if (is_placeholder()) return false;
7328-
assert(table != nullptr);
7329-
handler *primary_handler = table->get_primary_handler();
7330-
assert(primary_handler != nullptr);
7331-
7332-
return Overlaps(primary_handler->ht->flags, HTON_SUPPORTS_EXTERNAL_SOURCE) &&
7333-
primary_handler->get_table_share()->has_secondary_engine();
7327+
if (m_table_ref_type == TABLE_REF_BASE_TABLE && table != nullptr &&
7328+
table->file != nullptr) {
7329+
if (is_placeholder()) return false;
7330+
handler *primary_handler = table->get_primary_handler();
7331+
return primary_handler != nullptr &&
7332+
Overlaps(primary_handler->ht->flags,
7333+
HTON_SUPPORTS_EXTERNAL_SOURCE) &&
7334+
primary_handler->get_table_share() != nullptr &&
7335+
primary_handler->get_table_share()->has_secondary_engine();
7336+
}
7337+
return false;
73347338
}
73357339

73367340
void LEX_MFA::copy(LEX_MFA *m, MEM_ROOT *alloc) {

0 commit comments

Comments
 (0)