From 8ea2f846d263a7958def001933b34dbebfd28472 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Sat, 3 Dec 2022 11:09:34 +0800 Subject: [PATCH 01/14] Push filter down hash inner join --- src/graph/context/Symbols.cpp | 26 +-- src/graph/context/Symbols.h | 6 - src/graph/optimizer/CMakeLists.txt | 1 + src/graph/optimizer/OptGroup.cpp | 22 +- src/graph/optimizer/OptGroup.h | 16 ++ src/graph/optimizer/OptRule.cpp | 32 ++- src/graph/optimizer/OptRule.h | 10 +- src/graph/optimizer/Optimizer.cpp | 5 + src/graph/optimizer/Optimizer.h | 2 + .../rule/PushFilterDownAggregateRule.cpp | 12 +- .../rule/PushFilterDownHashInnerJoinRule.cpp | 133 ++++++++++++ .../rule/PushFilterDownHashInnerJoinRule.h | 37 ++++ .../rule/PushFilterDownInnerJoinRule.cpp | 22 +- .../rule/PushFilterDownLeftJoinRule.cpp | 22 +- .../rule/PushFilterDownProjectRule.cpp | 41 ++-- .../rule/PushFilterDownProjectRule.h | 2 + .../optimizer/rule/RemoveNoopProjectRule.cpp | 51 +++-- .../optimizer/rule/RemoveNoopProjectRule.h | 4 +- src/graph/planner/match/ArgumentFinder.cpp | 3 - src/graph/planner/plan/PlanNode.cpp | 15 +- src/graph/planner/plan/PlanNode.h | 2 + src/graph/util/ExpressionUtils.cpp | 18 ++ src/graph/util/ExpressionUtils.h | 7 +- .../features/match/AllShortestPaths.feature | 14 ++ .../match/MultiLineMultiQueryParts.feature | 21 +- .../PushFilterDownHashInnerJoinRule.feature | 205 ++++++++++++++++++ 26 files changed, 588 insertions(+), 141 deletions(-) create mode 100644 src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp create mode 100644 src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h create mode 100644 tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature diff --git a/src/graph/context/Symbols.cpp b/src/graph/context/Symbols.cpp index e360ee897d1..83a493715b2 100644 --- a/src/graph/context/Symbols.cpp +++ b/src/graph/context/Symbols.cpp @@ -81,14 +81,6 @@ bool SymbolTable::deleteWrittenBy(const std::string& varName, PlanNode* node) { if (var == vars_.end()) { return false; } - for (auto& alias : var->second->colNames) { - auto found = aliasGeneratedBy_.find(alias); - if (found != aliasGeneratedBy_.end()) { - if (found->second == varName) { - aliasGeneratedBy_.erase(alias); - } - } - } var->second->writtenBy.erase(node); return true; } @@ -106,6 +98,7 @@ bool SymbolTable::updateWrittenBy(const std::string& oldVar, } Variable* SymbolTable::getVar(const std::string& varName) { + DCHECK(!varName.empty()) << "the variable name is empty"; auto var = vars_.find(varName); if (var == vars_.end()) { return nullptr; @@ -114,22 +107,5 @@ Variable* SymbolTable::getVar(const std::string& varName) { } } -void SymbolTable::setAliasGeneratedBy(const std::vector& aliases, - const std::string& varName) { - for (auto& alias : aliases) { - if (aliasGeneratedBy_.count(alias) == 0) { - aliasGeneratedBy_.emplace(alias, varName); - } - } -} - -StatusOr SymbolTable::getAliasGeneratedBy(const std::string& alias) { - auto found = aliasGeneratedBy_.find(alias); - if (found == aliasGeneratedBy_.end()) { - return Status::Error("Not found a variable that generates the alias: %s", alias.c_str()); - } else { - return found->second; - } -} } // namespace graph } // namespace nebula diff --git a/src/graph/context/Symbols.h b/src/graph/context/Symbols.h index fb30d957770..bcf3b44b815 100644 --- a/src/graph/context/Symbols.h +++ b/src/graph/context/Symbols.h @@ -72,10 +72,6 @@ class SymbolTable final { Variable* getVar(const std::string& varName); - void setAliasGeneratedBy(const std::vector& aliases, const std::string& varName); - - StatusOr getAliasGeneratedBy(const std::string& alias); - std::string toString() const; private: @@ -85,8 +81,6 @@ class SymbolTable final { ExecutionContext* ectx_{nullptr}; // var name -> variable std::unordered_map vars_; - // alias -> first variable that generate the alias - std::unordered_map aliasGeneratedBy_; }; } // namespace graph diff --git a/src/graph/optimizer/CMakeLists.txt b/src/graph/optimizer/CMakeLists.txt index f5ec5cc28c1..cae7300d687 100644 --- a/src/graph/optimizer/CMakeLists.txt +++ b/src/graph/optimizer/CMakeLists.txt @@ -27,6 +27,7 @@ nebula_add_library( rule/PushFilterDownAggregateRule.cpp rule/PushFilterDownProjectRule.cpp rule/PushFilterDownLeftJoinRule.cpp + rule/PushFilterDownHashInnerJoinRule.cpp rule/PushFilterDownInnerJoinRule.cpp rule/PushFilterDownNodeRule.cpp rule/PushFilterDownScanVerticesRule.cpp diff --git a/src/graph/optimizer/OptGroup.cpp b/src/graph/optimizer/OptGroup.cpp index 5233481d582..2b68497d229 100644 --- a/src/graph/optimizer/OptGroup.cpp +++ b/src/graph/optimizer/OptGroup.cpp @@ -96,7 +96,7 @@ Status OptGroup::explore(const OptRule *rule) { // inconsistent. For now, let the optimization rules themselves guarantee correctness. if (result.eraseAll) { for (auto gnode : groupNodes_) { - gnode->node()->releaseSymbols(); + gnode->release(); } groupNodes_.clear(); for (auto ngn : result.newGroupNodes) { @@ -114,7 +114,7 @@ Status OptGroup::explore(const OptRule *rule) { } if (result.eraseCurr) { - (*iter)->node()->releaseSymbols(); + (*iter)->release(); iter = groupNodes_.erase(iter); } else { ++iter; @@ -159,6 +159,17 @@ const PlanNode *OptGroup::getPlan() const { return minGroupNode->getPlan(); } +void OptGroup::deleteRefGroupNode(const OptGroupNode *node) { + groupNodesReferenced_.erase(node); + if (groupNodesReferenced_.empty()) { + // Cleanup all opt group nodes in current opt group if it's NOT referenced by any other opt + // group nodes + for (auto *n : groupNodes_) { + n->release(); + } + } +} + OptGroupNode *OptGroupNode::create(OptContext *ctx, PlanNode *node, const OptGroup *group) { auto optGNode = ctx->objPool()->makeAndAdd(node, group); ctx->addPlanNodeAndOptGroupNode(node->id(), optGNode); @@ -224,5 +235,12 @@ const PlanNode *OptGroupNode::getPlan() const { return node_; } +void OptGroupNode::release() { + node_->releaseSymbols(); + for (auto *dep : dependencies_) { + dep->deleteRefGroupNode(this); + } +} + } // namespace opt } // namespace nebula diff --git a/src/graph/optimizer/OptGroup.h b/src/graph/optimizer/OptGroup.h index 276bcc75bb8..71e96921b94 100644 --- a/src/graph/optimizer/OptGroup.h +++ b/src/graph/optimizer/OptGroup.h @@ -48,6 +48,12 @@ class OptGroup final { return outputVar_; } + void addRefGroupNode(const OptGroupNode *node) { + groupNodesReferenced_.insert(node); + } + + void deleteRefGroupNode(const OptGroupNode *node); + private: friend ObjectPool; explicit OptGroup(OptContext *ctx) noexcept; @@ -61,6 +67,9 @@ class OptGroup final { std::vector exploredRules_; // The output variable should be same across the whole group. std::string outputVar_; + + // Save the OptGroupNode which references this OptGroup + std::unordered_set groupNodesReferenced_; }; class OptGroupNode final { @@ -69,6 +78,7 @@ class OptGroupNode final { void dependsOn(OptGroup *dep) { dependencies_.emplace_back(dep); + dep->addRefGroupNode(this); } const std::vector &dependencies() const { @@ -77,6 +87,9 @@ class OptGroupNode final { void setDeps(std::vector deps) { dependencies_ = deps; + for (auto *dep : deps) { + dep->addRefGroupNode(this); + } } void addBody(OptGroup *body) { @@ -109,6 +122,9 @@ class OptGroupNode final { double getCost() const; const graph::PlanNode *getPlan() const; + // Release the opt group node from its opt group + void release(); + private: friend ObjectPool; OptGroupNode(graph::PlanNode *node, const OptGroup *group) noexcept; diff --git a/src/graph/optimizer/OptRule.cpp b/src/graph/optimizer/OptRule.cpp index 1b7eab7ecc9..68b934f1686 100644 --- a/src/graph/optimizer/OptRule.cpp +++ b/src/graph/optimizer/OptRule.cpp @@ -17,8 +17,13 @@ namespace nebula { namespace opt { const PlanNode *MatchedResult::planNode(const std::vector &pos) const { + auto *pnode = DCHECK_NOTNULL(result(pos).node)->node(); + return DCHECK_NOTNULL(pnode); +} + +const MatchedResult &MatchedResult::result(const std::vector &pos) const { if (pos.empty()) { - return DCHECK_NOTNULL(node)->node(); + return *this; } DCHECK_EQ(pos[0], 0); @@ -28,7 +33,7 @@ const PlanNode *MatchedResult::planNode(const std::vector &pos) const { DCHECK_LT(pos[i], result->dependencies.size()); result = &result->dependencies[pos[i]]; } - return DCHECK_NOTNULL(result->node)->node(); + return *DCHECK_NOTNULL(result); } void MatchedResult::collectBoundary(std::vector &boundary) const { @@ -93,8 +98,9 @@ bool OptRule::TransformResult::checkDataFlow(const std::vector &boun }); } -/*static*/ bool OptRule::TransformResult::checkDataFlow(const OptGroupNode *groupNode, - const std::vector &boundary) { +/*static*/ +bool OptRule::TransformResult::checkDataFlow(const OptGroupNode *groupNode, + const std::vector &boundary) { const auto &deps = groupNode->dependencies(); // reach the boundary if (std::all_of(deps.begin(), deps.end(), [&boundary](OptGroup *dep) { @@ -110,13 +116,26 @@ bool OptRule::TransformResult::checkDataFlow(const std::vector &boun const auto *node = groupNode->node(); if (node->inputVars().size() == deps.size()) { // Don't check when count of dependencies is different from count of input variables + auto checkNumReadBy = [](const graph::Variable *v) -> bool { + switch (v->readBy.size()) { + case 1: + return true; + case 2: + // There is at least one Argument plan node if this variable read by 2 other nodes + return std::any_of(v->readBy.begin(), v->readBy.end(), [](auto *n) { + return n->kind() == graph::PlanNode::Kind::kArgument; + }); + default: + return false; + } + }; for (std::size_t i = 0; i < deps.size(); i++) { const OptGroup *dep = deps[i]; if (node->inputVar(i) != dep->outputVar()) { return false; } // Only use by father plan node - if (node->inputVars()[i]->readBy.size() != 1) { + if (!checkNumReadBy(node->inputVars()[i])) { return false; } return std::all_of( @@ -159,7 +178,8 @@ bool OptRule::checkDataflowDeps(OptContext *ctx, if (!isRoot) { for (auto pnode : outVar->readBy) { auto optGNode = ctx->findOptGroupNodeByPlanNodeId(pnode->id()); - if (!optGNode) continue; + // Ignore the data dependency introduced by Argument plan node + if (!optGNode || optGNode->node()->kind() == graph::PlanNode::Kind::kArgument) continue; const auto &deps = optGNode->dependencies(); auto found = std::find(deps.begin(), deps.end(), node->group()); if (found == deps.end()) { diff --git a/src/graph/optimizer/OptRule.h b/src/graph/optimizer/OptRule.h index f19013e7489..80eaf78fe8e 100644 --- a/src/graph/optimizer/OptRule.h +++ b/src/graph/optimizer/OptRule.h @@ -40,6 +40,7 @@ struct MatchedResult { // {0, 1, 0} | this->dependencies[1].dependencies[0] // {0, 1, 0, 1} | this->dependencies[1].dependencies[0].dependencies[1] const graph::PlanNode *planNode(const std::vector &pos = {}) const; + const MatchedResult &result(const std::vector &pos = {}) const; void collectBoundary(std::vector &boundary) const; }; @@ -52,8 +53,13 @@ class MatchNode { : node_(std::move(kinds)) {} bool match(const graph::PlanNode *node) const { - auto find = node_.find(node->kind()); - return find != node_.end(); + for (auto kind : node_) { + // Any plan node is expected if the kind of node is unknown + if (kind == node->kind() || kind == graph::PlanNode::Kind::kUnknown) { + return true; + } + } + return false; } private: diff --git a/src/graph/optimizer/Optimizer.cpp b/src/graph/optimizer/Optimizer.cpp index 7427ad76d4c..e9e26f5a24e 100644 --- a/src/graph/optimizer/Optimizer.cpp +++ b/src/graph/optimizer/Optimizer.cpp @@ -125,5 +125,10 @@ void Optimizer::addBodyToGroupNode(OptContext *ctx, gnode->addBody(body); } +// static +Status rewriteArgumentInputVar(PlanNode *root) { + return Status::OK(); +} + } // namespace opt } // namespace nebula diff --git a/src/graph/optimizer/Optimizer.h b/src/graph/optimizer/Optimizer.h index b09a48ef4a8..9b1c2039706 100644 --- a/src/graph/optimizer/Optimizer.h +++ b/src/graph/optimizer/Optimizer.h @@ -47,6 +47,8 @@ class Optimizer final { OptGroupNode *gnode, std::unordered_map *visited); + static Status rewriteArgumentInputVar(PlanNode *root); + static constexpr int8_t kMaxIterationRound = 5; std::vector ruleSets_; diff --git a/src/graph/optimizer/rule/PushFilterDownAggregateRule.cpp b/src/graph/optimizer/rule/PushFilterDownAggregateRule.cpp index 315b9237d3d..b15ae22f350 100644 --- a/src/graph/optimizer/rule/PushFilterDownAggregateRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownAggregateRule.cpp @@ -52,20 +52,16 @@ StatusOr PushFilterDownAggregateRule::transform( if (varProps.empty()) { return TransformResult::noTransform(); } - std::vector propNames; - for (auto* expr : varProps) { - DCHECK_EQ(expr->kind(), Expression::Kind::kVarProperty); - propNames.emplace_back(static_cast(expr)->prop()); - } std::unordered_map rewriteMap; auto colNames = newAggNode->colNames(); DCHECK_EQ(newAggNode->colNames().size(), newAggNode->groupItems().size()); for (size_t i = 0; i < colNames.size(); ++i) { auto& colName = colNames[i]; - auto iter = std::find_if(propNames.begin(), propNames.end(), [&colName](const auto& name) { - return !colName.compare(name); + auto iter = std::find_if(varProps.begin(), varProps.end(), [&colName](const auto* expr) { + DCHECK_EQ(expr->kind(), Expression::Kind::kVarProperty); + return !colName.compare(static_cast(expr)->prop()); }); - if (iter == propNames.end()) continue; + if (iter == varProps.end()) continue; if (graph::ExpressionUtils::findAny(groupItems[i], {Expression::Kind::kAggregate})) { return TransformResult::noTransform(); } diff --git a/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp new file mode 100644 index 00000000000..b5377aa4ecb --- /dev/null +++ b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp @@ -0,0 +1,133 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h" + +#include "graph/optimizer/OptContext.h" +#include "graph/optimizer/OptGroup.h" +#include "graph/planner/plan/PlanNode.h" +#include "graph/planner/plan/Query.h" +#include "graph/util/ExpressionUtils.h" + +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; + +namespace nebula { +namespace opt { + +std::unique_ptr PushFilterDownHashInnerJoinRule::kInstance = + std::unique_ptr(new PushFilterDownHashInnerJoinRule()); + +PushFilterDownHashInnerJoinRule::PushFilterDownHashInnerJoinRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern& PushFilterDownHashInnerJoinRule::pattern() const { + static Pattern pattern = Pattern::create( + PlanNode::Kind::kFilter, + {Pattern::create( + PlanNode::Kind::kBiInnerJoin, + {Pattern::create(PlanNode::Kind::kUnknown), Pattern::create(PlanNode::Kind::kUnknown)})}); + return pattern; +} + +StatusOr PushFilterDownHashInnerJoinRule::transform( + OptContext* octx, const MatchedResult& matched) const { + auto* filterGroupNode = matched.node; + auto* oldFilterNode = filterGroupNode->node(); + DCHECK_EQ(oldFilterNode->kind(), PlanNode::Kind::kFilter); + + auto* innerJoinNode = matched.planNode({0, 0}); + DCHECK_EQ(innerJoinNode->kind(), PlanNode::Kind::kBiInnerJoin); + auto* oldInnerJoinNode = static_cast(innerJoinNode); + + const auto* condition = static_cast(oldFilterNode)->condition(); + DCHECK(condition); + + const auto& leftResult = matched.result({0, 0, 0}); + const auto& rightResult = matched.result({0, 0, 1}); + + Expression *leftFilterUnpicked = nullptr, *rightFilterUnpicked = nullptr; + OptGroup* leftGroup = pushFilterDownChild(octx, leftResult, condition, &leftFilterUnpicked); + OptGroup* rightGroup = + pushFilterDownChild(octx, rightResult, leftFilterUnpicked, &rightFilterUnpicked); + + if (!leftGroup && !rightGroup) { + return TransformResult::noTransform(); + } + + leftGroup = leftGroup ? leftGroup : const_cast(leftResult.node->group()); + rightGroup = rightGroup ? rightGroup : const_cast(rightResult.node->group()); + + // produce new InnerJoin node + auto* newInnerJoinNode = static_cast(oldInnerJoinNode->clone()); + auto newJoinGroup = rightFilterUnpicked ? OptGroup::create(octx) : filterGroupNode->group(); + auto newGroupNode = OptGroupNode::create(octx, newInnerJoinNode, newJoinGroup); + newGroupNode->dependsOn(leftGroup); + newGroupNode->dependsOn(rightGroup); + newInnerJoinNode->setLeftVar(leftGroup->outputVar()); + newInnerJoinNode->setRightVar(rightGroup->outputVar()); + + if (rightFilterUnpicked) { + auto newFilterNode = graph::Filter::make(octx->qctx(), nullptr, rightFilterUnpicked); + newFilterNode->setOutputVar(oldFilterNode->outputVar()); + newFilterNode->setColNames(oldFilterNode->colNames()); + newFilterNode->setInputVar(newInnerJoinNode->outputVar()); + newGroupNode = OptGroupNode::create(octx, newFilterNode, filterGroupNode->group()); + newGroupNode->dependsOn(const_cast(newJoinGroup)); + } else { + newInnerJoinNode->setOutputVar(oldFilterNode->outputVar()); + newInnerJoinNode->setColNames(oldFilterNode->colNames()); + } + + TransformResult result; + result.eraseAll = true; + result.newGroupNodes.emplace_back(newGroupNode); + return result; +} + +OptGroup* PushFilterDownHashInnerJoinRule::pushFilterDownChild(OptContext* octx, + const MatchedResult& child, + const Expression* condition, + Expression** unpickedFilter) { + if (!condition) return nullptr; + + const auto* childPlanNode = DCHECK_NOTNULL(child.node->node()); + const auto& colNames = childPlanNode->colNames(); + + // split the `condition` based on whether the varPropExpr comes from the left child + auto picker = [&colNames](const Expression* e) -> bool { + return graph::ExpressionUtils::checkVarPropIfExist(colNames, e); + }; + + Expression* filterPicked = nullptr; + graph::ExpressionUtils::splitFilter(condition, picker, &filterPicked, unpickedFilter); + if (!filterPicked) return nullptr; + + auto* newChildPlanNode = childPlanNode->clone(); + DCHECK_NE(childPlanNode->outputVar(), newChildPlanNode->outputVar()); + newChildPlanNode->setInputVar(childPlanNode->inputVar()); + newChildPlanNode->setColNames(childPlanNode->colNames()); + auto* newChildGroup = OptGroup::create(octx); + auto* newChildGroupNode = newChildGroup->makeGroupNode(newChildPlanNode); + for (auto* g : child.node->dependencies()) { + newChildGroupNode->dependsOn(g); + } + + auto* newFilterNode = graph::Filter::make(octx->qctx(), nullptr, filterPicked); + newFilterNode->setOutputVar(childPlanNode->outputVar()); + newFilterNode->setColNames(colNames); + newFilterNode->setInputVar(newChildPlanNode->outputVar()); + auto* group = OptGroup::create(octx); + group->makeGroupNode(newFilterNode)->dependsOn(newChildGroup); + return group; +} + +std::string PushFilterDownHashInnerJoinRule::toString() const { + return "PushFilterDownHashInnerJoinRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h new file mode 100644 index 00000000000..f8fb0976337 --- /dev/null +++ b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#ifndef GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNHASHINNERJOINRULE_H_ +#define GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNHASHINNERJOINRULE_H_ + +#include "graph/optimizer/OptRule.h" + +namespace nebula { +namespace opt { + +// Push down the filter items from the left subplan of [[BiInnerJoin]] +class PushFilterDownHashInnerJoinRule final : public OptRule { + public: + const Pattern &pattern() const override; + + StatusOr transform(OptContext *octx, + const MatchedResult &matched) const override; + + std::string toString() const override; + + private: + PushFilterDownHashInnerJoinRule(); + static OptGroup *pushFilterDownChild(OptContext *octx, + const MatchedResult &child, + const Expression *condition, + Expression **unpickedFilter); + + static std::unique_ptr kInstance; +}; + +} // namespace opt +} // namespace nebula + +#endif // GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNHASHINNERJOINRULE_H_ diff --git a/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp b/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp index 566d5d1be1c..dabf3105a07 100644 --- a/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp @@ -47,27 +47,9 @@ StatusOr PushFilterDownInnerJoinRule::transform( auto symTable = octx->qctx()->symTable(); std::vector leftVarColNames = symTable->getVar(leftVar.first)->colNames; - // split the `condition` based on whether the varPropExpr comes from the left - // child + // split the `condition` based on whether the varPropExpr comes from the left child auto picker = [&leftVarColNames](const Expression* e) -> bool { - auto varProps = graph::ExpressionUtils::collectAll(e, {Expression::Kind::kVarProperty}); - if (varProps.empty()) { - return false; - } - std::vector propNames; - for (auto* expr : varProps) { - DCHECK(expr->kind() == Expression::Kind::kVarProperty); - propNames.emplace_back(static_cast(expr)->prop()); - } - for (auto prop : propNames) { - auto iter = std::find_if(leftVarColNames.begin(), - leftVarColNames.end(), - [&prop](std::string item) { return !item.compare(prop); }); - if (iter == leftVarColNames.end()) { - return false; - } - } - return true; + return graph::ExpressionUtils::checkVarPropIfExist(leftVarColNames, e); }; Expression* filterPicked = nullptr; Expression* filterUnpicked = nullptr; diff --git a/src/graph/optimizer/rule/PushFilterDownLeftJoinRule.cpp b/src/graph/optimizer/rule/PushFilterDownLeftJoinRule.cpp index d6c830be679..73c85c057f9 100644 --- a/src/graph/optimizer/rule/PushFilterDownLeftJoinRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownLeftJoinRule.cpp @@ -47,27 +47,9 @@ StatusOr PushFilterDownLeftJoinRule::transform( auto symTable = octx->qctx()->symTable(); std::vector leftVarColNames = symTable->getVar(leftVar.first)->colNames; - // split the `condition` based on whether the varPropExpr comes from the left - // child + // split the `condition` based on whether the varPropExpr comes from the left child auto picker = [&leftVarColNames](const Expression* e) -> bool { - auto varProps = graph::ExpressionUtils::collectAll(e, {Expression::Kind::kVarProperty}); - if (varProps.empty()) { - return false; - } - std::vector propNames; - for (auto* expr : varProps) { - DCHECK(expr->kind() == Expression::Kind::kVarProperty); - propNames.emplace_back(static_cast(expr)->prop()); - } - for (auto prop : propNames) { - auto iter = std::find_if(leftVarColNames.begin(), - leftVarColNames.end(), - [&prop](std::string item) { return !item.compare(prop); }); - if (iter == leftVarColNames.end()) { - return false; - } - } - return true; + return graph::ExpressionUtils::checkVarPropIfExist(leftVarColNames, e); }; Expression* filterPicked = nullptr; Expression* filterUnpicked = nullptr; diff --git a/src/graph/optimizer/rule/PushFilterDownProjectRule.cpp b/src/graph/optimizer/rule/PushFilterDownProjectRule.cpp index a15872b57b3..9aaf11d2bdf 100644 --- a/src/graph/optimizer/rule/PushFilterDownProjectRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownProjectRule.cpp @@ -24,6 +24,19 @@ PushFilterDownProjectRule::PushFilterDownProjectRule() { RuleSet::QueryRules().addRule(this); } +bool PushFilterDownProjectRule::checkColumnExprKind(const Expression* expr) { + if (graph::ExpressionUtils::isPropertyExpr(expr)) { + return true; + } + if (expr->kind() == Expression::Kind::kSubscript) { + auto subscriptExpr = static_cast(expr); + if (graph::ExpressionUtils::isPropertyExpr(subscriptExpr->left())) { + return true; + } + } + return false; +} + const Pattern& PushFilterDownProjectRule::pattern() const { static Pattern pattern = Pattern::create(graph::PlanNode::Kind::kFilter, {Pattern::create(graph::PlanNode::Kind::kProject)}); @@ -48,28 +61,26 @@ StatusOr PushFilterDownProjectRule::transform( // Pick the passthrough expression items to avoid the expression overhead after filter pushdown auto picker = [&projColumns, &projColNames, &rewriteMap](const Expression* e) -> bool { auto varProps = graph::ExpressionUtils::collectAll(e, - {Expression::Kind::kTagProperty, - Expression::Kind::kEdgeProperty, - Expression::Kind::kInputProperty, - Expression::Kind::kVarProperty, - Expression::Kind::kDstProperty, - Expression::Kind::kSrcProperty}); + { + Expression::Kind::kTagProperty, + Expression::Kind::kEdgeProperty, + Expression::Kind::kInputProperty, + Expression::Kind::kVarProperty, + Expression::Kind::kDstProperty, + Expression::Kind::kSrcProperty, + }); if (varProps.empty()) { return false; } - std::vector propNames; - for (auto* expr : varProps) { - DCHECK(graph::ExpressionUtils::isPropertyExpr(expr)); - propNames.emplace_back(static_cast(expr)->prop()); - } for (size_t i = 0; i < projColNames.size(); ++i) { auto column = projColumns[i]; auto colName = projColNames[i]; - auto iter = std::find_if(propNames.begin(), propNames.end(), [&colName](const auto& name) { - return !colName.compare(name); + auto iter = std::find_if(varProps.begin(), varProps.end(), [&colName](const auto* expr) { + DCHECK(graph::ExpressionUtils::isPropertyExpr(expr)); + return !colName.compare(static_cast(expr)->prop()); }); - if (iter == propNames.end()) continue; - if (graph::ExpressionUtils::isPropertyExpr(column->expr())) { + if (iter == varProps.end()) continue; + if (checkColumnExprKind(column->expr())) { rewriteMap[colName] = column->expr(); continue; } else { diff --git a/src/graph/optimizer/rule/PushFilterDownProjectRule.h b/src/graph/optimizer/rule/PushFilterDownProjectRule.h index d81d4f80dcd..c69951216d1 100644 --- a/src/graph/optimizer/rule/PushFilterDownProjectRule.h +++ b/src/graph/optimizer/rule/PushFilterDownProjectRule.h @@ -59,6 +59,8 @@ class PushFilterDownProjectRule final : public OptRule { private: PushFilterDownProjectRule(); + static bool checkColumnExprKind(const Expression *expr); + static std::unique_ptr kInstance; }; diff --git a/src/graph/optimizer/rule/RemoveNoopProjectRule.cpp b/src/graph/optimizer/rule/RemoveNoopProjectRule.cpp index ce3b88b2b81..19636b6ad27 100644 --- a/src/graph/optimizer/rule/RemoveNoopProjectRule.cpp +++ b/src/graph/optimizer/rule/RemoveNoopProjectRule.cpp @@ -16,7 +16,8 @@ using nebula::graph::QueryContext; namespace nebula { namespace opt { -/*static*/ const std::initializer_list RemoveNoopProjectRule::kQueries{ +/*static*/ +const std::unordered_set RemoveNoopProjectRule::kQueries{ PlanNode::Kind::kGetNeighbors, PlanNode::Kind::kGetVertices, PlanNode::Kind::kGetEdges, @@ -61,7 +62,8 @@ namespace opt { PlanNode::Kind::kHashInnerJoin, PlanNode::Kind::kCrossJoin, PlanNode::Kind::kRollUpApply, - PlanNode::Kind::kArgument}; + PlanNode::Kind::kArgument, +}; std::unique_ptr RemoveNoopProjectRule::kInstance = std::unique_ptr(new RemoveNoopProjectRule()); @@ -72,21 +74,24 @@ RemoveNoopProjectRule::RemoveNoopProjectRule() { const Pattern& RemoveNoopProjectRule::pattern() const { static Pattern pattern = - Pattern::create(graph::PlanNode::Kind::kProject, {Pattern::create(kQueries)}); + Pattern::create(graph::PlanNode::Kind::kProject, {Pattern::create(PlanNode::Kind::kUnknown)}); return pattern; } StatusOr RemoveNoopProjectRule::transform( OptContext* octx, const MatchedResult& matched) const { - const auto* projGroupNode = matched.node; - const auto* oldProjNode = projGroupNode->node(); - const auto* projGroup = projGroupNode->group(); + const auto* oldProjNode = matched.planNode({0}); DCHECK_EQ(oldProjNode->kind(), PlanNode::Kind::kProject); - const auto* depGroupNode = matched.dependencies.front().node; - auto* newNode = depGroupNode->node()->clone(); + auto* projGroup = const_cast(matched.node->group()); + + const auto* depPlanNode = matched.planNode({0, 0}); + auto* newNode = depPlanNode->clone(); newNode->setOutputVar(oldProjNode->outputVar()); + newNode->setColNames(oldProjNode->colNames()); + newNode->setInputVar(depPlanNode->inputVar()); auto* newGroupNode = OptGroupNode::create(octx, newNode, projGroup); - newGroupNode->setDeps(depGroupNode->dependencies()); + newGroupNode->setDeps(matched.result({0, 0}).node->dependencies()); + TransformResult result; result.eraseAll = true; result.newGroupNodes.emplace_back(newGroupNode); @@ -103,27 +108,29 @@ bool RemoveNoopProjectRule::match(OptContext* octx, const MatchedResult& matched DCHECK_EQ(projGroupNode->node()->kind(), PlanNode::Kind::kProject); auto* projNode = static_cast(projGroupNode->node()); - std::vector cols = projNode->columns()->columns(); - for (auto* col : cols) { - if (col->expr()->kind() != Expression::Kind::kVarProperty && - col->expr()->kind() != Expression::Kind::kInputProperty) { - return false; - } + + const auto* depNode = matched.planNode({0, 0}); + if (kQueries.find(depNode->kind()) == kQueries.end()) { + return false; } - const auto* depGroupNode = matched.dependencies.front().node; - const auto* depNode = depGroupNode->node(); + const auto& depColNames = depNode->colNames(); const auto& projColNames = projNode->colNames(); - auto colsNum = depColNames.size(); - if (colsNum != projColNames.size()) { + auto numCols = depColNames.size(); + if (numCols != projColNames.size()) { return false; } - for (size_t i = 0; i < colsNum; ++i) { - if (depColNames[i].compare(projColNames[i])) { + + std::vector cols = projNode->columns()->columns(); + for (size_t i = 0; i < numCols; ++i) { + const auto* expr = DCHECK_NOTNULL(cols[i]->expr()); + if (expr->kind() != Expression::Kind::kVarProperty && + expr->kind() != Expression::Kind::kInputProperty) { return false; } + const auto* propExpr = static_cast(cols[i]->expr()); - if (propExpr->prop() != projColNames[i]) { + if (propExpr->prop() != projColNames[i] || depColNames[i].compare(projColNames[i])) { return false; } } diff --git a/src/graph/optimizer/rule/RemoveNoopProjectRule.h b/src/graph/optimizer/rule/RemoveNoopProjectRule.h index 6a66a778aa2..0b38d4d8235 100644 --- a/src/graph/optimizer/rule/RemoveNoopProjectRule.h +++ b/src/graph/optimizer/rule/RemoveNoopProjectRule.h @@ -6,7 +6,7 @@ #ifndef GRAPH_OPTIMIZER_RULE_REMOVENOOPPROJECTRULE_H_ #define GRAPH_OPTIMIZER_RULE_REMOVENOOPPROJECTRULE_H_ -#include +#include #include "graph/optimizer/OptRule.h" @@ -43,7 +43,7 @@ class RemoveNoopProjectRule final : public OptRule { private: RemoveNoopProjectRule(); - static const std::initializer_list kQueries; + static const std::unordered_set kQueries; static std::unique_ptr kInstance; }; diff --git a/src/graph/planner/match/ArgumentFinder.cpp b/src/graph/planner/match/ArgumentFinder.cpp index d4d5f2f5720..06ee0c58432 100644 --- a/src/graph/planner/match/ArgumentFinder.cpp +++ b/src/graph/planner/match/ArgumentFinder.cpp @@ -24,9 +24,6 @@ StatusOr ArgumentFinder::transformNode(NodeContext* nodeCtx) { auto alias = nodeCtx->info->alias; auto argNode = Argument::make(nodeCtx->qctx, alias); argNode->setColNames({alias}); - auto aliasGeneratedBy = nodeCtx->qctx->symTable()->getAliasGeneratedBy(alias); - NG_RETURN_IF_ERROR(aliasGeneratedBy); - argNode->setInputVar(aliasGeneratedBy.value()); subplan.root = argNode; subplan.tail = argNode; diff --git a/src/graph/planner/plan/PlanNode.cpp b/src/graph/planner/plan/PlanNode.cpp index 4d386865d1d..f7c1b240591 100644 --- a/src/graph/planner/plan/PlanNode.cpp +++ b/src/graph/planner/plan/PlanNode.cpp @@ -11,7 +11,6 @@ #include #include -#include "PlanNode.h" #include "common/graph/Response.h" #include "graph/context/QueryContext.h" #include "graph/planner/plan/PlanNodeVisitor.h" @@ -355,6 +354,20 @@ void PlanNode::setInputVar(const std::string& varname, size_t idx) { } } +Status PlanNode::isColumnsIncluded(const std::string& varname) const { + auto* refVar = qctx()->symTable()->getVar(varname); + // The referenced variable should have all columns used in current node + for (auto& colName : colNames()) { + auto iter = std::find(refVar->colNames.begin(), refVar->colNames.end(), colName); + if (iter == refVar->colNames.end()) { + return Status::Error("the column referenced by Argument is not included in variable `%s': %s", + varname.c_str(), + colName.c_str()); + } + } + return Status::OK(); +} + std::unique_ptr PlanNode::explain() const { auto desc = std::make_unique(); desc->id = id_; diff --git a/src/graph/planner/plan/PlanNode.h b/src/graph/planner/plan/PlanNode.h index f714038520d..80db483c7b7 100644 --- a/src/graph/planner/plan/PlanNode.h +++ b/src/graph/planner/plan/PlanNode.h @@ -304,6 +304,8 @@ class PlanNode { return static_cast(this); } + Status isColumnsIncluded(const std::string& varname) const; + protected: PlanNode(QueryContext* qctx, Kind kind); diff --git a/src/graph/util/ExpressionUtils.cpp b/src/graph/util/ExpressionUtils.cpp index 476e3381416..d8ce7d8953b 100644 --- a/src/graph/util/ExpressionUtils.cpp +++ b/src/graph/util/ExpressionUtils.cpp @@ -827,6 +827,24 @@ Expression *ExpressionUtils::flattenInnerLogicalExpr(const Expression *expr) { return allFlattenExpr; } +bool ExpressionUtils::checkVarPropIfExist(const std::vector &columns, + const Expression *e) { + auto varProps = graph::ExpressionUtils::collectAll(e, {Expression::Kind::kVarProperty}); + if (varProps.empty()) { + return false; + } + for (const auto *expr : varProps) { + DCHECK_EQ(expr->kind(), Expression::Kind::kVarProperty); + auto iter = std::find_if(columns.begin(), columns.end(), [expr](const std::string &item) { + return !item.compare(static_cast(expr)->prop()); + }); + if (iter == columns.end()) { + return false; + } + } + return true; +} + // pick the subparts of expression that meet picker's criteria void ExpressionUtils::splitFilter(const Expression *expr, std::function picker, diff --git a/src/graph/util/ExpressionUtils.h b/src/graph/util/ExpressionUtils.h index e762b4160ae..75af5021080 100644 --- a/src/graph/util/ExpressionUtils.h +++ b/src/graph/util/ExpressionUtils.h @@ -32,7 +32,7 @@ class ExpressionUtils { // Checks if the kind of the given expression is one of the expected kind static inline bool isKindOf(const Expression* expr, const std::unordered_set& expected) { - return expected.find(expr->kind()) != expected.end(); + return expected.find(DCHECK_NOTNULL(expr)->kind()) != expected.end(); } // Checks if the expression is a property expression (TagProperty or LabelTagProperty or @@ -182,9 +182,12 @@ class ExpressionUtils { // calls flattenInnerLogicalAndExpr() first then executes flattenInnerLogicalOrExpr() static Expression* flattenInnerLogicalExpr(const Expression* expr); + // Check whether there exists the property of variable expression in `columns' + static bool checkVarPropIfExist(const std::vector& columns, const Expression* e); + // Uses the picker to split the given experssion expr into two parts: filterPicked and // filterUnpicked If expr is a non-LogicalAnd expression, applies the picker to expr directly If - // expr is a logicalAnd expression, applies the picker to all its operands + // expr is a logicalAnd expression, applies the picker to all its operands static void splitFilter(const Expression* expr, std::function picker, Expression** filterPicked, diff --git a/tests/tck/features/match/AllShortestPaths.feature b/tests/tck/features/match/AllShortestPaths.feature index 71ea1225b07..c2402ea7710 100644 --- a/tests/tck/features/match/AllShortestPaths.feature +++ b/tests/tck/features/match/AllShortestPaths.feature @@ -475,3 +475,17 @@ Feature: allShortestPaths RETURN p """ Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. + + # Scenario: allShortestPaths for argument issue + # When executing query: + # """ + # MATCH (a:player), (b:player) + # MATCH p= allShortestPaths((a:player {name:"Tim Duncan"})-[*..15]-(b:player {age:33})) + # WITH nodes(p) AS pathNodes + # UNWIND pathNodes AS node + # RETURN DISTINCT node + # """ + # Then the result should be, in any order, with relax comparison: + # | node | + # | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | + # | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | diff --git a/tests/tck/features/match/MultiLineMultiQueryParts.feature b/tests/tck/features/match/MultiLineMultiQueryParts.feature index fafc5ae4d87..0049557407e 100644 --- a/tests/tck/features/match/MultiLineMultiQueryParts.feature +++ b/tests/tck/features/match/MultiLineMultiQueryParts.feature @@ -9,11 +9,16 @@ Feature: Multi Line Multi Query Parts Scenario: Multi Line Multi Path Patterns When executing query: """ - USE nba; - MATCH (m)-[]-(n), (n)-[]-(l) WHERE id(m)=="Tim Duncan" - RETURN m.player.name AS n1, n.player.name AS n2, - CASE WHEN l.team.name is not null THEN l.team.name - WHEN l.player.name is not null THEN l.player.name ELSE "null" END AS n3 ORDER BY n1, n2, n3 LIMIT 10 + MATCH (m)-[]-(n), (n)-[]-(l) WHERE id(m)=='Tim Duncan' + RETURN + m.player.name AS n1, + n.player.name AS n2, + CASE + WHEN l.team.name is not null THEN l.team.name + WHEN l.player.name is not null THEN l.player.name ELSE 'null' + END AS n3 + ORDER BY n1, n2, n3 + LIMIT 10 """ Then the result should be, in order: | n1 | n2 | n3 | @@ -29,8 +34,10 @@ Feature: Multi Line Multi Query Parts | "Tim Duncan" | "Boris Diaw" | "Tim Duncan" | When executing query: """ - MATCH (m)-[]-(n), (n)-[]-(l) WHERE id(n)=="Tim Duncan" - RETURN m.player.name AS n1, n.player.name AS n2, l.player.name AS n3 ORDER BY n1, n2, n3 LIMIT 10 + MATCH (m)-[]-(n), (n)-[]-(l) WHERE id(n)=='Tim Duncan' + RETURN m.player.name AS n1, n.player.name AS n2, l.player.name AS n3 + ORDER BY n1, n2, n3 + LIMIT 10 """ Then the result should be, in order: | n1 | n2 | n3 | diff --git a/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature b/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature new file mode 100644 index 00000000000..cc219c934f9 --- /dev/null +++ b/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature @@ -0,0 +1,205 @@ +# Copyright (c) 2022 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: Push Filter down HashInnerJoin rule + + Background: + Given a graph with space named "nba" + + Scenario: push filter down BiInnerJoin + When profiling query: + """ + MATCH (v:player) + WHERE id(v) == 'Tim Duncan' + WITH v AS v1 + MATCH (v1)-[e:like]-(v2) + WHERE e.likeness > 0 + RETURN e, v2 + ORDER BY e, v2 + """ + Then the result should be, in any order: + | e | v2 | + | [:like "Aron Baynes"->"Tim Duncan" @0 {likeness: 80}] | ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | + | [:like "Boris Diaw"->"Tim Duncan" @0 {likeness: 80}] | ("Boris Diaw" :player{age: 36, name: "Boris Diaw"}) | + | [:like "Danny Green"->"Tim Duncan" @0 {likeness: 70}] | ("Danny Green" :player{age: 31, name: "Danny Green"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "LaMarcus Aldridge"->"Tim Duncan" @0 {likeness: 75}] | ("LaMarcus Aldridge" :player{age: 33, name: "LaMarcus Aldridge"}) | + | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Marco Belinelli"->"Tim Duncan" @0 {likeness: 55}] | ("Marco Belinelli" :player{age: 32, name: "Marco Belinelli"}) | + | [:like "Shaquille O'Neal"->"Tim Duncan" @0 {likeness: 80}] | ("Shaquille O'Neal" :player{age: 47, name: "Shaquille O'Neal"}) | + | [:like "Tiago Splitter"->"Tim Duncan" @0 {likeness: 80}] | ("Tiago Splitter" :player{age: 34, name: "Tiago Splitter"}) | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 30 | Sort | 14 | | + | 14 | Project | 19 | | + | 19 | BiInnerJoin | 6,22 | | + | 6 | Project | 20 | | + | 20 | AppendVertices | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 22 | Project | 21 | | + | 21 | Filter | 10 | { "condition": "($-.e[0].likeness>0)" } | + | 10 | AppendVertices | 9 | | + | 9 | Traverse | 7 | | + | 7 | Argument | 8 | | + | 8 | Start | | | + When profiling query: + """ + MATCH (v:player) + WHERE id(v) == 'Tim Duncan' + WITH v AS v1 + MATCH (v1)-[e:like]-(v2) + WHERE v1.player.age > 0 + RETURN e, v2 + ORDER BY e, v2 + """ + Then the result should be, in any order: + | e | v2 | + | [:like "Aron Baynes"->"Tim Duncan" @0 {likeness: 80}] | ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | + | [:like "Boris Diaw"->"Tim Duncan" @0 {likeness: 80}] | ("Boris Diaw" :player{age: 36, name: "Boris Diaw"}) | + | [:like "Danny Green"->"Tim Duncan" @0 {likeness: 70}] | ("Danny Green" :player{age: 31, name: "Danny Green"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "LaMarcus Aldridge"->"Tim Duncan" @0 {likeness: 75}] | ("LaMarcus Aldridge" :player{age: 33, name: "LaMarcus Aldridge"}) | + | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Marco Belinelli"->"Tim Duncan" @0 {likeness: 55}] | ("Marco Belinelli" :player{age: 32, name: "Marco Belinelli"}) | + | [:like "Shaquille O'Neal"->"Tim Duncan" @0 {likeness: 80}] | ("Shaquille O'Neal" :player{age: 47, name: "Shaquille O'Neal"}) | + | [:like "Tiago Splitter"->"Tim Duncan" @0 {likeness: 80}] | ("Tiago Splitter" :player{age: 34, name: "Tiago Splitter"}) | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 30 | Sort | 14 | | + | 14 | Project | 19 | | + | 19 | BiInnerJoin | 6,11 | | + | 6 | Project | 16 | | + | 16 | Filter | 20 | { "condition": "(v.player.age>0)" } | + | 20 | AppendVertices | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 11 | Project | 10 | | + | 10 | AppendVertices | 9 | | + | 9 | Traverse | 7 | | + | 7 | Argument | 8 | | + | 8 | Start | | | + When profiling query: + """ + MATCH (v:player) + WHERE id(v) == 'Tim Duncan' + WITH v AS v1 + MATCH (v1)-[e:like]-(v2) + WHERE e.likeness > 0 AND v1.player.age > 0 + RETURN e, v2 + ORDER BY e, v2 + """ + Then the result should be, in any order: + | e | v2 | + | [:like "Aron Baynes"->"Tim Duncan" @0 {likeness: 80}] | ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | + | [:like "Boris Diaw"->"Tim Duncan" @0 {likeness: 80}] | ("Boris Diaw" :player{age: 36, name: "Boris Diaw"}) | + | [:like "Danny Green"->"Tim Duncan" @0 {likeness: 70}] | ("Danny Green" :player{age: 31, name: "Danny Green"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "LaMarcus Aldridge"->"Tim Duncan" @0 {likeness: 75}] | ("LaMarcus Aldridge" :player{age: 33, name: "LaMarcus Aldridge"}) | + | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Marco Belinelli"->"Tim Duncan" @0 {likeness: 55}] | ("Marco Belinelli" :player{age: 32, name: "Marco Belinelli"}) | + | [:like "Shaquille O'Neal"->"Tim Duncan" @0 {likeness: 80}] | ("Shaquille O'Neal" :player{age: 47, name: "Shaquille O'Neal"}) | + | [:like "Tiago Splitter"->"Tim Duncan" @0 {likeness: 80}] | ("Tiago Splitter" :player{age: 34, name: "Tiago Splitter"}) | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 30 | Sort | 14 | | + | 14 | Project | 20 | | + | 20 | BiInnerJoin | 23,25 | | + | 23 | Project | 22 | | + | 22 | Filter | 21 | {"condition": "(v.player.age>0)"} | + | 21 | AppendVertices | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 25 | Project | 24 | | + | 24 | Filter | 10 | {"condition": "($-.e[0].likeness>0)"} | + | 10 | AppendVertices | 9 | | + | 9 | Traverse | 7 | | + | 7 | Argument | 8 | | + | 8 | Start | | | + When profiling query: + """ + MATCH (v:player) + WHERE id(v) == 'Tim Duncan' + WITH v AS v1 + MATCH (v1)-[e:like]-(v2) + WHERE e.likeness > 0 OR v1.player.age > 0 + RETURN e, v2 + ORDER BY e, v2 + """ + Then the result should be, in any order: + | e | v2 | + | [:like "Aron Baynes"->"Tim Duncan" @0 {likeness: 80}] | ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | + | [:like "Boris Diaw"->"Tim Duncan" @0 {likeness: 80}] | ("Boris Diaw" :player{age: 36, name: "Boris Diaw"}) | + | [:like "Danny Green"->"Tim Duncan" @0 {likeness: 70}] | ("Danny Green" :player{age: 31, name: "Danny Green"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "LaMarcus Aldridge"->"Tim Duncan" @0 {likeness: 75}] | ("LaMarcus Aldridge" :player{age: 33, name: "LaMarcus Aldridge"}) | + | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Marco Belinelli"->"Tim Duncan" @0 {likeness: 55}] | ("Marco Belinelli" :player{age: 32, name: "Marco Belinelli"}) | + | [:like "Shaquille O'Neal"->"Tim Duncan" @0 {likeness: 80}] | ("Shaquille O'Neal" :player{age: 47, name: "Shaquille O'Neal"}) | + | [:like "Tiago Splitter"->"Tim Duncan" @0 {likeness: 80}] | ("Tiago Splitter" :player{age: 34, name: "Tiago Splitter"}) | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 30 | Sort | 14 | | + | 14 | Project | 19 | | + | 19 | BiInnerJoin | 6,22 | | + | 6 | Project | 20 | | + | 20 | AppendVertices | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 22 | Project | 21 | | + | 21 | Filter | 10 | { "condition": "(($-.e[0].likeness>0) OR ($-.v1.player.age>0))" } | + | 10 | AppendVertices | 9 | | + | 9 | Traverse | 7 | | + | 7 | Argument | 8 | | + | 8 | Start | | | + + Scenario: NOT push filter down BiInnerJoin + When profiling query: + """ + MATCH (v:player)-[]-(vv) + WHERE id(v) == 'Tim Duncan' + WITH v AS v1, vv AS vv + MATCH (v1)-[e:like]-(v2) + WHERE e.likeness > 90 OR vv.team.start_year>2000 + RETURN e, v2 + ORDER BY e, v2 + LIMIT 3 + """ + Then the result should be, in any order: + | e | v2 | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 20 | TopN | 15 | | + | 15 | Project | 14 | | + | 14 | Filter | 13 | { "condition": "(($e.likeness>90) OR (vv.team.start_year>2000))" } | + | 13 | BiInnerJoin | 16,12 | | + | 16 | Project | 5 | | + | 5 | AppendVertices | 17 | | + | 17 | Traverse | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 12 | Project | 11 | | + | 11 | AppendVertices | 10 | | + | 10 | Traverse | 8 | | + | 8 | Argument | 9 | | + | 9 | Start | | | From edcdf83e91f9897dcde753d4cc40bc840e5c8cc3 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Sat, 3 Dec 2022 13:23:12 +0800 Subject: [PATCH 02/14] rewrite argument input var --- src/graph/optimizer/Optimizer.cpp | 95 ++++++++++++++++++++++++++++- src/graph/optimizer/Optimizer.h | 2 +- src/graph/planner/plan/PlanNode.cpp | 1 - 3 files changed, 94 insertions(+), 4 deletions(-) diff --git a/src/graph/optimizer/Optimizer.cpp b/src/graph/optimizer/Optimizer.cpp index e9e26f5a24e..e5c7747dde0 100644 --- a/src/graph/optimizer/Optimizer.cpp +++ b/src/graph/optimizer/Optimizer.cpp @@ -52,6 +52,7 @@ StatusOr Optimizer::findBestPlan(QueryContext *qctx) { // Just for Properties Pruning Status Optimizer::postprocess(PlanNode *root, graph::QueryContext *qctx, GraphSpaceID spaceID) { + NG_RETURN_IF_ERROR(rewriteArgumentInputVar(root)); if (FLAGS_enable_optimizer_property_pruner_rule) { graph::PropertyTracker propsUsed; graph::PrunePropertiesVisitor visitor(propsUsed, qctx, spaceID); @@ -125,10 +126,100 @@ void Optimizer::addBodyToGroupNode(OptContext *ctx, gnode->addBody(body); } -// static -Status rewriteArgumentInputVar(PlanNode *root) { +Status rewriteArgumentInputVarInternal(PlanNode *root, + uint16_t stackDepth, + bool &hasArgument, + std::vector &path) { + const uint16_t kMaxStackDepth = 512u; + if (stackDepth > kMaxStackDepth) { + return Status::Error("The depth of plan tree has exceeded the max %u level", kMaxStackDepth); + } + stackDepth++; + + if (!root) return Status::OK(); + + switch (root->numDeps()) { + case 0: { + if (root->kind() == PlanNode::Kind::kArgument) { + hasArgument = true; + path.push_back(root); + for (size_t i = path.size() - 1; i >= 0; i--) { + const auto *pn = path[i]; + if (pn->isBiInput()) { + DCHECK_LT(i, path.size() - 1); + const auto *bpn = static_cast(pn); + if (bpn->right() == path[i + 1]) { + // Argument is in the right side dependency of binary plan node, check the left child + // output columns + const auto &colNames = bpn->left()->colNames(); + bool found = true; + for (const auto &col : root->colNames()) { + if (std::find(colNames.begin(), colNames.end(), col) == colNames.end()) { + found = false; + break; + } + } + if (found) { + root->setInputVar(bpn->left()->outputVar()); + break; + } + } else { + // Argument is in the left side dependency of binary plan node, continue to find + // next parent plan node + DCHECK_EQ(bpn->left(), path[i + 1]); + } + } + } + if (root->inputVar().empty()) { + return Status::Error("Could not find the right input variable for argument plan node"); + } + path.pop_back(); + } + break; + } + case 1: { + path.push_back(root); + auto *dep = const_cast(root->dep()); + NG_RETURN_IF_ERROR(rewriteArgumentInputVarInternal(dep, stackDepth, hasArgument, path)); + path.pop_back(); + break; + } + case 2: { + auto *bpn = static_cast(root); + auto *left = const_cast(bpn->left()); + path.push_back(left); + NG_RETURN_IF_ERROR(rewriteArgumentInputVarInternal(left, stackDepth, hasArgument, path)); + path.pop_back(); + + auto *right = const_cast(bpn->right()); + path.push_back(right); + NG_RETURN_IF_ERROR(rewriteArgumentInputVarInternal(right, stackDepth, hasArgument, path)); + path.pop_back(); + break; + } + default: { + return Status::Error( + "Invalid dependencies of plan node `%s': %lu", root->toString().c_str(), root->numDeps()); + } + } + + // Ensure that there's no argument plan node if loop/select plan nodes exist in execution plan + if (root->kind() == PlanNode::Kind::kLoop || root->kind() == PlanNode::Kind::kSelect) { + if (hasArgument) { + return Status::Error( + "Loop/Select plan node should not exist with argument in the same execution plan"); + } + } + return Status::OK(); } +// static +Status Optimizer::rewriteArgumentInputVar(PlanNode *root) { + bool hasArgument = false; + std::vector path; + return rewriteArgumentInputVarInternal(root, 0, hasArgument, path); +} + } // namespace opt } // namespace nebula diff --git a/src/graph/optimizer/Optimizer.h b/src/graph/optimizer/Optimizer.h index 9b1c2039706..8cf4f842ec1 100644 --- a/src/graph/optimizer/Optimizer.h +++ b/src/graph/optimizer/Optimizer.h @@ -47,7 +47,7 @@ class Optimizer final { OptGroupNode *gnode, std::unordered_map *visited); - static Status rewriteArgumentInputVar(PlanNode *root); + static Status rewriteArgumentInputVar(graph::PlanNode *root); static constexpr int8_t kMaxIterationRound = 5; diff --git a/src/graph/planner/plan/PlanNode.cpp b/src/graph/planner/plan/PlanNode.cpp index f7c1b240591..926432d2e2d 100644 --- a/src/graph/planner/plan/PlanNode.cpp +++ b/src/graph/planner/plan/PlanNode.cpp @@ -477,7 +477,6 @@ std::unique_ptr VariableDependencyNode::explain() const { } void PlanNode::setColNames(std::vector cols) { - qctx_->symTable()->setAliasGeneratedBy(cols, outputVarPtr()->name); outputVarPtr()->colNames = std::move(cols); } } // namespace graph From de87d502604f44b3c5f50176937b518f6addf6bf Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Sat, 3 Dec 2022 14:20:25 +0800 Subject: [PATCH 03/14] Fix bug --- src/graph/optimizer/Optimizer.cpp | 13 ++------ src/graph/planner/match/MatchPlanner.cpp | 5 ---- .../features/match/AllShortestPaths.feature | 26 ++++++++-------- .../PushFilterDownHashInnerJoinRule.feature | 30 +++++++++---------- 4 files changed, 31 insertions(+), 43 deletions(-) diff --git a/src/graph/optimizer/Optimizer.cpp b/src/graph/optimizer/Optimizer.cpp index e5c7747dde0..66c2ed21292 100644 --- a/src/graph/optimizer/Optimizer.cpp +++ b/src/graph/optimizer/Optimizer.cpp @@ -138,12 +138,12 @@ Status rewriteArgumentInputVarInternal(PlanNode *root, if (!root) return Status::OK(); + path.push_back(root); switch (root->numDeps()) { case 0: { if (root->kind() == PlanNode::Kind::kArgument) { hasArgument = true; - path.push_back(root); - for (size_t i = path.size() - 1; i >= 0; i--) { + for (int i = path.size() - 1; i >= 0; i--) { const auto *pn = path[i]; if (pn->isBiInput()) { DCHECK_LT(i, path.size() - 1); @@ -173,28 +173,20 @@ Status rewriteArgumentInputVarInternal(PlanNode *root, if (root->inputVar().empty()) { return Status::Error("Could not find the right input variable for argument plan node"); } - path.pop_back(); } break; } case 1: { - path.push_back(root); auto *dep = const_cast(root->dep()); NG_RETURN_IF_ERROR(rewriteArgumentInputVarInternal(dep, stackDepth, hasArgument, path)); - path.pop_back(); break; } case 2: { auto *bpn = static_cast(root); auto *left = const_cast(bpn->left()); - path.push_back(left); NG_RETURN_IF_ERROR(rewriteArgumentInputVarInternal(left, stackDepth, hasArgument, path)); - path.pop_back(); - auto *right = const_cast(bpn->right()); - path.push_back(right); NG_RETURN_IF_ERROR(rewriteArgumentInputVarInternal(right, stackDepth, hasArgument, path)); - path.pop_back(); break; } default: { @@ -202,6 +194,7 @@ Status rewriteArgumentInputVarInternal(PlanNode *root, "Invalid dependencies of plan node `%s': %lu", root->toString().c_str(), root->numDeps()); } } + path.pop_back(); // Ensure that there's no argument plan node if loop/select plan nodes exist in execution plan if (root->kind() == PlanNode::Kind::kLoop || root->kind() == PlanNode::Kind::kSelect) { diff --git a/src/graph/planner/match/MatchPlanner.cpp b/src/graph/planner/match/MatchPlanner.cpp index 1e68fa9beaf..3c0725eecc0 100644 --- a/src/graph/planner/match/MatchPlanner.cpp +++ b/src/graph/planner/match/MatchPlanner.cpp @@ -88,11 +88,6 @@ Status MatchPlanner::connectMatchPlan(SubPlan& queryPlan, MatchClauseContext* ma } } if (!interAliases.empty()) { - if (matchPlan.tail->kind() == PlanNode::Kind::kArgument) { - // The input of the argument operator is always the output of the plan on the other side of - // the join - matchPlan.tail->setInputVar(queryPlan.root->outputVar()); - } if (matchCtx->isOptional) { // connect LeftJoin match filter auto& whereCtx = matchCtx->where; diff --git a/tests/tck/features/match/AllShortestPaths.feature b/tests/tck/features/match/AllShortestPaths.feature index c2402ea7710..12bc9f50426 100644 --- a/tests/tck/features/match/AllShortestPaths.feature +++ b/tests/tck/features/match/AllShortestPaths.feature @@ -476,16 +476,16 @@ Feature: allShortestPaths """ Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. - # Scenario: allShortestPaths for argument issue - # When executing query: - # """ - # MATCH (a:player), (b:player) - # MATCH p= allShortestPaths((a:player {name:"Tim Duncan"})-[*..15]-(b:player {age:33})) - # WITH nodes(p) AS pathNodes - # UNWIND pathNodes AS node - # RETURN DISTINCT node - # """ - # Then the result should be, in any order, with relax comparison: - # | node | - # | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | - # | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | +# Scenario: allShortestPaths for argument issue +# When executing query: +# """ +# MATCH (a:player), (b:player) +# MATCH p= allShortestPaths((a:player {name:"Tim Duncan"})-[*..15]-(b:player {age:33})) +# WITH nodes(p) AS pathNodes +# UNWIND pathNodes AS node +# RETURN DISTINCT node +# """ +# Then the result should be, in any order, with relax comparison: +# | node | +# | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | +# | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | diff --git a/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature b/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature index cc219c934f9..b1ee7cf5421 100644 --- a/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature +++ b/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature @@ -153,21 +153,21 @@ Feature: Push Filter down HashInnerJoin rule | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | And the execution plan should be: - | id | name | dependencies | operator info | - | 30 | Sort | 14 | | - | 14 | Project | 19 | | - | 19 | BiInnerJoin | 6,22 | | - | 6 | Project | 20 | | - | 20 | AppendVertices | 2 | | - | 2 | Dedup | 1 | | - | 1 | PassThrough | 3 | | - | 3 | Start | | | - | 22 | Project | 21 | | - | 21 | Filter | 10 | { "condition": "(($-.e[0].likeness>0) OR ($-.v1.player.age>0))" } | - | 10 | AppendVertices | 9 | | - | 9 | Traverse | 7 | | - | 7 | Argument | 8 | | - | 8 | Start | | | + | id | name | dependencies | operator info | + | 30 | Sort | 14 | | + | 14 | Project | 19 | | + | 19 | BiInnerJoin | 6,22 | | + | 6 | Project | 20 | | + | 20 | AppendVertices | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 22 | Project | 21 | | + | 21 | Filter | 10 | { "condition": "(($-.e[0].likeness>0) OR (-.v1.player.age>0))" } | + | 10 | AppendVertices | 9 | | + | 9 | Traverse | 7 | | + | 7 | Argument | 8 | | + | 8 | Start | | | Scenario: NOT push filter down BiInnerJoin When profiling query: From 3eccfc009f9a196a9c01c290f68c91ff557afcd3 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Sat, 3 Dec 2022 14:30:39 +0800 Subject: [PATCH 04/14] Enhancement --- src/graph/optimizer/Optimizer.cpp | 57 +++++++------- src/graph/planner/plan/PlanNode.cpp | 15 ++-- src/graph/planner/plan/PlanNode.h | 2 +- .../features/match/AllShortestPaths.feature | 74 +++++++++++++++---- 4 files changed, 97 insertions(+), 51 deletions(-) diff --git a/src/graph/optimizer/Optimizer.cpp b/src/graph/optimizer/Optimizer.cpp index 66c2ed21292..669c258982f 100644 --- a/src/graph/optimizer/Optimizer.cpp +++ b/src/graph/optimizer/Optimizer.cpp @@ -126,6 +126,32 @@ void Optimizer::addBodyToGroupNode(OptContext *ctx, gnode->addBody(body); } +namespace { + +bool findArgumentRefPlanNodeInPath(const std::vector &path, PlanNode *argument) { + DCHECK_EQ(argument->kind(), PlanNode::Kind::kArgument); + for (int i = path.size() - 1; i >= 0; i--) { + const auto *pn = path[i]; + if (pn->isBiInput()) { + DCHECK_LT(i, path.size() - 1); + const auto *bpn = static_cast(pn); + if (bpn->right() == path[i + 1]) { + // Argument is in the right side dependency of binary plan node, check the left child + // output columns + if (argument->isColumnsIncludedIn(bpn->left())) { + argument->setInputVar(bpn->left()->outputVar()); + return true; + } + } else { + // Argument is in the left side dependency of binary plan node, continue to find + // next parent plan node + DCHECK_EQ(bpn->left(), path[i + 1]); + } + } + } + return false; +} + Status rewriteArgumentInputVarInternal(PlanNode *root, uint16_t stackDepth, bool &hasArgument, @@ -143,34 +169,7 @@ Status rewriteArgumentInputVarInternal(PlanNode *root, case 0: { if (root->kind() == PlanNode::Kind::kArgument) { hasArgument = true; - for (int i = path.size() - 1; i >= 0; i--) { - const auto *pn = path[i]; - if (pn->isBiInput()) { - DCHECK_LT(i, path.size() - 1); - const auto *bpn = static_cast(pn); - if (bpn->right() == path[i + 1]) { - // Argument is in the right side dependency of binary plan node, check the left child - // output columns - const auto &colNames = bpn->left()->colNames(); - bool found = true; - for (const auto &col : root->colNames()) { - if (std::find(colNames.begin(), colNames.end(), col) == colNames.end()) { - found = false; - break; - } - } - if (found) { - root->setInputVar(bpn->left()->outputVar()); - break; - } - } else { - // Argument is in the left side dependency of binary plan node, continue to find - // next parent plan node - DCHECK_EQ(bpn->left(), path[i + 1]); - } - } - } - if (root->inputVar().empty()) { + if (!findArgumentRefPlanNodeInPath(path, root) || root->inputVar().empty()) { return Status::Error("Could not find the right input variable for argument plan node"); } } @@ -207,6 +206,8 @@ Status rewriteArgumentInputVarInternal(PlanNode *root, return Status::OK(); } +} // namespace + // static Status Optimizer::rewriteArgumentInputVar(PlanNode *root) { bool hasArgument = false; diff --git a/src/graph/planner/plan/PlanNode.cpp b/src/graph/planner/plan/PlanNode.cpp index 926432d2e2d..7221f84eef9 100644 --- a/src/graph/planner/plan/PlanNode.cpp +++ b/src/graph/planner/plan/PlanNode.cpp @@ -354,18 +354,15 @@ void PlanNode::setInputVar(const std::string& varname, size_t idx) { } } -Status PlanNode::isColumnsIncluded(const std::string& varname) const { - auto* refVar = qctx()->symTable()->getVar(varname); - // The referenced variable should have all columns used in current node +bool PlanNode::isColumnsIncludedIn(const PlanNode* other) const { + const auto& otherColNames = other->colNames(); for (auto& colName : colNames()) { - auto iter = std::find(refVar->colNames.begin(), refVar->colNames.end(), colName); - if (iter == refVar->colNames.end()) { - return Status::Error("the column referenced by Argument is not included in variable `%s': %s", - varname.c_str(), - colName.c_str()); + auto iter = std::find(otherColNames.begin(), otherColNames.end(), colName); + if (iter == otherColNames.end()) { + return false; } } - return Status::OK(); + return true; } std::unique_ptr PlanNode::explain() const { diff --git a/src/graph/planner/plan/PlanNode.h b/src/graph/planner/plan/PlanNode.h index 80db483c7b7..678acc5b271 100644 --- a/src/graph/planner/plan/PlanNode.h +++ b/src/graph/planner/plan/PlanNode.h @@ -304,7 +304,7 @@ class PlanNode { return static_cast(this); } - Status isColumnsIncluded(const std::string& varname) const; + bool isColumnsIncludedIn(const PlanNode* other) const; protected: PlanNode(QueryContext* qctx, Kind kind); diff --git a/tests/tck/features/match/AllShortestPaths.feature b/tests/tck/features/match/AllShortestPaths.feature index 12bc9f50426..d2109881aee 100644 --- a/tests/tck/features/match/AllShortestPaths.feature +++ b/tests/tck/features/match/AllShortestPaths.feature @@ -476,16 +476,64 @@ Feature: allShortestPaths """ Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. -# Scenario: allShortestPaths for argument issue -# When executing query: -# """ -# MATCH (a:player), (b:player) -# MATCH p= allShortestPaths((a:player {name:"Tim Duncan"})-[*..15]-(b:player {age:33})) -# WITH nodes(p) AS pathNodes -# UNWIND pathNodes AS node -# RETURN DISTINCT node -# """ -# Then the result should be, in any order, with relax comparison: -# | node | -# | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | -# | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + Scenario: allShortestPaths for argument issue + When profiling query: + """ + MATCH (a:player{name:'Tim Duncan'}), (b:player{name:'Tony Parker'}) + WITH a AS b, b AS a + MATCH allShortestPaths((a)-[:like*1..3]-(b)) + RETURN a, b + """ + Then the result should be, in any order, with relax comparison: + | a | b | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 19 | Project | 18 | | + | 18 | BiInnerJoin | 10,17 | | + | 10 | Project | 9 | | + | 9 | BiCartesianProduct | 24,25 | | + | 24 | AppendVertices | 20 | | + | 20 | IndexScan | 2 | | + | 2 | Start | | | + | 25 | AppendVertices | 21 | | + | 21 | IndexScan | 6 | | + | 6 | Start | | | + | 17 | Project | 16 | | + | 16 | ShortestPath | 15 | | + | 15 | BiCartesianProduct | 12,14 | | + | 12 | Project | 11 | | + | 11 | Argument | | | + | 14 | Project | 13 | | + | 13 | Argument | | | + When profiling query: + """ + MATCH (a:player{name:'Tim Duncan'}), (b:player{name:'Tony Parker'}) + WITH a AS b, b AS a + OPTIONAL MATCH allShortestPaths((a)-[:like*1..3]-(b)) + RETURN a, b + """ + Then the result should be, in any order, with relax comparison: + | a | b | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 19 | Project | 18 | | + | 18 | BiLeftJoin | 10,17 | | + | 10 | Project | 9 | | + | 9 | BiCartesianProduct | 24,25 | | + | 24 | AppendVertices | 20 | | + | 20 | IndexScan | 2 | | + | 2 | Start | | | + | 25 | AppendVertices | 21 | | + | 21 | IndexScan | 6 | | + | 6 | Start | | | + | 17 | Project | 16 | | + | 16 | ShortestPath | 15 | | + | 15 | BiCartesianProduct | 12,14 | | + | 12 | Project | 11 | | + | 11 | Argument | | | + | 14 | Project | 13 | | + | 13 | Argument | | | From 5fe1719f9738a90880c89d399c2c3d84a58558d0 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Sat, 3 Dec 2022 17:48:13 +0800 Subject: [PATCH 05/14] Disable the cleanup in optimizer --- src/graph/optimizer/OptGroup.cpp | 3 +++ src/graph/optimizer/Optimizer.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/graph/optimizer/OptGroup.cpp b/src/graph/optimizer/OptGroup.cpp index 2b68497d229..871513a2704 100644 --- a/src/graph/optimizer/OptGroup.cpp +++ b/src/graph/optimizer/OptGroup.cpp @@ -161,6 +161,8 @@ const PlanNode *OptGroup::getPlan() const { void OptGroup::deleteRefGroupNode(const OptGroupNode *node) { groupNodesReferenced_.erase(node); +#if 0 + // FIXME(yee): This cleanup will crash the optimizer for LDBC queries, try to fix it next PR if (groupNodesReferenced_.empty()) { // Cleanup all opt group nodes in current opt group if it's NOT referenced by any other opt // group nodes @@ -168,6 +170,7 @@ void OptGroup::deleteRefGroupNode(const OptGroupNode *node) { n->release(); } } +#endif } OptGroupNode *OptGroupNode::create(OptContext *ctx, PlanNode *node, const OptGroup *group) { diff --git a/src/graph/optimizer/Optimizer.cpp b/src/graph/optimizer/Optimizer.cpp index 669c258982f..80793021087 100644 --- a/src/graph/optimizer/Optimizer.cpp +++ b/src/graph/optimizer/Optimizer.cpp @@ -128,6 +128,9 @@ void Optimizer::addBodyToGroupNode(OptContext *ctx, namespace { +// The plan node referenced by argument always is in the left side of plan tree. So we only need to +// check whether the left root child of binary input plan node contains what the argument needs in +// its output columns bool findArgumentRefPlanNodeInPath(const std::vector &path, PlanNode *argument) { DCHECK_EQ(argument->kind(), PlanNode::Kind::kArgument); for (int i = path.size() - 1; i >= 0; i--) { From 160d91722dffd49f0f3ede129448e5a4159f558f Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Sun, 4 Dec 2022 10:54:52 +0800 Subject: [PATCH 06/14] More DCHECK --- src/graph/optimizer/OptGroup.cpp | 17 ++++++++++------- src/graph/optimizer/Optimizer.cpp | 3 +++ src/graph/planner/match/MatchPlanner.cpp | 4 +++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/graph/optimizer/OptGroup.cpp b/src/graph/optimizer/OptGroup.cpp index 871513a2704..7f4d4126a62 100644 --- a/src/graph/optimizer/OptGroup.cpp +++ b/src/graph/optimizer/OptGroup.cpp @@ -42,8 +42,7 @@ OptGroup::OptGroup(OptContext *ctx) noexcept : ctx_(ctx) { } void OptGroup::addGroupNode(OptGroupNode *groupNode) { - DCHECK(groupNode != nullptr); - DCHECK(groupNode->group() == this); + DCHECK_EQ(this, DCHECK_NOTNULL(groupNode)->group()); if (outputVar_.empty()) { outputVar_ = groupNode->node()->outputVar(); } else { @@ -69,6 +68,9 @@ Status OptGroup::explore(const OptRule *rule) { } setExplored(rule); + DCHECK(!groupNodesReferenced_.empty()) + << "Current group should be referenced by other group nodes before optimization"; + for (auto iter = groupNodes_.begin(); iter != groupNodes_.end();) { auto groupNode = *iter; DCHECK(groupNode != nullptr); @@ -121,6 +123,9 @@ Status OptGroup::explore(const OptRule *rule) { } } + DCHECK(!groupNodes_.empty()) + << "Should have at least one group node after optimizing the current group"; + return Status::OK(); } @@ -137,6 +142,7 @@ Status OptGroup::exploreUntilMaxRound(const OptRule *rule) { } std::pair OptGroup::findMinCostGroupNode() const { + DCHECK(!groupNodes_.empty()) << "There is no any group nodes in opt group"; double minCost = std::numeric_limits::max(); const OptGroupNode *minGroupNode = nullptr; for (auto &groupNode : groupNodes_) { @@ -155,22 +161,19 @@ double OptGroup::getCost() const { const PlanNode *OptGroup::getPlan() const { const OptGroupNode *minGroupNode = findMinCostGroupNode().second; - DCHECK(minGroupNode != nullptr); - return minGroupNode->getPlan(); + return DCHECK_NOTNULL(minGroupNode)->getPlan(); } void OptGroup::deleteRefGroupNode(const OptGroupNode *node) { groupNodesReferenced_.erase(node); -#if 0 - // FIXME(yee): This cleanup will crash the optimizer for LDBC queries, try to fix it next PR if (groupNodesReferenced_.empty()) { // Cleanup all opt group nodes in current opt group if it's NOT referenced by any other opt // group nodes for (auto *n : groupNodes_) { n->release(); } + groupNodes_.clear(); } -#endif } OptGroupNode *OptGroupNode::create(OptContext *ctx, PlanNode *node, const OptGroup *group) { diff --git a/src/graph/optimizer/Optimizer.cpp b/src/graph/optimizer/Optimizer.cpp index 80793021087..366acdd56ee 100644 --- a/src/graph/optimizer/Optimizer.cpp +++ b/src/graph/optimizer/Optimizer.cpp @@ -172,6 +172,9 @@ Status rewriteArgumentInputVarInternal(PlanNode *root, case 0: { if (root->kind() == PlanNode::Kind::kArgument) { hasArgument = true; + DCHECK(root->inputVar().empty()) + << "Should keep the input empty for argument when plan generation"; + if (!findArgumentRefPlanNodeInPath(path, root) || root->inputVar().empty()) { return Status::Error("Could not find the right input variable for argument plan node"); } diff --git a/src/graph/planner/match/MatchPlanner.cpp b/src/graph/planner/match/MatchPlanner.cpp index 3c0725eecc0..353ea04ab0c 100644 --- a/src/graph/planner/match/MatchPlanner.cpp +++ b/src/graph/planner/match/MatchPlanner.cpp @@ -162,7 +162,9 @@ Status MatchPlanner::genQueryPartPlan(QueryContext* qctx, // TBD: need generate var for all queryPlan.tail? if (queryPlan.tail->isSingleInput()) { - queryPlan.tail->setInputVar(qctx->vctx()->anonVarGen()->getVar()); + if (queryPlan.tail->kind() != PlanNode::Kind::kArgument) { + queryPlan.tail->setInputVar(qctx->vctx()->anonVarGen()->getVar()); + } if (!tailConnected_) { tailConnected_ = true; queryPlan.appendStartNode(qctx); From 82e5a48ff064c5e1b8c0f8be79565b04a60611f5 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Sun, 4 Dec 2022 13:32:07 +0800 Subject: [PATCH 07/14] More checks in optimizer --- src/graph/optimizer/OptGroup.cpp | 59 +++++++++++++++++++++++++---- src/graph/optimizer/OptGroup.h | 9 ++++- src/graph/optimizer/OptRule.cpp | 6 +-- src/graph/optimizer/OptRule.h | 2 +- src/graph/optimizer/Optimizer.cpp | 1 + src/graph/service/QueryInstance.cpp | 10 ++++- 6 files changed, 72 insertions(+), 15 deletions(-) diff --git a/src/graph/optimizer/OptGroup.cpp b/src/graph/optimizer/OptGroup.cpp index 7f4d4126a62..b9517b90820 100644 --- a/src/graph/optimizer/OptGroup.cpp +++ b/src/graph/optimizer/OptGroup.cpp @@ -41,15 +41,59 @@ OptGroup::OptGroup(OptContext *ctx) noexcept : ctx_(ctx) { DCHECK(ctx != nullptr); } -void OptGroup::addGroupNode(OptGroupNode *groupNode) { +Status OptGroup::validateSubPlan(const OptGroupNode *gn, + const std::vector &patternLeaves) const { + auto &deps = DCHECK_NOTNULL(gn)->dependencies(); + + auto checkDepGroup = [this, gn, &patternLeaves](const OptGroup *depGroup) -> Status { + auto iter = std::find(patternLeaves.begin(), patternLeaves.end(), depGroup); + if (iter == patternLeaves.end()) { + if (depGroup->groupNodes_.size() != 1U || depGroup->groupNodesReferenced_.size() != 1U) { + return Status::Error( + "Invalid sub-plan generated for plan node: %s, " + "numGroupNodes: %lu, numGroupNodesRef: %lu", + PlanNode::toString(gn->node()->kind()), + depGroup->groupNodes_.size(), + depGroup->groupNodesReferenced_.size()); + } + return validateSubPlan(depGroup->groupNodes_.front(), patternLeaves); + } + return Status::OK(); + }; + + switch (deps.size()) { + case 0: { + DLOG(INFO) << "there is no any dependencies for new generated group node"; + break; + } + case 1: { + NG_RETURN_IF_ERROR(checkDepGroup(deps[0])); + break; + } + case 2: { + NG_RETURN_IF_ERROR(checkDepGroup(deps[0])); + NG_RETURN_IF_ERROR(checkDepGroup(deps[1])); + break; + } + default: { + return Status::Error("Invalid dependencies of opt group node: %lu", deps.size()); + } + } + return Status::OK(); +} + +Status OptGroup::addGroupNode(OptGroupNode *groupNode, + const std::vector &patternLeaves) { DCHECK_EQ(this, DCHECK_NOTNULL(groupNode)->group()); if (outputVar_.empty()) { outputVar_ = groupNode->node()->outputVar(); } else { DCHECK_EQ(outputVar_, groupNode->node()->outputVar()); } + NG_RETURN_IF_ERROR(validateSubPlan(groupNode, patternLeaves)); groupNodes_.emplace_back(groupNode); groupNode->node()->updateSymbols(); + return Status::OK(); } OptGroupNode *OptGroup::makeGroupNode(PlanNode *node) { @@ -68,12 +112,11 @@ Status OptGroup::explore(const OptRule *rule) { } setExplored(rule); - DCHECK(!groupNodesReferenced_.empty()) + DCHECK(isRootGroup_ || !groupNodesReferenced_.empty()) << "Current group should be referenced by other group nodes before optimization"; for (auto iter = groupNodes_.begin(); iter != groupNodes_.end();) { - auto groupNode = *iter; - DCHECK(groupNode != nullptr); + auto *groupNode = DCHECK_NOTNULL(*iter); if (groupNode->isExplored(rule)) { ++iter; continue; @@ -82,7 +125,7 @@ Status OptGroup::explore(const OptRule *rule) { NG_RETURN_IF_ERROR(groupNode->explore(rule)); // Find more equivalents - std::vector boundary; + std::vector leaves; auto status = rule->match(ctx_, groupNode); if (!status.ok()) { ++iter; @@ -90,7 +133,7 @@ Status OptGroup::explore(const OptRule *rule) { } ctx_->setChanged(true); auto matched = std::move(status).value(); - matched.collectBoundary(boundary); + matched.collectLeaves(leaves); auto resStatus = rule->transform(ctx_, matched); NG_RETURN_IF_ERROR(resStatus); auto result = std::move(resStatus).value(); @@ -102,14 +145,14 @@ Status OptGroup::explore(const OptRule *rule) { } groupNodes_.clear(); for (auto ngn : result.newGroupNodes) { - addGroupNode(ngn); + NG_RETURN_IF_ERROR(addGroupNode(ngn, leaves)); } break; } if (!result.newGroupNodes.empty()) { for (auto ngn : result.newGroupNodes) { - addGroupNode(ngn); + NG_RETURN_IF_ERROR(addGroupNode(ngn, leaves)); } setUnexplored(rule); diff --git a/src/graph/optimizer/OptGroup.h b/src/graph/optimizer/OptGroup.h index 71e96921b94..444089ec838 100644 --- a/src/graph/optimizer/OptGroup.h +++ b/src/graph/optimizer/OptGroup.h @@ -34,7 +34,7 @@ class OptGroup final { void setUnexplored(const OptRule *rule); - void addGroupNode(OptGroupNode *groupNode); + Status addGroupNode(OptGroupNode *groupNode, const std::vector &patternLeaves); OptGroupNode *makeGroupNode(graph::PlanNode *node); const std::list &groupNodes() const { return groupNodes_; @@ -54,6 +54,10 @@ class OptGroup final { void deleteRefGroupNode(const OptGroupNode *node); + void setRootGroup() { + isRootGroup_ = true; + } + private: friend ObjectPool; explicit OptGroup(OptContext *ctx) noexcept; @@ -61,6 +65,8 @@ class OptGroup final { static constexpr int16_t kMaxExplorationRound = 128; std::pair findMinCostGroupNode() const; + Status validateSubPlan(const OptGroupNode *gn, + const std::vector &patternLeaves) const; OptContext *ctx_{nullptr}; std::list groupNodes_; @@ -68,6 +74,7 @@ class OptGroup final { // The output variable should be same across the whole group. std::string outputVar_; + bool isRootGroup_{false}; // Save the OptGroupNode which references this OptGroup std::unordered_set groupNodesReferenced_; }; diff --git a/src/graph/optimizer/OptRule.cpp b/src/graph/optimizer/OptRule.cpp index 68b934f1686..4c6feb23d35 100644 --- a/src/graph/optimizer/OptRule.cpp +++ b/src/graph/optimizer/OptRule.cpp @@ -36,12 +36,12 @@ const MatchedResult &MatchedResult::result(const std::vector &pos) cons return *DCHECK_NOTNULL(result); } -void MatchedResult::collectBoundary(std::vector &boundary) const { +void MatchedResult::collectLeaves(std::vector &leaves) const { if (dependencies.empty()) { - boundary.insert(boundary.end(), node->dependencies().begin(), node->dependencies().end()); + leaves.insert(leaves.end(), node->dependencies().begin(), node->dependencies().end()); } else { for (const auto &dep : dependencies) { - dep.collectBoundary(boundary); + dep.collectLeaves(leaves); } } } diff --git a/src/graph/optimizer/OptRule.h b/src/graph/optimizer/OptRule.h index 80eaf78fe8e..bfc2dfb8f5e 100644 --- a/src/graph/optimizer/OptRule.h +++ b/src/graph/optimizer/OptRule.h @@ -42,7 +42,7 @@ struct MatchedResult { const graph::PlanNode *planNode(const std::vector &pos = {}) const; const MatchedResult &result(const std::vector &pos = {}) const; - void collectBoundary(std::vector &boundary) const; + void collectLeaves(std::vector &leaves) const; }; // Match plan node by trait or kind of plan node. diff --git a/src/graph/optimizer/Optimizer.cpp b/src/graph/optimizer/Optimizer.cpp index 366acdd56ee..e9767ffd0a5 100644 --- a/src/graph/optimizer/Optimizer.cpp +++ b/src/graph/optimizer/Optimizer.cpp @@ -39,6 +39,7 @@ StatusOr Optimizer::findBestPlan(QueryContext *qctx) { auto ret = prepare(optCtx.get(), root); NG_RETURN_IF_ERROR(ret); auto rootGroup = std::move(ret).value(); + rootGroup->setRootGroup(); NG_RETURN_IF_ERROR(doExploration(optCtx.get(), rootGroup)); auto *newRoot = rootGroup->getPlan(); diff --git a/src/graph/service/QueryInstance.cpp b/src/graph/service/QueryInstance.cpp index 4425b86e3d6..2d5107d9bbb 100644 --- a/src/graph/service/QueryInstance.cpp +++ b/src/graph/service/QueryInstance.cpp @@ -90,8 +90,14 @@ Status QueryInstance::validateAndOptimize() { // Validate the query, if failed, return NG_RETURN_IF_ERROR(Validator::validate(sentence_.get(), qctx())); - // Optimize the query, and get the execution plan - NG_RETURN_IF_ERROR(findBestPlan()); + // Optimize the query, and get the execution plan. We should not pass the optimizer errors to user + // since the message is often not easy to understand. Logging them is enough. + if (auto status = findBestPlan(); !status.ok()) { + LOG(ERROR) << "Error found in optimization stage: " << status.message(); + return Status::Error( + "There are some errors found in optimizer, " + "please contact to the admin to learn more details"); + } stats::StatsManager::addValue(kOptimizerLatencyUs, *(qctx_->plan()->optimizeTimeInUs())); if (FLAGS_enable_space_level_metrics && spaceName != "") { stats::StatsManager::addValue( From 9c6bcf5853dea643c5a6d3dc3b4778fbac795275 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Sun, 4 Dec 2022 22:27:05 +0800 Subject: [PATCH 08/14] Use pattern leaves to validate the sub-plan --- src/graph/optimizer/OptGroup.cpp | 48 ++++++++++++++++++++------------ src/graph/optimizer/OptGroup.h | 3 +- src/graph/optimizer/OptRule.cpp | 11 ++++++-- src/graph/optimizer/OptRule.h | 2 +- 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/graph/optimizer/OptGroup.cpp b/src/graph/optimizer/OptGroup.cpp index b9517b90820..822c240a1be 100644 --- a/src/graph/optimizer/OptGroup.cpp +++ b/src/graph/optimizer/OptGroup.cpp @@ -42,28 +42,41 @@ OptGroup::OptGroup(OptContext *ctx) noexcept : ctx_(ctx) { } Status OptGroup::validateSubPlan(const OptGroupNode *gn, + const OptRule *rule, const std::vector &patternLeaves) const { auto &deps = DCHECK_NOTNULL(gn)->dependencies(); - auto checkDepGroup = [this, gn, &patternLeaves](const OptGroup *depGroup) -> Status { + auto checkDepGroup = [this, gn, rule, &patternLeaves](const OptGroup *depGroup) -> Status { auto iter = std::find(patternLeaves.begin(), patternLeaves.end(), depGroup); if (iter == patternLeaves.end()) { - if (depGroup->groupNodes_.size() != 1U || depGroup->groupNodesReferenced_.size() != 1U) { - return Status::Error( - "Invalid sub-plan generated for plan node: %s, " - "numGroupNodes: %lu, numGroupNodesRef: %lu", - PlanNode::toString(gn->node()->kind()), - depGroup->groupNodes_.size(), - depGroup->groupNodesReferenced_.size()); + if (!depGroup) { + return Status::Error("Could not find the dependent group in pattern leaves"); + } else { + if (depGroup->groupNodes_.size() != 1U || depGroup->groupNodesReferenced_.size() != 1U) { + return Status::Error( + "Invalid sub-plan generated when applying the rule: %s, " + "planNode: %s, numGroupNodes: %lu, numGroupNodesRef: %lu", + rule->toString().c_str(), + PlanNode::toString(gn->node()->kind()), + depGroup->groupNodes_.size(), + depGroup->groupNodesReferenced_.size()); + } else { + return validateSubPlan(depGroup->groupNodes_.front(), rule, patternLeaves); + } } - return validateSubPlan(depGroup->groupNodes_.front(), patternLeaves); + } else { + return Status::OK(); } - return Status::OK(); }; switch (deps.size()) { case 0: { - DLOG(INFO) << "there is no any dependencies for new generated group node"; + auto iter = std::find(patternLeaves.begin(), patternLeaves.end(), nullptr); + if (iter == patternLeaves.end()) { + return Status::Error("Invalid sub-plan generated by the rule: %s, planNode: %s", + rule->toString().c_str(), + PlanNode::toString(gn->node()->kind())); + } break; } case 1: { @@ -82,18 +95,15 @@ Status OptGroup::validateSubPlan(const OptGroupNode *gn, return Status::OK(); } -Status OptGroup::addGroupNode(OptGroupNode *groupNode, - const std::vector &patternLeaves) { +void OptGroup::addGroupNode(OptGroupNode *groupNode) { DCHECK_EQ(this, DCHECK_NOTNULL(groupNode)->group()); if (outputVar_.empty()) { outputVar_ = groupNode->node()->outputVar(); } else { DCHECK_EQ(outputVar_, groupNode->node()->outputVar()); } - NG_RETURN_IF_ERROR(validateSubPlan(groupNode, patternLeaves)); groupNodes_.emplace_back(groupNode); groupNode->node()->updateSymbols(); - return Status::OK(); } OptGroupNode *OptGroup::makeGroupNode(PlanNode *node) { @@ -133,7 +143,7 @@ Status OptGroup::explore(const OptRule *rule) { } ctx_->setChanged(true); auto matched = std::move(status).value(); - matched.collectLeaves(leaves); + matched.collectPatternLeaves(leaves); auto resStatus = rule->transform(ctx_, matched); NG_RETURN_IF_ERROR(resStatus); auto result = std::move(resStatus).value(); @@ -145,14 +155,16 @@ Status OptGroup::explore(const OptRule *rule) { } groupNodes_.clear(); for (auto ngn : result.newGroupNodes) { - NG_RETURN_IF_ERROR(addGroupNode(ngn, leaves)); + NG_RETURN_IF_ERROR(validateSubPlan(ngn, rule, leaves)); + addGroupNode(ngn); } break; } if (!result.newGroupNodes.empty()) { for (auto ngn : result.newGroupNodes) { - NG_RETURN_IF_ERROR(addGroupNode(ngn, leaves)); + NG_RETURN_IF_ERROR(validateSubPlan(ngn, rule, leaves)); + addGroupNode(ngn); } setUnexplored(rule); diff --git a/src/graph/optimizer/OptGroup.h b/src/graph/optimizer/OptGroup.h index 444089ec838..230d7a15cb4 100644 --- a/src/graph/optimizer/OptGroup.h +++ b/src/graph/optimizer/OptGroup.h @@ -34,7 +34,7 @@ class OptGroup final { void setUnexplored(const OptRule *rule); - Status addGroupNode(OptGroupNode *groupNode, const std::vector &patternLeaves); + void addGroupNode(OptGroupNode *groupNode); OptGroupNode *makeGroupNode(graph::PlanNode *node); const std::list &groupNodes() const { return groupNodes_; @@ -66,6 +66,7 @@ class OptGroup final { std::pair findMinCostGroupNode() const; Status validateSubPlan(const OptGroupNode *gn, + const OptRule *rule, const std::vector &patternLeaves) const; OptContext *ctx_{nullptr}; diff --git a/src/graph/optimizer/OptRule.cpp b/src/graph/optimizer/OptRule.cpp index 4c6feb23d35..23c693c2fd0 100644 --- a/src/graph/optimizer/OptRule.cpp +++ b/src/graph/optimizer/OptRule.cpp @@ -36,12 +36,17 @@ const MatchedResult &MatchedResult::result(const std::vector &pos) cons return *DCHECK_NOTNULL(result); } -void MatchedResult::collectLeaves(std::vector &leaves) const { +void MatchedResult::collectPatternLeaves(std::vector &leaves) const { if (dependencies.empty()) { - leaves.insert(leaves.end(), node->dependencies().begin(), node->dependencies().end()); + if (node->dependencies().empty()) { + // nullptr means this node in matched pattern is a leaf node + leaves.push_back(nullptr); + } else { + leaves.insert(leaves.end(), node->dependencies().begin(), node->dependencies().end()); + } } else { for (const auto &dep : dependencies) { - dep.collectLeaves(leaves); + dep.collectPatternLeaves(leaves); } } } diff --git a/src/graph/optimizer/OptRule.h b/src/graph/optimizer/OptRule.h index bfc2dfb8f5e..1a0e0d65046 100644 --- a/src/graph/optimizer/OptRule.h +++ b/src/graph/optimizer/OptRule.h @@ -42,7 +42,7 @@ struct MatchedResult { const graph::PlanNode *planNode(const std::vector &pos = {}) const; const MatchedResult &result(const std::vector &pos = {}) const; - void collectLeaves(std::vector &leaves) const; + void collectPatternLeaves(std::vector &leaves) const; }; // Match plan node by trait or kind of plan node. From 112e4bc515c649016fcea58dbb2fbf1d5aa86ab0 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Sun, 4 Dec 2022 22:30:06 +0800 Subject: [PATCH 09/14] Rename BiInnerJoin to hash join --- .../rule/PushFilterDownHashInnerJoinRule.cpp | 8 ++++---- .../rule/PushFilterDownHashInnerJoinRule.h | 2 +- tests/tck/features/match/AllShortestPaths.feature | 2 +- .../PushFilterDownHashInnerJoinRule.feature | 14 +++++++------- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp index b5377aa4ecb..4c3a8c274b7 100644 --- a/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp @@ -28,7 +28,7 @@ const Pattern& PushFilterDownHashInnerJoinRule::pattern() const { static Pattern pattern = Pattern::create( PlanNode::Kind::kFilter, {Pattern::create( - PlanNode::Kind::kBiInnerJoin, + PlanNode::Kind::kHashInnerJoin, {Pattern::create(PlanNode::Kind::kUnknown), Pattern::create(PlanNode::Kind::kUnknown)})}); return pattern; } @@ -40,8 +40,8 @@ StatusOr PushFilterDownHashInnerJoinRule::transform( DCHECK_EQ(oldFilterNode->kind(), PlanNode::Kind::kFilter); auto* innerJoinNode = matched.planNode({0, 0}); - DCHECK_EQ(innerJoinNode->kind(), PlanNode::Kind::kBiInnerJoin); - auto* oldInnerJoinNode = static_cast(innerJoinNode); + DCHECK_EQ(innerJoinNode->kind(), PlanNode::Kind::kHashInnerJoin); + auto* oldInnerJoinNode = static_cast(innerJoinNode); const auto* condition = static_cast(oldFilterNode)->condition(); DCHECK(condition); @@ -62,7 +62,7 @@ StatusOr PushFilterDownHashInnerJoinRule::transform( rightGroup = rightGroup ? rightGroup : const_cast(rightResult.node->group()); // produce new InnerJoin node - auto* newInnerJoinNode = static_cast(oldInnerJoinNode->clone()); + auto* newInnerJoinNode = static_cast(oldInnerJoinNode->clone()); auto newJoinGroup = rightFilterUnpicked ? OptGroup::create(octx) : filterGroupNode->group(); auto newGroupNode = OptGroupNode::create(octx, newInnerJoinNode, newJoinGroup); newGroupNode->dependsOn(leftGroup); diff --git a/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h index f8fb0976337..2f9de94ce8b 100644 --- a/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h +++ b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h @@ -11,7 +11,7 @@ namespace nebula { namespace opt { -// Push down the filter items from the left subplan of [[BiInnerJoin]] +// Push down the filter items from the left subplan of [[HashInnerJoin]] class PushFilterDownHashInnerJoinRule final : public OptRule { public: const Pattern &pattern() const override; diff --git a/tests/tck/features/match/AllShortestPaths.feature b/tests/tck/features/match/AllShortestPaths.feature index d2109881aee..16d50c774b8 100644 --- a/tests/tck/features/match/AllShortestPaths.feature +++ b/tests/tck/features/match/AllShortestPaths.feature @@ -491,7 +491,7 @@ Feature: allShortestPaths And the execution plan should be: | id | name | dependencies | operator info | | 19 | Project | 18 | | - | 18 | BiInnerJoin | 10,17 | | + | 18 | HashInnerJoin | 10,17 | | | 10 | Project | 9 | | | 9 | BiCartesianProduct | 24,25 | | | 24 | AppendVertices | 20 | | diff --git a/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature b/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature index b1ee7cf5421..43c8c956c38 100644 --- a/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature +++ b/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature @@ -6,7 +6,7 @@ Feature: Push Filter down HashInnerJoin rule Background: Given a graph with space named "nba" - Scenario: push filter down BiInnerJoin + Scenario: push filter down HashInnerJoin When profiling query: """ MATCH (v:player) @@ -35,7 +35,7 @@ Feature: Push Filter down HashInnerJoin rule | id | name | dependencies | operator info | | 30 | Sort | 14 | | | 14 | Project | 19 | | - | 19 | BiInnerJoin | 6,22 | | + | 19 | HashInnerJoin | 6,22 | | | 6 | Project | 20 | | | 20 | AppendVertices | 2 | | | 2 | Dedup | 1 | | @@ -75,7 +75,7 @@ Feature: Push Filter down HashInnerJoin rule | id | name | dependencies | operator info | | 30 | Sort | 14 | | | 14 | Project | 19 | | - | 19 | BiInnerJoin | 6,11 | | + | 19 | HashInnerJoin | 6,11 | | | 6 | Project | 16 | | | 16 | Filter | 20 | { "condition": "(v.player.age>0)" } | | 20 | AppendVertices | 2 | | @@ -115,7 +115,7 @@ Feature: Push Filter down HashInnerJoin rule | id | name | dependencies | operator info | | 30 | Sort | 14 | | | 14 | Project | 20 | | - | 20 | BiInnerJoin | 23,25 | | + | 20 | HashInnerJoin | 23,25 | | | 23 | Project | 22 | | | 22 | Filter | 21 | {"condition": "(v.player.age>0)"} | | 21 | AppendVertices | 2 | | @@ -156,7 +156,7 @@ Feature: Push Filter down HashInnerJoin rule | id | name | dependencies | operator info | | 30 | Sort | 14 | | | 14 | Project | 19 | | - | 19 | BiInnerJoin | 6,22 | | + | 19 | HashInnerJoin | 6,22 | | | 6 | Project | 20 | | | 20 | AppendVertices | 2 | | | 2 | Dedup | 1 | | @@ -169,7 +169,7 @@ Feature: Push Filter down HashInnerJoin rule | 7 | Argument | 8 | | | 8 | Start | | | - Scenario: NOT push filter down BiInnerJoin + Scenario: NOT push filter down HashInnerJoin When profiling query: """ MATCH (v:player)-[]-(vv) @@ -191,7 +191,7 @@ Feature: Push Filter down HashInnerJoin rule | 20 | TopN | 15 | | | 15 | Project | 14 | | | 14 | Filter | 13 | { "condition": "(($e.likeness>90) OR (vv.team.start_year>2000))" } | - | 13 | BiInnerJoin | 16,12 | | + | 13 | HashInnerJoin | 16,12 | | | 16 | Project | 5 | | | 5 | AppendVertices | 17 | | | 17 | Traverse | 2 | | From 9707c93c241a9d790ffb1f9c0a0e15443b52fd4f Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Sun, 4 Dec 2022 22:45:29 +0800 Subject: [PATCH 10/14] cleanup --- src/graph/optimizer/OptGroup.cpp | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/src/graph/optimizer/OptGroup.cpp b/src/graph/optimizer/OptGroup.cpp index 822c240a1be..5cb47a1baee 100644 --- a/src/graph/optimizer/OptGroup.cpp +++ b/src/graph/optimizer/OptGroup.cpp @@ -48,25 +48,22 @@ Status OptGroup::validateSubPlan(const OptGroupNode *gn, auto checkDepGroup = [this, gn, rule, &patternLeaves](const OptGroup *depGroup) -> Status { auto iter = std::find(patternLeaves.begin(), patternLeaves.end(), depGroup); - if (iter == patternLeaves.end()) { - if (!depGroup) { - return Status::Error("Could not find the dependent group in pattern leaves"); - } else { - if (depGroup->groupNodes_.size() != 1U || depGroup->groupNodesReferenced_.size() != 1U) { - return Status::Error( - "Invalid sub-plan generated when applying the rule: %s, " - "planNode: %s, numGroupNodes: %lu, numGroupNodesRef: %lu", - rule->toString().c_str(), - PlanNode::toString(gn->node()->kind()), - depGroup->groupNodes_.size(), - depGroup->groupNodesReferenced_.size()); - } else { - return validateSubPlan(depGroup->groupNodes_.front(), rule, patternLeaves); - } - } - } else { + if (iter != patternLeaves.end()) { return Status::OK(); } + if (!depGroup) { + return Status::Error("Could not find the dependent group in pattern leaves"); + } + if (depGroup->groupNodes_.size() != 1U || depGroup->groupNodesReferenced_.size() != 1U) { + return Status::Error( + "Invalid sub-plan generated when applying the rule: %s, " + "planNode: %s, numGroupNodes: %lu, numGroupNodesRef: %lu", + rule->toString().c_str(), + PlanNode::toString(gn->node()->kind()), + depGroup->groupNodes_.size(), + depGroup->groupNodesReferenced_.size()); + } + return validateSubPlan(depGroup->groupNodes_.front(), rule, patternLeaves); }; switch (deps.size()) { From e4ba34e8aa1d992f31c986371eff529076a6c4b3 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Mon, 5 Dec 2022 23:12:07 +0800 Subject: [PATCH 11/14] Add validator for OptGroup --- src/graph/optimizer/CMakeLists.txt | 2 +- src/graph/optimizer/OptGroup.cpp | 67 +++++++++++++++++++++++++++++- src/graph/optimizer/OptGroup.h | 4 ++ src/graph/optimizer/Optimizer.cpp | 1 + 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/src/graph/optimizer/CMakeLists.txt b/src/graph/optimizer/CMakeLists.txt index cae7300d687..6a8bc6c67eb 100644 --- a/src/graph/optimizer/CMakeLists.txt +++ b/src/graph/optimizer/CMakeLists.txt @@ -27,7 +27,7 @@ nebula_add_library( rule/PushFilterDownAggregateRule.cpp rule/PushFilterDownProjectRule.cpp rule/PushFilterDownLeftJoinRule.cpp - rule/PushFilterDownHashInnerJoinRule.cpp + # rule/PushFilterDownHashInnerJoinRule.cpp rule/PushFilterDownInnerJoinRule.cpp rule/PushFilterDownNodeRule.cpp rule/PushFilterDownScanVerticesRule.cpp diff --git a/src/graph/optimizer/OptGroup.cpp b/src/graph/optimizer/OptGroup.cpp index 5cb47a1baee..e0b42fbc487 100644 --- a/src/graph/optimizer/OptGroup.cpp +++ b/src/graph/optimizer/OptGroup.cpp @@ -92,6 +92,28 @@ Status OptGroup::validateSubPlan(const OptGroupNode *gn, return Status::OK(); } +Status OptGroup::validate(const OptRule *rule) const { + if (groupNodes_.empty() && !groupNodesReferenced_.empty()) { + return Status::Error( + "The OptGroup has no any OptGroupNode but used by other OptGroupNode " + "when applying the rule: %s, numGroupNodesRef: %lu", + rule->toString().c_str(), + groupNodesReferenced_.size()); + } + for (auto *gn : groupNodes_) { + NG_RETURN_IF_ERROR(gn->validate(rule)); + if (gn->node()->outputVar() != outputVar_) { + return Status::Error( + "The output columns of the OptGroupNode is different from OptGroup " + "when applying the rule: %s, %s vs. %s", + rule->toString().c_str(), + gn->node()->outputVar().c_str(), + outputVar_.c_str()); + } + } + return Status::OK(); +} + void OptGroup::addGroupNode(OptGroupNode *groupNode) { DCHECK_EQ(this, DCHECK_NOTNULL(groupNode)->group()); if (outputVar_.empty()) { @@ -144,6 +166,10 @@ Status OptGroup::explore(const OptRule *rule) { auto resStatus = rule->transform(ctx_, matched); NG_RETURN_IF_ERROR(resStatus); auto result = std::move(resStatus).value(); + + // for (auto *ngn : result.newGroupNodes) { + // NG_RETURN_IF_ERROR(validateSubPlan(ngn, rule, leaves)); + // } // In some cases, we can apply optimization rules even if the control flow and data flow are // inconsistent. For now, let the optimization rules themselves guarantee correctness. if (result.eraseAll) { @@ -152,7 +178,7 @@ Status OptGroup::explore(const OptRule *rule) { } groupNodes_.clear(); for (auto ngn : result.newGroupNodes) { - NG_RETURN_IF_ERROR(validateSubPlan(ngn, rule, leaves)); + // NG_RETURN_IF_ERROR(validateSubPlan(ngn, rule, leaves)); addGroupNode(ngn); } break; @@ -160,7 +186,7 @@ Status OptGroup::explore(const OptRule *rule) { if (!result.newGroupNodes.empty()) { for (auto ngn : result.newGroupNodes) { - NG_RETURN_IF_ERROR(validateSubPlan(ngn, rule, leaves)); + // NG_RETURN_IF_ERROR(validateSubPlan(ngn, rule, leaves)); addGroupNode(ngn); } @@ -300,5 +326,42 @@ void OptGroupNode::release() { } } +Status OptGroupNode::validate(const OptRule *rule) const { + if (!node_) { + return Status::Error("The OptGroupNode does not have plan node when applying the rule: %s", + rule->toString().c_str()); + } + if (!group_) { + return Status::Error( + "The OptGroupNode does not have the right OptGroup when applying the rule: %s", + rule->toString().c_str()); + } + if (!bodies_.empty()) { + if (node_->kind() != PlanNode::Kind::kLoop) { + return Status::Error( + "The plan node is not Loop in OptGroupNode when applying the rule: %s, planNode: %s", + rule->toString().c_str(), + PlanNode::toString(node_->kind())); + } + for (auto *g : bodies_) { + NG_RETURN_IF_ERROR(g->validate(rule)); + } + } + if (dependencies_.empty()) { + if (node_->kind() != PlanNode::Kind::kStart && node_->kind() != PlanNode::Kind::kArgument) { + return Status::Error( + "The leaf plan node is not Start or Argument in OptGroupNode when applying the rule: %s, " + "planNode: %s", + rule->toString().c_str(), + PlanNode::toString(node_->kind())); + } + } else { + for (auto *g : dependencies_) { + NG_RETURN_IF_ERROR(g->validate(rule)); + } + } + return Status::OK(); +} + } // namespace opt } // namespace nebula diff --git a/src/graph/optimizer/OptGroup.h b/src/graph/optimizer/OptGroup.h index 230d7a15cb4..1f264928229 100644 --- a/src/graph/optimizer/OptGroup.h +++ b/src/graph/optimizer/OptGroup.h @@ -58,6 +58,8 @@ class OptGroup final { isRootGroup_ = true; } + Status validate(const OptRule *rule) const; + private: friend ObjectPool; explicit OptGroup(OptContext *ctx) noexcept; @@ -133,6 +135,8 @@ class OptGroupNode final { // Release the opt group node from its opt group void release(); + Status validate(const OptRule *rule) const; + private: friend ObjectPool; OptGroupNode(graph::PlanNode *node, const OptGroup *group) noexcept; diff --git a/src/graph/optimizer/Optimizer.cpp b/src/graph/optimizer/Optimizer.cpp index e9767ffd0a5..b6909361669 100644 --- a/src/graph/optimizer/Optimizer.cpp +++ b/src/graph/optimizer/Optimizer.cpp @@ -81,6 +81,7 @@ Status Optimizer::doExploration(OptContext *octx, OptGroup *rootGroup) { for (auto rule : ruleSet->rules()) { // Explore until the maximum number of iterations(Rules) is reached NG_RETURN_IF_ERROR(rootGroup->exploreUntilMaxRound(rule)); + NG_RETURN_IF_ERROR(rootGroup->validate(rule)); rootGroup->setUnexplored(rule); } } From d7ff4660fdce4ae9be97145490e4c320018f5d42 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Tue, 6 Dec 2022 11:41:06 +0800 Subject: [PATCH 12/14] Fuck the bug --- src/graph/optimizer/CMakeLists.txt | 2 +- src/graph/optimizer/OptGroup.cpp | 24 +++++++++--------- .../rule/PushFilterDownHashInnerJoinRule.cpp | 5 +++- src/graph/planner/plan/Query.cpp | 25 ++++++++----------- 4 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/graph/optimizer/CMakeLists.txt b/src/graph/optimizer/CMakeLists.txt index 6a8bc6c67eb..cae7300d687 100644 --- a/src/graph/optimizer/CMakeLists.txt +++ b/src/graph/optimizer/CMakeLists.txt @@ -27,7 +27,7 @@ nebula_add_library( rule/PushFilterDownAggregateRule.cpp rule/PushFilterDownProjectRule.cpp rule/PushFilterDownLeftJoinRule.cpp - # rule/PushFilterDownHashInnerJoinRule.cpp + rule/PushFilterDownHashInnerJoinRule.cpp rule/PushFilterDownInnerJoinRule.cpp rule/PushFilterDownNodeRule.cpp rule/PushFilterDownScanVerticesRule.cpp diff --git a/src/graph/optimizer/OptGroup.cpp b/src/graph/optimizer/OptGroup.cpp index e0b42fbc487..0fcc0fdcb35 100644 --- a/src/graph/optimizer/OptGroup.cpp +++ b/src/graph/optimizer/OptGroup.cpp @@ -126,13 +126,9 @@ void OptGroup::addGroupNode(OptGroupNode *groupNode) { } OptGroupNode *OptGroup::makeGroupNode(PlanNode *node) { - if (outputVar_.empty()) { - outputVar_ = node->outputVar(); - } else { - DCHECK_EQ(outputVar_, node->outputVar()); - } - groupNodes_.emplace_back(OptGroupNode::create(ctx_, node, this)); - return groupNodes_.back(); + auto *gn = OptGroupNode::create(ctx_, node, this); + addGroupNode(gn); + return gn; } Status OptGroup::explore(const OptRule *rule) { @@ -167,9 +163,15 @@ Status OptGroup::explore(const OptRule *rule) { NG_RETURN_IF_ERROR(resStatus); auto result = std::move(resStatus).value(); - // for (auto *ngn : result.newGroupNodes) { - // NG_RETURN_IF_ERROR(validateSubPlan(ngn, rule, leaves)); - // } + for (auto *gn : result.newGroupNodes) { + auto it = std::find(groupNodes_.begin(), groupNodes_.end(), gn); + if (it != groupNodes_.end()) { + return Status::Error("Should not push the OptGroupNode %s into the group in the rule: %s", + gn->node()->toString().c_str(), + rule->toString().c_str()); + } + } + // In some cases, we can apply optimization rules even if the control flow and data flow are // inconsistent. For now, let the optimization rules themselves guarantee correctness. if (result.eraseAll) { @@ -178,7 +180,6 @@ Status OptGroup::explore(const OptRule *rule) { } groupNodes_.clear(); for (auto ngn : result.newGroupNodes) { - // NG_RETURN_IF_ERROR(validateSubPlan(ngn, rule, leaves)); addGroupNode(ngn); } break; @@ -186,7 +187,6 @@ Status OptGroup::explore(const OptRule *rule) { if (!result.newGroupNodes.empty()) { for (auto ngn : result.newGroupNodes) { - // NG_RETURN_IF_ERROR(validateSubPlan(ngn, rule, leaves)); addGroupNode(ngn); } diff --git a/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp index 4c3a8c274b7..92b2513f80f 100644 --- a/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp @@ -64,7 +64,10 @@ StatusOr PushFilterDownHashInnerJoinRule::transform( // produce new InnerJoin node auto* newInnerJoinNode = static_cast(oldInnerJoinNode->clone()); auto newJoinGroup = rightFilterUnpicked ? OptGroup::create(octx) : filterGroupNode->group(); - auto newGroupNode = OptGroupNode::create(octx, newInnerJoinNode, newJoinGroup); + // TODO(yee): it's too tricky + auto newGroupNode = rightFilterUnpicked + ? const_cast(newJoinGroup)->makeGroupNode(newInnerJoinNode) + : OptGroupNode::create(octx, newInnerJoinNode, newJoinGroup); newGroupNode->dependsOn(leftGroup); newGroupNode->dependsOn(rightGroup); newInnerJoinNode->setLeftVar(leftGroup->outputVar()); diff --git a/src/graph/planner/plan/Query.cpp b/src/graph/planner/plan/Query.cpp index 6145559f02a..2383987b78a 100644 --- a/src/graph/planner/plan/Query.cpp +++ b/src/graph/planner/plan/Query.cpp @@ -866,14 +866,15 @@ HashJoin::HashJoin(QueryContext* qctx, : BinaryInputNode(qctx, kind, left, right), hashKeys_(std::move(hashKeys)), probeKeys_(std::move(probeKeys)) { - auto lColNames = left->colNames(); - auto& rColNames = right->colNames(); - for (auto& rColName : rColNames) { - if (std::find(lColNames.begin(), lColNames.end(), rColName) == lColNames.end()) { - lColNames.emplace_back(rColName); + if (left && right) { + auto lColNames = left->colNames(); + for (auto& rColName : right->colNames()) { + if (std::find(lColNames.begin(), lColNames.end(), rColName) == lColNames.end()) { + lColNames.emplace_back(rColName); + } } + setColNames(lColNames); } - setColNames(lColNames); } std::unique_ptr HashLeftJoin::explain() const { @@ -883,9 +884,7 @@ std::unique_ptr HashLeftJoin::explain() const { } PlanNode* HashLeftJoin::clone() const { - auto* lnode = left() ? left()->clone() : nullptr; - auto* rnode = right() ? right()->clone() : nullptr; - auto* newLeftJoin = HashLeftJoin::make(qctx_, lnode, rnode); + auto* newLeftJoin = HashLeftJoin::make(qctx_, nullptr, nullptr); newLeftJoin->cloneMembers(*this); return newLeftJoin; } @@ -901,9 +900,7 @@ std::unique_ptr HashInnerJoin::explain() const { } PlanNode* HashInnerJoin::clone() const { - auto* lnode = left() ? left()->clone() : nullptr; - auto* rnode = right() ? right()->clone() : nullptr; - auto* newInnerJoin = HashInnerJoin::make(qctx_, lnode, rnode); + auto* newInnerJoin = HashInnerJoin::make(qctx_, nullptr, nullptr); newInnerJoin->cloneMembers(*this); return newInnerJoin; } @@ -942,9 +939,7 @@ void RollUpApply::cloneMembers(const RollUpApply& r) { } PlanNode* RollUpApply::clone() const { - auto* lnode = left() ? left()->clone() : nullptr; - auto* rnode = right() ? right()->clone() : nullptr; - auto* newRollUpApply = RollUpApply::make(qctx_, lnode, rnode, {}, nullptr); + auto* newRollUpApply = RollUpApply::make(qctx_, nullptr, nullptr, {}, nullptr); newRollUpApply->cloneMembers(*this); return newRollUpApply; } From 172509a956949018742ae3cdb1f59b2d51e4cc77 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Tue, 6 Dec 2022 12:13:34 +0800 Subject: [PATCH 13/14] cancel DCHECK --- src/graph/optimizer/OptGroup.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/graph/optimizer/OptGroup.cpp b/src/graph/optimizer/OptGroup.cpp index 0fcc0fdcb35..569b5ec43c8 100644 --- a/src/graph/optimizer/OptGroup.cpp +++ b/src/graph/optimizer/OptGroup.cpp @@ -137,8 +137,9 @@ Status OptGroup::explore(const OptRule *rule) { } setExplored(rule); - DCHECK(isRootGroup_ || !groupNodesReferenced_.empty()) - << "Current group should be referenced by other group nodes before optimization"; + // TODO(yee): the opt group maybe in the loop body branch + // DCHECK(isRootGroup_ || !groupNodesReferenced_.empty()) + // << "Current group should be referenced by other group nodes before optimization"; for (auto iter = groupNodes_.begin(); iter != groupNodes_.end();) { auto *groupNode = DCHECK_NOTNULL(*iter); From 6aa04ab5185693264a5161eb8d1a6437bdf3aaf7 Mon Sep 17 00:00:00 2001 From: yixinglu <2520865+yixinglu@users.noreply.github.com> Date: Tue, 6 Dec 2022 13:44:06 +0800 Subject: [PATCH 14/14] Fix tck tests --- .../features/match/AllShortestPaths.feature | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/tests/tck/features/match/AllShortestPaths.feature b/tests/tck/features/match/AllShortestPaths.feature index 16d50c774b8..7a1134f4263 100644 --- a/tests/tck/features/match/AllShortestPaths.feature +++ b/tests/tck/features/match/AllShortestPaths.feature @@ -489,24 +489,24 @@ Feature: allShortestPaths | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | And the execution plan should be: - | id | name | dependencies | operator info | - | 19 | Project | 18 | | - | 18 | HashInnerJoin | 10,17 | | - | 10 | Project | 9 | | - | 9 | BiCartesianProduct | 24,25 | | - | 24 | AppendVertices | 20 | | - | 20 | IndexScan | 2 | | - | 2 | Start | | | - | 25 | AppendVertices | 21 | | - | 21 | IndexScan | 6 | | - | 6 | Start | | | - | 17 | Project | 16 | | - | 16 | ShortestPath | 15 | | - | 15 | BiCartesianProduct | 12,14 | | - | 12 | Project | 11 | | - | 11 | Argument | | | - | 14 | Project | 13 | | - | 13 | Argument | | | + | id | name | dependencies | operator info | + | 19 | Project | 18 | | + | 18 | HashInnerJoin | 10,17 | | + | 10 | Project | 9 | | + | 9 | CrossJoin | 24,25 | | + | 24 | AppendVertices | 20 | | + | 20 | IndexScan | 2 | | + | 2 | Start | | | + | 25 | AppendVertices | 21 | | + | 21 | IndexScan | 6 | | + | 6 | Start | | | + | 17 | Project | 16 | | + | 16 | ShortestPath | 15 | | + | 15 | CrossJoin | 12,14 | | + | 12 | Project | 11 | | + | 11 | Argument | | | + | 14 | Project | 13 | | + | 13 | Argument | | | When profiling query: """ MATCH (a:player{name:'Tim Duncan'}), (b:player{name:'Tony Parker'}) @@ -519,21 +519,21 @@ Feature: allShortestPaths | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | And the execution plan should be: - | id | name | dependencies | operator info | - | 19 | Project | 18 | | - | 18 | BiLeftJoin | 10,17 | | - | 10 | Project | 9 | | - | 9 | BiCartesianProduct | 24,25 | | - | 24 | AppendVertices | 20 | | - | 20 | IndexScan | 2 | | - | 2 | Start | | | - | 25 | AppendVertices | 21 | | - | 21 | IndexScan | 6 | | - | 6 | Start | | | - | 17 | Project | 16 | | - | 16 | ShortestPath | 15 | | - | 15 | BiCartesianProduct | 12,14 | | - | 12 | Project | 11 | | - | 11 | Argument | | | - | 14 | Project | 13 | | - | 13 | Argument | | | + | id | name | dependencies | operator info | + | 19 | Project | 18 | | + | 18 | HashLeftJoin | 10,17 | | + | 10 | Project | 9 | | + | 9 | CrossJoin | 24,25 | | + | 24 | AppendVertices | 20 | | + | 20 | IndexScan | 2 | | + | 2 | Start | | | + | 25 | AppendVertices | 21 | | + | 21 | IndexScan | 6 | | + | 6 | Start | | | + | 17 | Project | 16 | | + | 16 | ShortestPath | 15 | | + | 15 | CrossJoin | 12,14 | | + | 12 | Project | 11 | | + | 11 | Argument | | | + | 14 | Project | 13 | | + | 13 | Argument | | |