From 444a1f194c838fee1e87f1e2dff6f74a70bd4665 Mon Sep 17 00:00:00 2001 From: "jie.wang" <38901892+jievince@users.noreply.github.com> Date: Sat, 8 Oct 2022 17:30:12 +0800 Subject: [PATCH] optimize for the last step of go n steps plan (#4690) * optimize for the last step of go n steps plan * improve some comments --- src/graph/context/ast/QueryAstContext.h | 5 +- .../executor/query/GetDstBySrcExecutor.h | 2 +- src/graph/planner/ngql/GoPlanner.cpp | 65 +++++++++++++------ src/graph/planner/ngql/GoPlanner.h | 2 + src/graph/visitor/DeducePropsVisitor.h | 4 ++ tests/tck/features/go/SimpleCase.feature | 59 ++++++++++++++++- 6 files changed, 115 insertions(+), 22 deletions(-) diff --git a/src/graph/context/ast/QueryAstContext.h b/src/graph/context/ast/QueryAstContext.h index 3e5146a318c..80fb21c9326 100644 --- a/src/graph/context/ast/QueryAstContext.h +++ b/src/graph/context/ast/QueryAstContext.h @@ -93,8 +93,11 @@ struct GoContext final : AstContext { bool joinInput{false}; // true when $$.tag.prop exist bool joinDst{false}; + // true when yield clause only yields distinct dst id + bool onlyYieldDistinctDstId{false}; + // true when edge props only use dst id + bool edgePropsOnlyUseDstId{false}; // Optimize for some simple go sentence which only need dst id. - // eg. GO 1 TO N STEPS FROM "A" OVER like YIELD DISTINCT like._dst bool isSimple{false}; ExpressionProps exprProps; diff --git a/src/graph/executor/query/GetDstBySrcExecutor.h b/src/graph/executor/query/GetDstBySrcExecutor.h index 994f76d0caf..df8f046f6d6 100644 --- a/src/graph/executor/query/GetDstBySrcExecutor.h +++ b/src/graph/executor/query/GetDstBySrcExecutor.h @@ -8,7 +8,7 @@ #include "graph/executor/StorageAccessExecutor.h" #include "graph/planner/plan/Query.h" -// get the dst id of the src id +// Get the dst id of the src id. The dst id is partially deduplicated on the storage side. 
namespace nebula { namespace graph { class GetDstBySrcExecutor final : public StorageAccessExecutor { diff --git a/src/graph/planner/ngql/GoPlanner.cpp b/src/graph/planner/ngql/GoPlanner.cpp index e2fbf344028..c4f5e7abe25 100644 --- a/src/graph/planner/ngql/GoPlanner.cpp +++ b/src/graph/planner/ngql/GoPlanner.cpp @@ -310,13 +310,28 @@ PlanNode* GoPlanner::buildLastStepJoinPlan(PlanNode* gn, PlanNode* join) { PlanNode* GoPlanner::lastStep(PlanNode* dep, PlanNode* join) { auto qctx = goCtx_->qctx; PlanNode* scan = nullptr; - if (goCtx_->isSimple) { + + if (!goCtx_->joinInput && goCtx_->limits.empty() && goCtx_->onlyYieldDistinctDstId && + !goCtx_->exprProps.hasSrcTagProperty() && goCtx_->edgePropsOnlyUseDstId) { auto* gd = GetDstBySrc::make(qctx, dep, goCtx_->space.id); gd->setSrc(goCtx_->from.src); gd->setEdgeTypes(buildEdgeTypes()); gd->setInputVar(goCtx_->vidsVar); - gd->setColNames({kDst}); - scan = gd; + gd->setColNames(goCtx_->colNames); + auto* dedup = Dedup::make(qctx, gd); + dedup->setColNames(goCtx_->colNames); + scan = dedup; + + auto* root = goCtx_->joinDst ? buildJoinDstPlan(scan) : scan; + if (goCtx_->filter != nullptr) { + root = Filter::make(qctx, root, goCtx_->filter); + } + if (goCtx_->joinDst) { + goCtx_->yieldExpr->columns()[0]->setExpr(ColumnExpression::make(qctx->objPool(), 0)); + root = Project::make(qctx, root, goCtx_->yieldExpr); + } + root->setColNames(std::move(goCtx_->colNames)); + return root; } else { auto* gn = GetNeighbors::make(qctx, dep, goCtx_->space.id); gn->setSrc(goCtx_->from.src); @@ -324,27 +339,24 @@ PlanNode* GoPlanner::lastStep(PlanNode* dep, PlanNode* join) { gn->setEdgeProps(buildEdgeProps(false)); gn->setInputVar(goCtx_->vidsVar); scan = gn; - } - const auto& steps = goCtx_->steps; - auto* sampleLimit = buildSampleLimit(scan, steps.isMToN() ? steps.nSteps() : steps.steps()); + const auto& steps = goCtx_->steps; + auto* sampleLimit = buildSampleLimit(scan, steps.isMToN() ? 
steps.nSteps() : steps.steps()); - auto* root = buildLastStepJoinPlan(sampleLimit, join); + auto* root = buildLastStepJoinPlan(sampleLimit, join); - if (goCtx_->filter != nullptr) { - root = Filter::make(qctx, root, goCtx_->filter); - } + if (goCtx_->filter != nullptr) { + root = Filter::make(qctx, root, goCtx_->filter); + } - if (!goCtx_->isSimple) { root = Project::make(qctx, root, goCtx_->yieldExpr); - } - root->setColNames(std::move(goCtx_->colNames)); + root->setColNames(std::move(goCtx_->colNames)); - if (goCtx_->distinct) { - root = Dedup::make(qctx, root); + if (goCtx_->distinct) { + root = Dedup::make(qctx, root); + } + return root; } - - return root; } PlanNode* GoPlanner::buildOneStepJoinPlan(PlanNode* gn) { @@ -595,6 +607,8 @@ StatusOr GoPlanner::transform(AstContext* astCtx) { auto qctx = goCtx_->qctx; goCtx_->joinInput = goCtx_->from.fromType != FromType::kInstantExpr; goCtx_->joinDst = !goCtx_->exprProps.dstTagProps().empty(); + goCtx_->onlyYieldDistinctDstId = onlyYieldDistinctDstId(); + goCtx_->edgePropsOnlyUseDstId = edgePropsOnlyUseDstId(); goCtx_->isSimple = isSimpleCase(); if (goCtx_->isSimple) { // We don't need to do a inner join in such case. 
@@ -626,18 +640,31 @@ StatusOr GoPlanner::transform(AstContext* astCtx) { } bool GoPlanner::isSimpleCase() { - if (goCtx_->joinDst || goCtx_->filter || !goCtx_->distinct || !goCtx_->limits.empty()) { + if (!goCtx_->onlyYieldDistinctDstId) { + return false; + } + if (goCtx_->joinDst || goCtx_->filter || !goCtx_->limits.empty()) { return false; } auto& exprProps = goCtx_->exprProps; if (!exprProps.srcTagProps().empty()) return false; if (!exprProps.dstTagProps().empty()) return false; - for (auto& edgeProp : exprProps.edgeProps()) { + + return goCtx_->edgePropsOnlyUseDstId; +} + +bool GoPlanner::edgePropsOnlyUseDstId() { + for (auto& edgeProp : goCtx_->exprProps.edgeProps()) { auto props = edgeProp.second; if (props.size() != 1) return false; if (props.find(kDst) == props.end()) return false; } + return true; +} + +bool GoPlanner::onlyYieldDistinctDstId() { + if (!goCtx_->distinct) return false; if (goCtx_->yieldExpr->columns().size() != 1) { return false; } diff --git a/src/graph/planner/ngql/GoPlanner.h b/src/graph/planner/ngql/GoPlanner.h index 9217666ad42..cd196cc0422 100644 --- a/src/graph/planner/ngql/GoPlanner.h +++ b/src/graph/planner/ngql/GoPlanner.h @@ -90,6 +90,8 @@ class GoPlanner final : public Planner { Expression* stepSampleLimit(); bool isSimpleCase(); + bool edgePropsOnlyUseDstId(); + bool onlyYieldDistinctDstId(); std::vector buildEdgeTypes(); private: diff --git a/src/graph/visitor/DeducePropsVisitor.h b/src/graph/visitor/DeducePropsVisitor.h index b556edce78a..6d6db6acd12 100644 --- a/src/graph/visitor/DeducePropsVisitor.h +++ b/src/graph/visitor/DeducePropsVisitor.h @@ -54,6 +54,10 @@ class ExpressionProps final { return !inputProps_.empty() || !varProps_.empty(); } + bool hasSrcTagProperty() const { + return !srcTagProps_.empty(); + } + bool hasSrcDstTagProperty() const { return !srcTagProps_.empty() || !dstTagProps_.empty(); } diff --git a/tests/tck/features/go/SimpleCase.feature b/tests/tck/features/go/SimpleCase.feature index 
61131763c63..238c3955b40 100644 --- a/tests/tck/features/go/SimpleCase.feature +++ b/tests/tck/features/go/SimpleCase.feature @@ -39,7 +39,46 @@ Feature: Simple case | 2 | GetDstBySrc | 1 | | | 1 | Start | | | | 0 | Start | | | - # The last step degenerates to `GetNeighbors` when yield or filter properties other than `_dst` + When profiling query: + """ + GO 3 STEPS FROM "Tony Parker" OVER serve BIDIRECT WHERE $$.team.name != "Lakers" YIELD DISTINCT id($$) | YIELD count(*) + """ + Then the result should be, in any order, with relax comparison: + | count(*) | + | 21 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 12 | Aggregate | 11 | | + | 11 | Project | 10 | | + | 10 | Filter | 9 | | + | 9 | LeftJoin | 8 | | + | 8 | Project | 7 | | + | 7 | GetVertices | 6 | | + | 6 | Dedup | 5 | | + | 5 | GetDstBySrc | 4 | | + | 4 | Loop | 0 | {"loopBody": "3"} | + | 3 | Dedup | 2 | | + | 2 | GetDstBySrc | 1 | | + | 1 | Start | | | + | 0 | Start | | | + # The last step degenerates to `GetNeighbors` when the yield clause is not `YIELD DISTINCT id($$)` + When profiling query: + """ + GO 3 STEPS FROM "Tony Parker" OVER serve BIDIRECT YIELD id($$) AS dst | YIELD count(*) + """ + Then the result should be, in any order, with relax comparison: + | count(*) | + | 65 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 7 | Aggregate | 6 | | + | 6 | Project | 5 | | + | 5 | GetNeighbors | 4 | | + | 4 | Loop | 0 | {"loopBody": "3"} | + | 3 | Dedup | 2 | | + | 2 | GetDstBySrc | 1 | | + | 1 | Start | | | + | 0 | Start | | | When profiling query: """ GO 3 STEPS FROM "Tony Parker" OVER serve BIDIRECT YIELD id($$) AS dst | YIELD count(*) @@ -57,6 +96,24 @@ Feature: Simple case | 2 | GetDstBySrc | 1 | | | 1 | Start | | | | 0 | Start | | | + When profiling query: + """ + GO 3 STEPS FROM "Tony Parker" OVER serve BIDIRECT WHERE $^.player.age > 30 YIELD DISTINCT id($$) AS dst | YIELD count(*) + """ + Then the result should 
be, in any order, with relax comparison: + | count(*) | + | 22 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 9 | Aggregate | 8 | | + | 8 | Dedup | 7 | | + | 7 | Project | 10 | | + | 10 | GetNeighbors | 4 | | + | 4 | Loop | 0 | {"loopBody": "3"} | + | 3 | Dedup | 2 | | + | 2 | GetDstBySrc | 1 | | + | 1 | Start | | | + | 0 | Start | | | When profiling query: """ GO 3 STEPS FROM "Tony Parker" OVER serve BIDIRECT YIELD $$.player.age AS age | YIELD count(*)