From 50a2f463bf5713314dfeb47c6f82dbe097396e5a Mon Sep 17 00:00:00 2001 From: jimingquan Date: Tue, 4 Apr 2023 09:45:02 +0800 Subject: [PATCH 01/20] refactor traverse output (#5464) * refactor traverse output * fix pruneproperties error & none_direct_dst * fix test error * fix shortest path --- src/common/function/FunctionManager.cpp | 9 ++- src/graph/context/ast/CypherAstContext.h | 2 + src/graph/executor/query/TraverseExecutor.cpp | 80 ++++++++++++++----- src/graph/executor/query/TraverseExecutor.h | 1 + .../RemoveAppendVerticesBelowJoinRule.cpp | 2 + src/graph/planner/match/MatchPathPlanner.cpp | 22 +++-- src/graph/planner/match/MatchSolver.cpp | 27 ++++--- src/graph/planner/plan/Query.cpp | 1 + src/graph/planner/plan/Query.h | 9 +++ src/graph/validator/MatchValidator.cpp | 7 +- src/graph/visitor/PrunePropertiesVisitor.cpp | 28 +++++-- .../optimizer/PrunePropertiesRule.feature | 14 ++-- .../RemoveAppendVerticesBelowJoinRule.feature | 28 +++---- 13 files changed, 160 insertions(+), 70 deletions(-) diff --git a/src/common/function/FunctionManager.cpp b/src/common/function/FunctionManager.cpp index 07765e8be8a..c55d867544e 100644 --- a/src/common/function/FunctionManager.cpp +++ b/src/common/function/FunctionManager.cpp @@ -2017,7 +2017,7 @@ FunctionManager::FunctionManager() { // More information of encoding could be found in `NebulaKeyUtils.h` auto &attr = functions_["none_direct_dst"]; attr.minArity_ = 1; - attr.maxArity_ = 1; + attr.maxArity_ = 2; attr.isAlwaysPure_ = true; attr.body_ = [](const auto &args) -> Value { switch (args[0].get().type()) { @@ -2035,6 +2035,13 @@ FunctionManager::FunctionManager() { case Value::Type::LIST: { const auto &listVal = args[0].get().getList().values; if (listVal.empty()) { + if (args.size() == 2) { + if (args[1].get().type() == Value::Type::VERTEX) { + const auto &v = args[1].get().getVertex(); + return v.vid; + } + return Value::kNullBadType; + } return Value::kNullBadType; } auto &lastVal = listVal.back(); diff --git a/src/graph/context/ast/CypherAstContext.h b/src/graph/context/ast/CypherAstContext.h index 1b7ca85c235..47b9a184bf2 100644 --- a/src/graph/context/ast/CypherAstContext.h +++ b/src/graph/context/ast/CypherAstContext.h @@ -51,6 +51,8 @@ struct EdgeInfo { MatchEdge::Direction direction{MatchEdge::Direction::OUT_EDGE}; std::vector types; std::string alias; + // use for construct path struct + std::string innerAlias; const MapExpression* props{nullptr}; Expression* filter{nullptr}; }; diff --git a/src/graph/executor/query/TraverseExecutor.cpp b/src/graph/executor/query/TraverseExecutor.cpp index 3c34b8c6255..9425917b66d 100644 --- a/src/graph/executor/query/TraverseExecutor.cpp +++ b/src/graph/executor/query/TraverseExecutor.cpp @@ -24,6 +24,7 @@ namespace graph { folly::Future TraverseExecutor::execute() { range_ = traverse_->stepRange(); + genPath_ = traverse_->genPath(); NG_RETURN_IF_ERROR(buildRequestVids()); if (vids_.empty()) { DataSet emptyDs; @@ -374,9 +375,12 @@ std::vector TraverseExecutor::buildZeroStepPath() { for (auto& p : prevPaths) { Row row = p; List edgeList; - edgeList.values.emplace_back(vertex); row.values.emplace_back(vertex); - row.values.emplace_back(std::move(edgeList)); + row.values.emplace_back(edgeList); + if (genPath_) { + edgeList.values.emplace_back(vertex); + row.values.emplace_back(std::move(edgeList)); + } result.emplace_back(std::move(row)); } } @@ -384,9 +388,12 @@ std::vector TraverseExecutor::buildZeroStepPath() { for (auto& vertex : initVertices_) { Row row; List edgeList; - edgeList.values.emplace_back(vertex); row.values.emplace_back(vertex); - row.values.emplace_back(std::move(edgeList)); + row.values.emplace_back(edgeList); + if (genPath_) { + edgeList.values.emplace_back(vertex); + row.values.emplace_back(std::move(edgeList)); + } result.emplace_back(std::move(row)); } } @@ -477,38 +484,53 @@ std::vector TraverseExecutor::buildPath(const Value& initVertex, return std::vector(); } - std::vector result; - result.reserve(adjEdges.size()); + std::vector oneStepPath; + oneStepPath.reserve(adjEdges.size()); for (auto& edge : adjEdges) { List edgeList; edgeList.values.emplace_back(edge); Row row; row.values.emplace_back(src); - row.values.emplace_back(std::move(edgeList)); - result.emplace_back(std::move(row)); + // only contain edges + row.values.emplace_back(edgeList); + if (genPath_) { + // contain nodes & edges + row.values.emplace_back(std::move(edgeList)); + } + oneStepPath.emplace_back(std::move(row)); } if (maxStep == 1) { if (traverse_->trackPrevPath()) { - return joinPrevPath(initVertex, result); + return joinPrevPath(initVertex, oneStepPath); } - return result; + return oneStepPath; } size_t step = 2; std::vector newResult; std::queue*> queue; + std::queue*> edgeListQueue; std::list>> holder; for (auto& edge : adjEdges) { auto ptr = std::make_unique>(std::vector({edge})); queue.emplace(ptr.get()); + edgeListQueue.emplace(ptr.get()); holder.emplace_back(std::move(ptr)); } - size_t adjSize = queue.size(); - while (!queue.empty()) { - auto edgeListPtr = queue.front(); + + size_t adjSize = edgeListQueue.size(); + while (!edgeListQueue.empty()) { + auto edgeListPtr = edgeListQueue.front(); auto& dst = edgeListPtr->back().getEdge().dst; - queue.pop(); + edgeListQueue.pop(); + + std::vector* vertexEdgeListPtr = nullptr; + if (genPath_) { + vertexEdgeListPtr = queue.front(); + queue.pop(); + } + --adjSize; auto dstIter = adjList_.find(dst); if (dstIter == adjList_.end()) { @@ -516,7 +538,7 @@ std::vector TraverseExecutor::buildPath(const Value& initVertex, if (++step > maxStep) { break; } - adjSize = queue.size(); + adjSize = edgeListQueue.size(); } continue; } @@ -527,29 +549,44 @@ std::vector TraverseExecutor::buildPath(const Value& initVertex, continue; } auto newEdgeListPtr = std::make_unique>(*edgeListPtr); - newEdgeListPtr->emplace_back(dstIter->first); newEdgeListPtr->emplace_back(edge); + std::unique_ptr> newVertexEdgeListPtr = nullptr; + if (genPath_) { + newVertexEdgeListPtr = std::make_unique>(*vertexEdgeListPtr); + newVertexEdgeListPtr->emplace_back(dstIter->first); + newVertexEdgeListPtr->emplace_back(edge); + } + if (step >= minStep) { Row row; row.values.emplace_back(src); + // only contain edges row.values.emplace_back(List(*newEdgeListPtr)); + if (genPath_) { + // contain nodes & edges + row.values.emplace_back(List(*newVertexEdgeListPtr)); + } newResult.emplace_back(std::move(row)); } - queue.emplace(newEdgeListPtr.get()); + edgeListQueue.emplace(newEdgeListPtr.get()); holder.emplace_back(std::move(newEdgeListPtr)); + if (genPath_ && newVertexEdgeListPtr != nullptr) { + queue.emplace(newVertexEdgeListPtr.get()); + holder.emplace_back(std::move(newVertexEdgeListPtr)); + } } if (adjSize == 0) { if (++step > maxStep) { break; } - adjSize = queue.size(); + adjSize = edgeListQueue.size(); } } if (minStep <= 1) { newResult.insert(newResult.begin(), - std::make_move_iterator(result.begin()), - std::make_move_iterator(result.end())); + std::make_move_iterator(oneStepPath.begin()), + std::make_move_iterator(oneStepPath.end())); } if (traverse_->trackPrevPath()) { return joinPrevPath(initVertex, newResult); @@ -570,8 +607,7 @@ std::vector TraverseExecutor::joinPrevPath(const Value& initVertex, if (!hasSameEdgeInPath(prevPath, p)) { // copy Row row = prevPath; - row.values.emplace_back(p.values.front()); - row.values.emplace_back(p.values.back()); + row.values.insert(row.values.end(), p.values.begin(), p.values.end()); newPaths.emplace_back(std::move(row)); } } diff --git a/src/graph/executor/query/TraverseExecutor.h b/src/graph/executor/query/TraverseExecutor.h index 629718b34b5..e8cda9c798f 100644 --- a/src/graph/executor/query/TraverseExecutor.h +++ b/src/graph/executor/query/TraverseExecutor.h @@ -93,6 +93,7 @@ class TraverseExecutor final : public StorageAccessExecutor { private: ObjectPool objPool_; + bool genPath_{false}; VidHashSet vids_; std::vector initVertices_; DataSet result_; diff --git a/src/graph/optimizer/rule/RemoveAppendVerticesBelowJoinRule.cpp b/src/graph/optimizer/rule/RemoveAppendVerticesBelowJoinRule.cpp index 5b0364b7dfd..b9211632d1f 100644 --- a/src/graph/optimizer/rule/RemoveAppendVerticesBelowJoinRule.cpp +++ b/src/graph/optimizer/rule/RemoveAppendVerticesBelowJoinRule.cpp @@ -52,6 +52,7 @@ StatusOr RemoveAppendVerticesBelowJoinRule::transform( auto& avNodeAlias = appendVertices->nodeAlias(); auto& tvEdgeAlias = traverse->edgeAlias(); + auto& tvNodeAlias = traverse->nodeAlias(); auto& leftExprs = join->hashKeys(); auto& rightExprs = join->probeKeys(); @@ -148,6 +149,7 @@ StatusOr RemoveAppendVerticesBelowJoinRule::transform( // and let the new left/inner join use it as right expr auto* args = ArgumentList::make(pool); args->addArgument(InputPropertyExpression::make(pool, tvEdgeAlias)); + args->addArgument(InputPropertyExpression::make(pool, tvNodeAlias)); auto* newPrjExpr = FunctionCallExpression::make(pool, "none_direct_dst", args); auto oldYieldColumns = project->columns()->columns(); diff --git a/src/graph/planner/match/MatchPathPlanner.cpp b/src/graph/planner/match/MatchPathPlanner.cpp index 741c18f785b..24f4f6b5c94 100644 --- a/src/graph/planner/match/MatchPathPlanner.cpp +++ b/src/graph/planner/match/MatchPathPlanner.cpp @@ -26,13 +26,17 @@ MatchPathPlanner::MatchPathPlanner(CypherClauseContextBase* ctx, const Path& pat static std::vector genTraverseColNames(const std::vector& inputCols, const NodeInfo& node, const EdgeInfo& edge, - bool trackPrev) { + bool trackPrev, + bool genPath = false) { std::vector cols; if (trackPrev) { cols = inputCols; } cols.emplace_back(node.alias); cols.emplace_back(edge.alias); + if (genPath) { + cols.emplace_back(edge.innerAlias); + } return cols; } @@ -47,9 +51,12 @@ static std::vector genAppendVColNames(const std::vectoraddArgument(InputPropertyExpression::make(pool, edge.alias)); + args->addArgument(InputPropertyExpression::make(pool, node.alias)); return FunctionCallExpression::make(pool, "none_direct_dst", args); } @@ -218,13 +225,15 @@ Status MatchPathPlanner::leftExpandFromNode(size_t startIndex, SubPlan& subplan) traverse->setEdgeDirection(edge.direction); traverse->setStepRange(stepRange); traverse->setDedup(); + traverse->setGenPath(path_.genPath); // If start from end of the path pattern, the first traverse would not // track the previous path, otherwise, it should. bool trackPrevPath = (startIndex + 1 == nodeInfos.size() ? i != startIndex : true); traverse->setTrackPrevPath(trackPrevPath); - traverse->setColNames(genTraverseColNames(subplan.root->colNames(), node, edge, trackPrevPath)); + traverse->setColNames( + genTraverseColNames(subplan.root->colNames(), node, edge, trackPrevPath, path_.genPath)); subplan.root = traverse; - nextTraverseStart = genNextTraverseStart(qctx->objPool(), edge); + nextTraverseStart = genNextTraverseStart(qctx->objPool(), edge, node); if (expandInto) { // TODO(shylock) optimize to embed filter to Traverse auto* startVid = nodeId(qctx->objPool(), dst); @@ -290,10 +299,11 @@ Status MatchPathPlanner::rightExpandFromNode(size_t startIndex, SubPlan& subplan traverse->setStepRange(stepRange); traverse->setDedup(); traverse->setTrackPrevPath(i != startIndex); + traverse->setGenPath(path_.genPath); traverse->setColNames( - genTraverseColNames(subplan.root->colNames(), node, edge, i != startIndex)); + genTraverseColNames(subplan.root->colNames(), node, edge, i != startIndex, path_.genPath)); subplan.root = traverse; - nextTraverseStart = genNextTraverseStart(qctx->objPool(), edge); + nextTraverseStart = genNextTraverseStart(qctx->objPool(), edge, node); if (expandInto) { auto* startVid = nodeId(qctx->objPool(), dst); auto* endVid = nextTraverseStart; diff --git a/src/graph/planner/match/MatchSolver.cpp b/src/graph/planner/match/MatchSolver.cpp index 8f91271ddb0..8687d906893 100644 --- a/src/graph/planner/match/MatchSolver.cpp +++ b/src/graph/planner/match/MatchSolver.cpp @@ -231,21 +231,17 @@ static YieldColumn* buildVertexColumn(ObjectPool* pool, const std::string& alias return new YieldColumn(InputPropertyExpression::make(pool, alias), alias); } -static YieldColumn* buildEdgeColumn(QueryContext* qctx, const EdgeInfo& edge) { +static YieldColumn* buildEdgeColumn(QueryContext* qctx, const EdgeInfo& edge, bool genPath) { Expression* expr = nullptr; + const std::string& edgeName = genPath ? edge.innerAlias : edge.alias; auto pool = qctx->objPool(); if (edge.range == nullptr) { expr = SubscriptExpression::make( - pool, InputPropertyExpression::make(pool, edge.alias), ConstantExpression::make(pool, 0)); + pool, InputPropertyExpression::make(pool, edgeName), ConstantExpression::make(pool, 0)); } else { - auto innerVar = qctx->vctx()->anonVarGen()->getVar(); - auto* args = ArgumentList::make(pool); - args->addArgument(VariableExpression::makeInner(pool, innerVar)); - auto* filter = FunctionCallExpression::make(pool, "is_edge", args); - expr = ListComprehensionExpression::make( - pool, innerVar, InputPropertyExpression::make(pool, edge.alias), filter); + expr = InputPropertyExpression::make(pool, edgeName); } - return new YieldColumn(expr, edge.alias); + return new YieldColumn(expr, edgeName); } // static @@ -262,16 +258,23 @@ void MatchSolver::buildProjectColumns(QueryContext* qctx, const Path& path, SubP } }; - auto addEdge = [columns, &colNames, qctx](auto& edgeInfo) { + auto addEdge = [columns, &colNames, qctx](auto& edgeInfo, bool genPath = false) { if (!edgeInfo.alias.empty() && !edgeInfo.anonymous) { - columns->addColumn(buildEdgeColumn(qctx, edgeInfo)); - colNames.emplace_back(edgeInfo.alias); + columns->addColumn(buildEdgeColumn(qctx, edgeInfo, genPath)); + if (genPath) { + colNames.emplace_back(edgeInfo.innerAlias); + } else { + colNames.emplace_back(edgeInfo.alias); + } } }; for (size_t i = 0; i < edgeInfos.size(); i++) { addNode(nodeInfos[i]); addEdge(edgeInfos[i]); + if (path.genPath) { + addEdge(edgeInfos[i], true); + } } // last vertex diff --git a/src/graph/planner/plan/Query.cpp b/src/graph/planner/plan/Query.cpp index 8d50e1f09a4..5a64b3e0c8d 100644 --- a/src/graph/planner/plan/Query.cpp +++ b/src/graph/planner/plan/Query.cpp @@ -832,6 +832,7 @@ void Traverse::cloneMembers(const Traverse& g) { if (g.tagFilter_ != nullptr) { setTagFilter(g.tagFilter_->clone()); } + genPath_ = g.genPath(); } std::unique_ptr Traverse::explain() const { diff --git a/src/graph/planner/plan/Query.h b/src/graph/planner/plan/Query.h index 60f8dadf859..c130d4361d7 100644 --- a/src/graph/planner/plan/Query.h +++ b/src/graph/planner/plan/Query.h @@ -1713,10 +1713,18 @@ class Traverse final : public GetNeighbors { firstStepFilter_ = filter; } + void setGenPath(bool genPath) { + genPath_ = genPath; + } + Expression* tagFilter() const { return tagFilter_; } + bool genPath() const { + return genPath_; + } + void setTagFilter(Expression* tagFilter) { tagFilter_ = tagFilter; } @@ -1738,6 +1746,7 @@ class Traverse final : public GetNeighbors { // Push down filter in first step Expression* firstStepFilter_{nullptr}; Expression* tagFilter_{nullptr}; + bool genPath_{false}; }; // Append vertices to a path. diff --git a/src/graph/validator/MatchValidator.cpp b/src/graph/validator/MatchValidator.cpp index a49e4fd7ed2..561322b6aaa 100644 --- a/src/graph/validator/MatchValidator.cpp +++ b/src/graph/validator/MatchValidator.cpp @@ -181,7 +181,11 @@ Status MatchValidator::buildPathExpr(const MatchPath *path, auto pathBuild = PathBuildExpression::make(pool); for (size_t i = 0; i < edgeInfos.size(); ++i) { pathBuild->add(InputPropertyExpression::make(pool, nodeInfos[i].alias)); - pathBuild->add(InputPropertyExpression::make(pool, edgeInfos[i].alias)); + if (pathType == MatchPath::PathType::kDefault) { + pathBuild->add(InputPropertyExpression::make(pool, edgeInfos[i].innerAlias)); + } else { + pathBuild->add(InputPropertyExpression::make(pool, edgeInfos[i].alias)); + } } pathBuild->add(InputPropertyExpression::make(pool, nodeInfos.back().alias)); pathInfo.pathBuild = std::move(pathBuild); @@ -332,6 +336,7 @@ Status MatchValidator::buildEdgeInfo(const MatchPath *path, edgeInfos[i].anonymous = anonymous; edgeInfos[i].direction = direction; edgeInfos[i].alias = alias; + edgeInfos[i].innerAlias = "_" + alias; edgeInfos[i].props = props; edgeInfos[i].filter = filter; } diff --git a/src/graph/visitor/PrunePropertiesVisitor.cpp b/src/graph/visitor/PrunePropertiesVisitor.cpp index 9daed3f1626..8cae8f06e1a 100644 --- a/src/graph/visitor/PrunePropertiesVisitor.cpp +++ b/src/graph/visitor/PrunePropertiesVisitor.cpp @@ -218,8 +218,15 @@ void PrunePropertiesVisitor::visitCurrent(Traverse *node) { // The number of output columns of the Traverse operator is at least two(starting point and edge), // which is by design. DCHECK_GE(colNames.size(), 2); - auto &nodeAlias = colNames[colNames.size() - 2]; - auto &edgeAlias = colNames.back(); + size_t nodeIndex = colNames.size() - 2; + size_t edgeIndex = colNames.size() - 1; + if (node->genPath()) { + // if genPath, traverse operator's last column stores list of alternating vertices and edges + nodeIndex = nodeIndex - 1; + edgeIndex = edgeIndex - 1; + } + auto &nodeAlias = colNames[nodeIndex]; + auto &edgeAlias = colNames[edgeIndex]; if (node->vFilter() != nullptr) { status_ = extractPropsFromExpr(node->vFilter(), nodeAlias); @@ -242,13 +249,22 @@ void PrunePropertiesVisitor::pruneCurrent(Traverse *node) { // The number of output columns of the Traverse operator is at least two(starting point and edge), // which is by design. DCHECK_GE(colNames.size(), 2); - auto &nodeAlias = colNames[colNames.size() - 2]; - auto &edgeAlias = colNames.back(); - auto *vertexProps = node->vertexProps(); + size_t nodeIndex = colNames.size() - 2; + size_t edgeIndex = colNames.size() - 1; + size_t innerEdgeIndex = edgeIndex; + if (node->genPath()) { + // if genPath, traverse operator's last column stores list of alternating vertices and edges + nodeIndex = nodeIndex - 1; + edgeIndex = edgeIndex - 1; + } + auto &nodeAlias = colNames[nodeIndex]; + auto &edgeAlias = colNames[edgeIndex]; + auto &innerEdgeAlias = colNames[innerEdgeIndex]; auto &colsSet = propsUsed_.colsSet; auto &vertexPropsMap = propsUsed_.vertexPropsMap; auto &edgePropsMap = propsUsed_.edgePropsMap; + auto *vertexProps = node->vertexProps(); if (colsSet.find(nodeAlias) == colsSet.end()) { auto aliasIter = vertexPropsMap.find(nodeAlias); if (aliasIter == vertexPropsMap.end()) { @@ -295,7 +311,7 @@ void PrunePropertiesVisitor::pruneCurrent(Traverse *node) { } auto *edgeProps = node->edgeProps(); - if (colsSet.find(edgeAlias) != colsSet.end()) { + if (colsSet.find(edgeAlias) != colsSet.end() || colsSet.find(innerEdgeAlias) != colsSet.end()) { // all edge properties are used return; } diff --git a/tests/tck/features/optimizer/PrunePropertiesRule.feature b/tests/tck/features/optimizer/PrunePropertiesRule.feature index 0c4f38f3e80..5731e68177c 100644 --- a/tests/tck/features/optimizer/PrunePropertiesRule.feature +++ b/tests/tck/features/optimizer/PrunePropertiesRule.feature @@ -411,7 +411,7 @@ Feature: Prune Properties rule | 14 | IndexScan | 2 | | | 2 | Start | | | | 11 | Project | 9 | | - | 9 | Traverse | 8 | { "vertexProps": "" } | + | 9 | Traverse | 8 | | | 8 | Argument | | | @distonly @@ -446,9 +446,8 @@ Feature: Prune Properties rule | 24 | And the execution plan should be: | id | name | dependencies | operator info | - | 7 | Aggregate | 6 | | - | 6 | Project | 5 | | - | 5 | AppendVertices | 4 | { "props": "[{\"props\":[\"age\"] }]" } | + | 7 | Aggregate | 8 | | + | 8 | AppendVertices | 4 | { "props": "[{\"props\":[\"age\"] }]" } | | 4 | Traverse | 2 | {"vertexProps": "", "edgeProps": "[{ \"props\": [\"_type\", \"_rank\", \"_dst\"]}]" } | | 2 | Dedup | 1 | | | 1 | PassThrough | 3 | | @@ -464,9 +463,8 @@ Feature: Prune Properties rule | 24 | And the execution plan should be: | id | name | dependencies | operator info | - | 7 | Aggregate | 6 | | - | 6 | Project | 5 | | - | 5 | AppendVertices | 4 | { "props": "[{\"props\":[\"_tag\"] }, {\"props\":[\"_tag\"] }, {\"props\":[\"_tag\"] }]" } | + | 7 | Aggregate | 8 | | + | 8 | AppendVertices | 4 | { "props": "[{\"props\":[\"_tag\"] }, {\"props\":[\"_tag\"] }, {\"props\":[\"_tag\"] }]" } | | 4 | Traverse | 2 | {"vertexProps": "" , "edgeProps": "[{ \"props\": [\"_type\", \"_rank\", \"_dst\"]}]" } | | 2 | Dedup | 1 | | | 1 | PassThrough | 3 | | @@ -588,7 +586,7 @@ Feature: Prune Properties rule | 1 | PassThrough | 3 | | | 3 | Start | | | | 26 | Project | 14 | | - | 14 | Traverse | 13 | {"vertexProps": "", "edgeProps": "[{ \"props\": [\"_src\", \"_type\", \"_rank\", \"_dst\", \"start_year\", \"end_year\"]}]" } | + | 14 | Traverse | 13 | {"edgeProps": "[{ \"props\": [\"_src\", \"_type\", \"_rank\", \"_dst\", \"start_year\", \"end_year\"]}]" } | | 13 | Traverse | 12 | {"vertexProps": "", "edgeProps": "[{ \"props\": [\"_type\", \"_rank\", \"_dst\"]}]" } | | 12 | Traverse | 11 | {"vertexProps": "", "edgeProps": "[{ \"props\": [\"_type\", \"_rank\", \"_dst\"]}, { \"props\": [\"_type\", \"_rank\", \"_dst\"]}]" } | | 11 | Argument | | | diff --git a/tests/tck/features/optimizer/RemoveAppendVerticesBelowJoinRule.feature b/tests/tck/features/optimizer/RemoveAppendVerticesBelowJoinRule.feature index 2d767926e47..c04a130e4b6 100644 --- a/tests/tck/features/optimizer/RemoveAppendVerticesBelowJoinRule.feature +++ b/tests/tck/features/optimizer/RemoveAppendVerticesBelowJoinRule.feature @@ -58,7 +58,7 @@ Feature: Remove AppendVertices Below Join | 2 | Dedup | 1 | | | 1 | PassThrough | 3 | | | 3 | Start | | | - | 15 | Project | 14 | {"columns": ["$-.friend AS friend", "$-.friend2 AS friend2", "none_direct_dst($-.__VAR_3) AS friendTeam"]} | + | 15 | Project | 14 | {"columns": ["$-.friend AS friend", "$-.friend2 AS friend2", "none_direct_dst($-.__VAR_3,$-.friend2) AS friendTeam"]} | | 14 | Traverse | 12 | | | 12 | Traverse | 11 | | | 11 | Argument | | | @@ -76,16 +76,16 @@ Feature: Remove AppendVertices Below Join | me | both | he | | ("Tony Parker") | ("Manu Ginobili") | ("Tim Duncan") | And the execution plan should be: - | id | name | dependencies | operator info | - | 13 | HashInnerJoin | 6,12 | {"hashKeys": ["_joinkey($-.both)"], "probeKeys": ["$-.both"]} | - | 6 | Project | 5 | | - | 5 | AppendVertices | 15 | | - | 15 | Traverse | 2 | | - | 2 | Dedup | 1 | | - | 1 | PassThrough | 3 | | - | 3 | Start | | | - | 12 | Project | 16 | {"columns": ["$-.he AS he", "none_direct_dst($-.__VAR_1) AS both"]} | - | 16 | Traverse | 8 | | - | 8 | Dedup | 7 | | - | 7 | PassThrough | 9 | | - | 9 | Start | | | + | id | name | dependencies | operator info | + | 13 | HashInnerJoin | 6,12 | {"hashKeys": ["_joinkey($-.both)"], "probeKeys": ["$-.both"]} | + | 6 | Project | 5 | | + | 5 | AppendVertices | 15 | | + | 15 | Traverse | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 12 | Project | 16 | {"columns": ["$-.he AS he", "none_direct_dst($-.__VAR_1,$-.he) AS both"]} | + | 16 | Traverse | 8 | | + | 8 | Dedup | 7 | | + | 7 | PassThrough | 9 | | + | 9 | Start | | | From 37a24f14a1b648450bb077dfcd9ea6d85125799b Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 4 Apr 2023 12:48:26 +0800 Subject: [PATCH 02/20] Change the compaction filter logic to let periodic compaction go through custom compaction filter, to gc expired data (#5447) --- resources/gflags.json | 1 - src/common/meta/GflagsManager.cpp | 1 - src/kvstore/Common.h | 4 +++- src/kvstore/CompactionFilter.h | 22 +++++++--------------- src/kvstore/NebulaStore.cpp | 4 ---- src/storage/CompactionFilter.h | 12 +++++++++++- src/storage/test/CompactionTest.cpp | 8 ++++++++ src/storage/test/IndexWithTTLTest.cpp | 4 ++++ tests/admin/test_configs.py | 1 - 9 files changed, 33 insertions(+), 24 deletions(-) diff --git a/resources/gflags.json b/resources/gflags.json index 3d6a2b43286..bc928e7b306 100644 --- a/resources/gflags.json +++ b/resources/gflags.json @@ -7,7 +7,6 @@ "clean_wal_interval_secs", "wal_ttl", "clean_wal_interval_secs", - "custom_filter_interval_secs", "accept_partial_success", "system_memory_high_watermark_ratio", "num_rows_to_check_memory", diff --git a/src/common/meta/GflagsManager.cpp b/src/common/meta/GflagsManager.cpp index b1e100aabf0..76c14412211 100644 --- a/src/common/meta/GflagsManager.cpp +++ b/src/common/meta/GflagsManager.cpp @@ -58,7 +58,6 @@ std::unordered_map> GflagsManager {"meta_client_retry_times", {cpp2::ConfigMode::MUTABLE, false}}, {"wal_ttl", {cpp2::ConfigMode::MUTABLE, false}}, {"clean_wal_interval_secs", {cpp2::ConfigMode::MUTABLE, false}}, - {"custom_filter_interval_secs", {cpp2::ConfigMode::MUTABLE, false}}, {"accept_partial_success", {cpp2::ConfigMode::MUTABLE, false}}, {"rocksdb_db_options", {cpp2::ConfigMode::MUTABLE, true}}, diff --git a/src/kvstore/Common.h b/src/kvstore/Common.h index 6e8df267da4..02716567c3b 100644 --- a/src/kvstore/Common.h +++ b/src/kvstore/Common.h @@ -34,13 +34,15 @@ class KVFilter { /** * @brief Whether remove the key during compaction * + * @param level * @param spaceId * @param key * @param val * @return true Key will not be removed * @return false Key will be removed */ - virtual bool filter(GraphSpaceID spaceId, + virtual bool filter(int level, + GraphSpaceID spaceId, const folly::StringPiece& key, const folly::StringPiece& val) const = 0; }; diff --git a/src/kvstore/CompactionFilter.h b/src/kvstore/CompactionFilter.h index bb2830f5282..cb73cacc9b3 100644 --- a/src/kvstore/CompactionFilter.h +++ b/src/kvstore/CompactionFilter.h @@ -34,7 +34,7 @@ class KVCompactionFilter final : public rocksdb::CompactionFilter { /** * @brief whether remove the key during compaction * - * @param level Levels of key in rocksdb, not used for now + * @param level Levels of key in rocksdb * @param key Rocksdb key * @param val Rocksdb val * @return true Key will not be removed @@ -45,8 +45,8 @@ class KVCompactionFilter final : public rocksdb::CompactionFilter { const rocksdb::Slice& val, std::string*, bool*) const override { - UNUSED(level); - return kvFilter_->filter(spaceId_, + return kvFilter_->filter(level, + spaceId_, folly::StringPiece(key.data(), key.size()), folly::StringPiece(val.data(), val.size())); } @@ -77,21 +77,14 @@ class KVCompactionFilterFactory : public rocksdb::CompactionFilterFactory { */ std::unique_ptr CreateCompactionFilter( const rocksdb::CompactionFilter::Context& context) override { - auto now = time::WallClock::fastNowInSec(); if (context.is_full_compaction || context.is_manual_compaction) { LOG(INFO) << "Do full/manual compaction!"; - lastRunCustomFilterTimeSec_ = now; - return std::make_unique(spaceId_, createKVFilter()); } else { - if (FLAGS_custom_filter_interval_secs >= 0 && - now - lastRunCustomFilterTimeSec_ > FLAGS_custom_filter_interval_secs) { - LOG(INFO) << "Do custom minor compaction!"; - lastRunCustomFilterTimeSec_ = now; - return std::make_unique(spaceId_, createKVFilter()); - } - LOG(INFO) << "Do default minor compaction!"; - return std::unique_ptr(nullptr); + // No worry, by default flush will not go through the custom compaction filter. + // See CompactionFilterFactory::ShouldFilterTableFileCreation. + LOG(INFO) << "Do automatic or periodic compaction!"; } + return std::make_unique(spaceId_, createKVFilter()); } const char* Name() const override { @@ -102,7 +95,6 @@ class KVCompactionFilterFactory : public rocksdb::CompactionFilterFactory { private: GraphSpaceID spaceId_; - int32_t lastRunCustomFilterTimeSec_ = 0; }; /** diff --git a/src/kvstore/NebulaStore.cpp b/src/kvstore/NebulaStore.cpp index 7d87695a1c4..58e6ad31be6 100644 --- a/src/kvstore/NebulaStore.cpp +++ b/src/kvstore/NebulaStore.cpp @@ -20,10 +20,6 @@ #include "kvstore/listener/elasticsearch/ESListener.h" DEFINE_string(engine_type, "rocksdb", "rocksdb, memory..."); -DEFINE_int32(custom_filter_interval_secs, - 24 * 3600, - "interval to trigger custom compaction, < 0 means always do " - "default minor compaction"); DEFINE_int32(num_workers, 4, "Number of worker threads"); DEFINE_int32(clean_wal_interval_secs, 600, "interval to trigger clean expired wal"); DEFINE_bool(auto_remove_invalid_space, true, "whether remove data of invalid space when restart"); diff --git a/src/storage/CompactionFilter.h b/src/storage/CompactionFilter.h index 253b82414d4..2c47e1576a9 100644 --- a/src/storage/CompactionFilter.h +++ b/src/storage/CompactionFilter.h @@ -16,6 +16,10 @@ #include "storage/CommonUtils.h" #include "storage/StorageFlags.h" +DEFINE_int32(min_level_for_custom_filter, + 4, + "Minimal level compaction which will go through custom compaction filter"); + namespace nebula { namespace storage { @@ -28,9 +32,15 @@ class StorageCompactionFilter final : public kvstore::KVFilter { CHECK_NOTNULL(schemaMan_); } - bool filter(GraphSpaceID spaceId, + bool filter(int level, + GraphSpaceID spaceId, const folly::StringPiece& key, const folly::StringPiece& val) const override { + if (level < FLAGS_min_level_for_custom_filter) { + // for upper level such as L0/L1, we don't go through the custom + // validation to achieve better performance + return false; + } if (NebulaKeyUtils::isTag(vIdLen_, key)) { return !tagValid(spaceId, key, val); } else if (NebulaKeyUtils::isEdge(vIdLen_, key)) { diff --git a/src/storage/test/CompactionTest.cpp b/src/storage/test/CompactionTest.cpp index 7c4f8cf9079..0130fa2f8d3 100644 --- a/src/storage/test/CompactionTest.cpp +++ b/src/storage/test/CompactionTest.cpp @@ -18,6 +18,8 @@ #include "storage/test/QueryTestUtils.h" #include "storage/test/TestUtils.h" +DECLARE_int32(min_level_for_custom_filter); + namespace nebula { namespace storage { @@ -167,6 +169,7 @@ TEST(CompactionFilterTest, InvalidSchemaFilterTest) { adhoc->removeTagSchema(spaceId, tagId); LOG(INFO) << "Do compaction"; + FLAGS_min_level_for_custom_filter = -1; auto* ns = dynamic_cast(env->kvstore_); ns->compact(spaceId); @@ -215,6 +218,7 @@ TEST(CompactionFilterTest, TTLFilterDataExpiredTest) { sleep(FLAGS_mock_ttl_duration + 1); LOG(INFO) << "Do compaction"; + FLAGS_min_level_for_custom_filter = -1; auto* ns = dynamic_cast(env->kvstore_); ns->compact(spaceId); @@ -262,6 +266,7 @@ TEST(CompactionFilterTest, TTLFilterDataNotExpiredTest) { checkEdgeData(spaceVidLen, spaceId, 102, parts, env, 18); LOG(INFO) << "Do compaction"; + FLAGS_min_level_for_custom_filter = -1; auto* ns = dynamic_cast(env->kvstore_); ns->compact(spaceId); @@ -323,6 +328,7 @@ TEST(CompactionFilterTest, DropIndexTest) { adIndex->removeTagIndex(spaceId, indexId); LOG(INFO) << "Do compaction"; + FLAGS_min_level_for_custom_filter = -1; auto* ns = dynamic_cast(env->kvstore_); ns->compact(spaceId); @@ -392,6 +398,7 @@ TEST(CompactionFilterTest, TTLFilterDataIndexExpiredTest) { sleep(FLAGS_mock_ttl_duration + 1); LOG(INFO) << "Do compaction"; + FLAGS_min_level_for_custom_filter = -1; auto* ns = dynamic_cast(env->kvstore_); ns->compact(spaceId); @@ -460,6 +467,7 @@ TEST(CompactionFilterTest, TTLFilterDataIndexNotExpiredTest) { checkIndexData(spaceId, 102, 6, env, 18); LOG(INFO) << "Do compaction"; + FLAGS_min_level_for_custom_filter = -1; auto* ns = dynamic_cast(env->kvstore_); ns->compact(spaceId); diff --git a/src/storage/test/IndexWithTTLTest.cpp b/src/storage/test/IndexWithTTLTest.cpp index 86f6e9ae7d6..8efe0b0817d 100644 --- a/src/storage/test/IndexWithTTLTest.cpp +++ b/src/storage/test/IndexWithTTLTest.cpp @@ -24,6 +24,8 @@ #include "storage/mutate/UpdateEdgeProcessor.h" #include "storage/mutate/UpdateVertexProcessor.h" +DECLARE_int32(min_level_for_custom_filter); + namespace nebula { namespace storage { @@ -182,6 +184,7 @@ TEST(IndexWithTTLTest, AddVerticesIndexWithTTL) { sleep(2); LOG(INFO) << "Do compaction"; + FLAGS_min_level_for_custom_filter = -1; auto* ns = dynamic_cast(env->kvstore_); ns->compact(1); @@ -229,6 +232,7 @@ TEST(IndexWithTTLTest, AddEdgesIndexWithTTL) { sleep(2); LOG(INFO) << "Do compaction"; + FLAGS_min_level_for_custom_filter = -1; auto* ns = dynamic_cast(env->kvstore_); ns->compact(1); diff --git a/tests/admin/test_configs.py b/tests/admin/test_configs.py index 8a28bd1c79b..ab39049b444 100644 --- a/tests/admin/test_configs.py +++ b/tests/admin/test_configs.py @@ -86,7 +86,6 @@ def test_configs(self): ['STORAGE', 'v', 'int', 'MUTABLE', 3], ['STORAGE', 'wal_ttl', 'int', 'MUTABLE', 14400], ['STORAGE', 'minloglevel', 'int', 'MUTABLE', 0], - ['STORAGE', 'custom_filter_interval_secs', 'int', 'MUTABLE', 86400], ['STORAGE', 'heartbeat_interval_secs', 'int', 'MUTABLE', 1], ['STORAGE', 'meta_client_retry_times', 'int', 'MUTABLE', 3], ['STORAGE', 'rocksdb_db_options', 'map', 'MUTABLE', {}], From 4be13b0cfa72e6cb8b37654348a17a221c972de2 Mon Sep 17 00:00:00 2001 From: Yee <2520865+yixinglu@users.noreply.github.com> Date: Tue, 4 Apr 2023 17:13:51 +0800 Subject: [PATCH 03/20] Push filter down cross join (#5473) * fix comment * push down filter through cross join --------- Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- .../algo/CartesianProductExecutor.cpp | 1 + src/graph/optimizer/CMakeLists.txt | 1 + .../rule/PushFilterDownCrossJoinRule.cpp | 139 ++++++++++++++++++ .../rule/PushFilterDownCrossJoinRule.h | 37 +++++ src/graph/planner/match/MatchSolver.cpp | 3 +- src/graph/planner/match/StartVidFinder.h | 2 +- src/graph/planner/plan/Algo.cpp | 28 ---- src/graph/planner/plan/Algo.h | 26 ---- src/graph/planner/plan/Query.cpp | 28 ++++ src/graph/planner/plan/Query.h | 27 ++++ .../PushFilterDownCrossJoinRule.feature | 32 ++++ 11 files changed, 267 insertions(+), 57 deletions(-) create mode 100644 src/graph/optimizer/rule/PushFilterDownCrossJoinRule.cpp create mode 100644 src/graph/optimizer/rule/PushFilterDownCrossJoinRule.h create mode 100644 tests/tck/features/optimizer/PushFilterDownCrossJoinRule.feature diff --git a/src/graph/executor/algo/CartesianProductExecutor.cpp b/src/graph/executor/algo/CartesianProductExecutor.cpp index 147b60e5af1..b48ab7a4bcc 100644 --- a/src/graph/executor/algo/CartesianProductExecutor.cpp +++ b/src/graph/executor/algo/CartesianProductExecutor.cpp @@ -5,6 +5,7 @@ #include "graph/executor/algo/CartesianProductExecutor.h" #include "graph/planner/plan/Algo.h" +#include "graph/planner/plan/Query.h" namespace nebula { namespace graph { diff --git a/src/graph/optimizer/CMakeLists.txt b/src/graph/optimizer/CMakeLists.txt index d53fa00498e..2d98c2c1a14 100644 --- a/src/graph/optimizer/CMakeLists.txt +++ b/src/graph/optimizer/CMakeLists.txt @@ -10,6 +10,7 @@ nebula_add_library( OptGroup.cpp OptRule.cpp OptContext.cpp + rule/PushFilterDownCrossJoinRule.cpp rule/PushFilterDownGetNbrsRule.cpp rule/RemoveNoopProjectRule.cpp rule/CombineFilterRule.cpp diff --git a/src/graph/optimizer/rule/PushFilterDownCrossJoinRule.cpp b/src/graph/optimizer/rule/PushFilterDownCrossJoinRule.cpp new file mode 100644 index 00000000000..9de2d8ad516 --- /dev/null +++ b/src/graph/optimizer/rule/PushFilterDownCrossJoinRule.cpp @@ -0,0 +1,139 @@ +/* Copyright (c) 2023 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "graph/optimizer/rule/PushFilterDownCrossJoinRule.h" + +#include "graph/optimizer/OptContext.h" +#include "graph/optimizer/OptGroup.h" +#include "graph/planner/plan/PlanNode.h" +#include "graph/planner/plan/Query.h" +#include "graph/util/ExpressionUtils.h" + +using nebula::graph::CrossJoin; +using nebula::graph::ExpressionUtils; +using nebula::graph::Filter; +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; + +namespace nebula { +namespace opt { + +std::unique_ptr PushFilterDownCrossJoinRule::kInstance = + std::unique_ptr(new PushFilterDownCrossJoinRule()); + +PushFilterDownCrossJoinRule::PushFilterDownCrossJoinRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern& PushFilterDownCrossJoinRule::pattern() const { + static Pattern pattern = Pattern::create( + PlanNode::Kind::kFilter, + {Pattern::create( + PlanNode::Kind::kCrossJoin, + {Pattern::create(PlanNode::Kind::kUnknown), Pattern::create(PlanNode::Kind::kUnknown)})}); + return pattern; +} + +StatusOr PushFilterDownCrossJoinRule::transform( + OptContext* octx, const MatchedResult& matched) const { + auto* filterGroupNode = matched.node; + auto* oldFilterNode = filterGroupNode->node(); + DCHECK_EQ(oldFilterNode->kind(), PlanNode::Kind::kFilter); + + auto* crossJoinNode = matched.planNode({0, 0}); + DCHECK_EQ(crossJoinNode->kind(), PlanNode::Kind::kCrossJoin); + auto* oldCrossJoinNode = static_cast(crossJoinNode); + + const auto* condition = static_cast(oldFilterNode)->condition(); + DCHECK(condition); + + const auto& leftResult = matched.result({0, 0, 0}); + const auto& rightResult = matched.result({0, 0, 1}); + + Expression *leftFilterUnpicked = nullptr, *rightFilterUnpicked = nullptr; + OptGroup* leftGroup = pushFilterDownChild(octx, leftResult, condition, &leftFilterUnpicked); + OptGroup* rightGroup = + pushFilterDownChild(octx, rightResult, leftFilterUnpicked, &rightFilterUnpicked); + + if (!leftGroup && !rightGroup) { + return TransformResult::noTransform(); + } + + leftGroup = leftGroup ? leftGroup : const_cast(leftResult.node->group()); + rightGroup = rightGroup ? rightGroup : const_cast(rightResult.node->group()); + + // produce new CrossJoin node + auto* newCrossJoinNode = static_cast(oldCrossJoinNode->clone()); + auto newJoinGroup = rightFilterUnpicked ? OptGroup::create(octx) : filterGroupNode->group(); + // TODO(yee): it's too tricky + auto newGroupNode = rightFilterUnpicked + ? const_cast(newJoinGroup)->makeGroupNode(newCrossJoinNode) + : OptGroupNode::create(octx, newCrossJoinNode, newJoinGroup); + newGroupNode->dependsOn(leftGroup); + newGroupNode->dependsOn(rightGroup); + newCrossJoinNode->setLeftVar(leftGroup->outputVar()); + newCrossJoinNode->setRightVar(rightGroup->outputVar()); + + if (rightFilterUnpicked) { + auto newFilterNode = Filter::make(octx->qctx(), nullptr, rightFilterUnpicked); + newFilterNode->setOutputVar(oldFilterNode->outputVar()); + newFilterNode->setColNames(oldFilterNode->colNames()); + newFilterNode->setInputVar(newCrossJoinNode->outputVar()); + newGroupNode = OptGroupNode::create(octx, newFilterNode, filterGroupNode->group()); + newGroupNode->dependsOn(const_cast(newJoinGroup)); + } else { + newCrossJoinNode->setOutputVar(oldFilterNode->outputVar()); + newCrossJoinNode->setColNames(oldCrossJoinNode->colNames()); + } + + TransformResult result; + result.eraseAll = true; + result.newGroupNodes.emplace_back(newGroupNode); + return result; +} + +OptGroup* PushFilterDownCrossJoinRule::pushFilterDownChild(OptContext* octx, + const MatchedResult& child, + const Expression* condition, + Expression** unpickedFilter) { + if (!condition) return nullptr; + + const auto* childPlanNode = DCHECK_NOTNULL(child.node->node()); + const auto& colNames = childPlanNode->colNames(); + + // split the `condition` based on whether the varPropExpr comes from the left child + auto picker = [&colNames](const Expression* e) -> bool { + return ExpressionUtils::checkColName(colNames, e); + }; + + Expression* filterPicked = nullptr; + ExpressionUtils::splitFilter(condition, picker, &filterPicked, unpickedFilter); + if (!filterPicked) return nullptr; + + auto* newChildPlanNode = childPlanNode->clone(); + DCHECK_NE(childPlanNode->outputVar(), newChildPlanNode->outputVar()); + newChildPlanNode->setInputVar(childPlanNode->inputVar()); + newChildPlanNode->setColNames(childPlanNode->colNames()); + auto* newChildGroup = OptGroup::create(octx); + auto* newChildGroupNode = newChildGroup->makeGroupNode(newChildPlanNode); + for (auto* g : child.node->dependencies()) { + newChildGroupNode->dependsOn(g); + } + + auto* newFilterNode = Filter::make(octx->qctx(), nullptr, filterPicked); + newFilterNode->setOutputVar(childPlanNode->outputVar()); + newFilterNode->setColNames(colNames); + newFilterNode->setInputVar(newChildPlanNode->outputVar()); + auto* group = OptGroup::create(octx); + group->makeGroupNode(newFilterNode)->dependsOn(newChildGroup); + return group; +} + +std::string PushFilterDownCrossJoinRule::toString() const { + return "PushFilterDownCrossJoinRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/graph/optimizer/rule/PushFilterDownCrossJoinRule.h b/src/graph/optimizer/rule/PushFilterDownCrossJoinRule.h new file mode 100644 index 00000000000..21af0c23701 --- /dev/null +++ b/src/graph/optimizer/rule/PushFilterDownCrossJoinRule.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2023 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#ifndef GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNCROSSJOINRULE_H_ +#define GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNCROSSJOINRULE_H_ + +#include "graph/optimizer/OptRule.h" + +namespace nebula { +namespace opt { + +// Push down the filter items into the child sub-plan of [[CrossJoin]] +class PushFilterDownCrossJoinRule final : public OptRule { + public: + const Pattern &pattern() const override; + + StatusOr transform(OptContext *octx, + const MatchedResult &matched) const override; + + std::string toString() const override; + + private: + PushFilterDownCrossJoinRule(); + static OptGroup *pushFilterDownChild(OptContext *octx, + const MatchedResult &child, + const Expression *condition, + Expression **unpickedFilter); + + static std::unique_ptr kInstance; +}; + +} // namespace opt +} // namespace nebula + +#endif // GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNCROSSJOINRULE_H_ diff --git a/src/graph/planner/match/MatchSolver.cpp b/src/graph/planner/match/MatchSolver.cpp index 8687d906893..679829937d7 100644 --- a/src/graph/planner/match/MatchSolver.cpp +++ b/src/graph/planner/match/MatchSolver.cpp @@ -207,8 +207,7 @@ Expression* MatchSolver::makeIndexFilter(const std::string& label, auto* root = relationals[0]; for (auto i = 1u; i < relationals.size(); i++) { - auto* left = root; - root = LogicalExpression::makeAnd(qctx->objPool(), left, relationals[i]); + root = LogicalExpression::makeAnd(qctx->objPool(), root, relationals[i]); } return root; diff --git a/src/graph/planner/match/StartVidFinder.h b/src/graph/planner/match/StartVidFinder.h index 7726ef0b646..531d9dc54f1 100644 --- a/src/graph/planner/match/StartVidFinder.h +++ b/src/graph/planner/match/StartVidFinder.h @@ -28,7 +28,7 @@ using StartVidFinderInstantiateFunc = std::function CartesianProduct::inputVars() const { return varNames; } -std::unique_ptr CrossJoin::explain() const { - return BinaryInputNode::explain(); -} - -PlanNode* CrossJoin::clone() const { - auto* node = make(qctx_); - node->cloneMembers(*this); - return node; -} - -void CrossJoin::cloneMembers(const CrossJoin& r) { - BinaryInputNode::cloneMembers(r); -} - -CrossJoin::CrossJoin(QueryContext* qctx, PlanNode* left, PlanNode* right) - : BinaryInputNode(qctx, Kind::kCrossJoin, left, right) { - auto lColNames = left->colNames(); - auto rColNames = right->colNames(); - lColNames.insert(lColNames.end(), rColNames.begin(), rColNames.end()); - setColNames(lColNames); -} - -void CrossJoin::accept(PlanNodeVisitor* visitor) { - visitor->visit(this); -} - -CrossJoin::CrossJoin(QueryContext* qctx) : BinaryInputNode(qctx, Kind::kCrossJoin) {} - std::unique_ptr Subgraph::explain() const { auto desc = SingleInputNode::explain(); addDescription("src", src_ ? src_->toString() : "", desc.get()); diff --git a/src/graph/planner/plan/Algo.h b/src/graph/planner/plan/Algo.h index 58a5ff83602..74f5f12f14e 100644 --- a/src/graph/planner/plan/Algo.h +++ b/src/graph/planner/plan/Algo.h @@ -437,32 +437,6 @@ class Subgraph final : public SingleInputNode { std::unique_ptr> edgeProps_; }; -class CrossJoin final : public BinaryInputNode { - public: - static CrossJoin* make(QueryContext* qctx, PlanNode* left, PlanNode* right) { - return qctx->objPool()->makeAndAdd(qctx, left, right); - } - - std::unique_ptr explain() const override; - - PlanNode* clone() const override; - - void accept(PlanNodeVisitor* visitor) override; - - private: - friend ObjectPool; - - // used for clone only - static CrossJoin* make(QueryContext* qctx) { - return qctx->objPool()->makeAndAdd(qctx); - } - - void cloneMembers(const CrossJoin& r); - - CrossJoin(QueryContext* qctx, PlanNode* left, PlanNode* right); - // use for clone - explicit CrossJoin(QueryContext* qctx); -}; } // namespace graph } // namespace nebula #endif // GRAPH_PLANNER_PLAN_ALGO_H_ diff --git a/src/graph/planner/plan/Query.cpp b/src/graph/planner/plan/Query.cpp index 5a64b3e0c8d..6dd9c037f48 100644 --- a/src/graph/planner/plan/Query.cpp +++ b/src/graph/planner/plan/Query.cpp @@ -959,6 +959,34 @@ void HashInnerJoin::cloneMembers(const HashInnerJoin& l) { HashJoin::cloneMembers(l); } +std::unique_ptr CrossJoin::explain() const { + return BinaryInputNode::explain(); +} + +PlanNode* CrossJoin::clone() const { + auto* node = make(qctx_); + node->cloneMembers(*this); + return node; +} + +void CrossJoin::cloneMembers(const CrossJoin& r) { + BinaryInputNode::cloneMembers(r); +} + +CrossJoin::CrossJoin(QueryContext* qctx, PlanNode* left, PlanNode* right) + : BinaryInputNode(qctx, Kind::kCrossJoin, left, right) { + auto lColNames = left->colNames(); + auto rColNames = right->colNames(); + lColNames.insert(lColNames.end(), rColNames.begin(), rColNames.end()); + setColNames(lColNames); +} + +void CrossJoin::accept(PlanNodeVisitor* visitor) { + visitor->visit(this); +} + +CrossJoin::CrossJoin(QueryContext* qctx) : BinaryInputNode(qctx, Kind::kCrossJoin) {} + std::unique_ptr RollUpApply::explain() const { auto desc = BinaryInputNode::explain(); addDescription("compareCols", folly::toJson(util::toJson(compareCols_)), desc.get()); diff --git a/src/graph/planner/plan/Query.h b/src/graph/planner/plan/Query.h index c130d4361d7..fdd055a89af 100644 --- a/src/graph/planner/plan/Query.h +++ b/src/graph/planner/plan/Query.h @@ -1899,6 +1899,33 @@ class HashInnerJoin final : public HashJoin { void cloneMembers(const HashInnerJoin&); }; +class CrossJoin final : public BinaryInputNode { + public: + static CrossJoin* make(QueryContext* qctx, PlanNode* left, PlanNode* right) { + return qctx->objPool()->makeAndAdd(qctx, left, right); + } + + std::unique_ptr explain() const override; + + PlanNode* clone() const override; + + void accept(PlanNodeVisitor* visitor) override; + + private: + friend ObjectPool; + + // used for clone only + static CrossJoin* make(QueryContext* qctx) { + return qctx->objPool()->makeAndAdd(qctx); + } + + void cloneMembers(const CrossJoin& r); + + CrossJoin(QueryContext* qctx, PlanNode* left, PlanNode* right); + // use for clone + explicit CrossJoin(QueryContext* qctx); +}; + // Roll Up Apply two results from two inputs. class RollUpApply : public BinaryInputNode { public: diff --git a/tests/tck/features/optimizer/PushFilterDownCrossJoinRule.feature b/tests/tck/features/optimizer/PushFilterDownCrossJoinRule.feature new file mode 100644 index 00000000000..ee2263c921c --- /dev/null +++ b/tests/tck/features/optimizer/PushFilterDownCrossJoinRule.feature @@ -0,0 +1,32 @@ +# Copyright (c) 2023 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: Push Filter down HashInnerJoin rule + + Background: + Given a graph with space named "nba" + + Scenario: push filter down HashInnerJoin + When profiling query: + """ + with ['Tim Duncan', 'Tony Parker'] as id_list + match (v1:player)-[e]-(v2:player) + where id(v1) in ['Tim Duncan', 'Tony Parker'] AND id(v2) in ['Tim Duncan', 'Tony Parker'] + return count(e) + """ + Then the result should be, in any order: + | count(e) | + | 8 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 11 | Aggregate | 14 | | + | 14 | CrossJoin | 1,16 | | + | 1 | Project | 2 | | + | 2 | Start | | | + | 16 | Project | 15 | | + | 15 | Filter | 18 | {"condition": "((id($-.v1) IN [\"Tim Duncan\",\"Tony Parker\"]) AND (id($-.v2) IN [\"Tim Duncan\",\"Tony Parker\"]))"} | + | 18 | AppendVertices | 17 | | + | 17 | Traverse | 4 | | + | 4 | Dedup | 3 | | + | 3 | PassThrough | 5 | | + | 5 | Start | | | From 10bcb5daa6a9100659605fc675089715f20c8e6d Mon Sep 17 00:00:00 2001 From: jimingquan Date: Tue, 4 Apr 2023 18:10:45 +0800 Subject: [PATCH 04/20] Fix shortest path crash (#5472) --- src/graph/executor/algo/BatchShortestPath.cpp | 3 + .../executor/algo/SingleShortestPath.cpp | 3 + src/graph/validator/MatchValidator.cpp | 11 +- .../features/match/AllShortestPaths.feature | 104 ++++++++++++++++++ .../features/match/SingleShorestPath.feature | 52 +++++++++ 5 files changed, 169 insertions(+), 4 deletions(-) diff --git a/src/graph/executor/algo/BatchShortestPath.cpp b/src/graph/executor/algo/BatchShortestPath.cpp index 1ec6cd6204c..42d69f03bc8 100644 --- a/src/graph/executor/algo/BatchShortestPath.cpp +++ b/src/graph/executor/algo/BatchShortestPath.cpp @@ -16,6 +16,9 @@ namespace graph { folly::Future BatchShortestPath::execute(const HashSet& startVids, const HashSet& endVids, DataSet* result) { + if (maxStep_ == 0) { + return Status::OK(); + } // MemoryTrackerVerified size_t rowSize = init(startVids, endVids); std::vector> futures; diff --git a/src/graph/executor/algo/SingleShortestPath.cpp b/src/graph/executor/algo/SingleShortestPath.cpp index ed0d0cce4b6..90e1bfc9813 100644 --- a/src/graph/executor/algo/SingleShortestPath.cpp +++ b/src/graph/executor/algo/SingleShortestPath.cpp @@ -14,6 +14,9 @@ namespace graph { folly::Future SingleShortestPath::execute(const HashSet& startVids, const HashSet& endVids, DataSet* result) { + if (maxStep_ == 0) { + return Status::OK(); + } size_t rowSize = startVids.size() * endVids.size(); init(startVids, endVids, rowSize); std::vector> futures; diff --git a/src/graph/validator/MatchValidator.cpp b/src/graph/validator/MatchValidator.cpp index 561322b6aaa..d4517705809 100644 --- a/src/graph/validator/MatchValidator.cpp +++ b/src/graph/validator/MatchValidator.cpp @@ -165,10 +165,13 @@ Status MatchValidator::buildPathExpr(const MatchPath *path, return Status::SemanticError( "`shortestPath(...)' only support pattern like (start)-[edge*..hop]-(end)"); } - auto min = edgeInfos.front().range->min(); - if (min != 0 && min != 1) { - return Status::SemanticError( - "`shortestPath(...)' does not support a minimal length different from 0 or 1"); + auto *range = edgeInfos.front().range.get(); + if (range != nullptr) { + auto min = range->min(); + if (min != 0 && min != 1) { + return Status::SemanticError( + "The minimal number of steps for shortestPath() must be either 0 or 1."); + } } pathInfo.pathType = static_cast(pathType); } diff --git a/tests/tck/features/match/AllShortestPaths.feature b/tests/tck/features/match/AllShortestPaths.feature index 766156b7d16..563d90a371d 100644 --- a/tests/tck/features/match/AllShortestPaths.feature +++ b/tests/tck/features/match/AllShortestPaths.feature @@ -6,6 +6,110 @@ Feature: allShortestPaths Background: Given a graph with space named "nba" + Scenario: shortest path invalid step + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH allShortestPaths((v1:player)-[e*2]-(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then a SemanticError should be raised at runtime: The minimal number of steps for shortestPath() must be either 0 or 1. + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH allShortestPaths((v1:player)-[e*2..4]-(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then a SemanticError should be raised at runtime: The minimal number of steps for shortestPath() must be either 0 or 1. + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH allShortestPaths((v1:player)-[e]->(b)--(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then a SemanticError should be raised at runtime: `shortestPath(...)' only support pattern like (start)-[edge*..hop]-(end) + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH allShortestPaths((v1:player)-[e]->(b)-[e2:like]-(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then a SemanticError should be raised at runtime: `shortestPath(...)' only support pattern like (start)-[edge*..hop]-(end) + + Scenario: zero step shortest path + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH allShortestPaths((v1:player)-[e*0]-(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then the result should be, in any order, with relax comparison: + | e | + When executing query: + """ + MATCH allShortestPaths((v1:player{name:"Tim Duncan"})-[e*0]-(v2:player{name:"Tony Parker"})) + RETURN e + """ + Then the result should be, in any order, with relax comparison: + | e | + + Scenario: one step shortest path + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH allShortestPaths((v1:player)-[e]-(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then the result should be, in any order, with relax comparison: + | e | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | + | [:teammate "Tony Parker"->"Tim Duncan" @0 {end_year: 2016, start_year: 2001}] | + | [:teammate "Tim Duncan"->"Tony Parker" @0 {end_year: 2016, start_year: 2001}] | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | + | [:teammate "Tim Duncan"->"Tony Parker" @0 {end_year: 2016, start_year: 2001}] | + | [:teammate "Tony Parker"->"Tim Duncan" @0 {end_year: 2016, start_year: 2001}] | + When executing query: + """ + MATCH allShortestPaths((v1:player{name:"Tim Duncan"})-[e]-(v2:player{name:"Tony Parker"})) + RETURN e + """ + Then the result should be, in any order, with relax comparison: + | e | + | [:teammate "Tony Parker"->"Tim Duncan" @0 {end_year: 2016, start_year: 2001}] | + | [:teammate "Tim Duncan"->"Tony Parker" @0 {end_year: 2016, start_year: 2001}] | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | + When executing query: + """ + MATCH allShortestPaths((v1:player{name:"Tim Duncan"})-[e*1]-(v2:player{name:"Tony Parker"})) + RETURN e + """ + Then the result should be, in any order, with relax comparison: + | e | + | [[:teammate "Tony Parker"->"Tim Duncan" @0 {end_year: 2016, start_year: 2001}]] | + | [[:teammate "Tim Duncan"->"Tony Parker" @0 {end_year: 2016, start_year: 2001}]] | + | [[:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}]] | + | [[:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}]] | + When executing query: + """ + MATCH allShortestPaths((v1:player{name:"Tim Duncan"})-[e*1..1]-(v2:player{name:"Tony Parker"})) + RETURN e + """ + Then the result should be, in any order, with relax comparison: + | e | + | [[:teammate "Tony Parker"->"Tim Duncan" @0 {end_year: 2016, start_year: 2001}]] | + | [[:teammate "Tim Duncan"->"Tony Parker" @0 {end_year: 2016, start_year: 2001}]] | + | [[:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}]] | + | [[:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}]] | + Scenario: allShortestPaths1 When executing query: """ diff --git a/tests/tck/features/match/SingleShorestPath.feature b/tests/tck/features/match/SingleShorestPath.feature index b7e74473073..78ad83c1662 100644 --- a/tests/tck/features/match/SingleShorestPath.feature +++ b/tests/tck/features/match/SingleShorestPath.feature @@ -6,6 +6,58 @@ Feature: single shortestPath Background: Given a graph with space named "nba" + Scenario: shortest path invalid step + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH shortestPath((v1:player)-[e*2]-(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then a SemanticError should be raised at runtime: The minimal number of steps for shortestPath() must be either 0 or 1. + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH shortestPath((v1:player)-[e*2..4]-(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then a SemanticError should be raised at runtime: The minimal number of steps for shortestPath() must be either 0 or 1. + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH shortestPath((v1:player)-[e]->(b)--(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then a SemanticError should be raised at runtime: `shortestPath(...)' only support pattern like (start)-[edge*..hop]-(end) + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH shortestPath((v1:player)-[e]->(b)-[e2:like]-(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then a SemanticError should be raised at runtime: `shortestPath(...)' only support pattern like (start)-[edge*..hop]-(end) + + Scenario: zero step shortestpath + When executing query: + """ + WITH ["Tim Duncan","Tony Parker"] as list1 + MATCH shortestPath((v1:player)-[e*0]-(v2:player)) + WHERE id(v1) in list1 AND id(v2) in list1 + RETURN e + """ + Then the result should be, in any order, with relax comparison: + | e | + When executing query: + """ + MATCH shortestPath((v1:player{name:"Tim Duncan"})-[e*0]-(v2:player{name:"Tony Parker"})) + RETURN e + """ + Then the result should be, in any order, with relax comparison: + | e | + Scenario: single shortestPath1 When executing query: """ From fb5ea407ac58fc899629569eafc4c8a9f839e39d Mon Sep 17 00:00:00 2001 From: "jie.wang" <38901892+jievince@users.noreply.github.com> Date: Tue, 4 Apr 2023 20:03:20 +0800 Subject: [PATCH 05/20] fix crash of geo (#5475) * fix crash of geo * change log(fatal) to log(error) --- .../rule/GeoPredicateIndexScanBaseRule.cpp | 35 ++++++++++++++----- .../optimizer/rule/PushEFilterDownRule.cpp | 2 +- .../optimizer/rule/PushFilterDownNodeRule.cpp | 6 ++-- .../rule/UnionAllIndexScanBaseRule.cpp | 2 +- tests/tck/features/geo/GeoBase.feature | 7 ++++ 5 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/graph/optimizer/rule/GeoPredicateIndexScanBaseRule.cpp b/src/graph/optimizer/rule/GeoPredicateIndexScanBaseRule.cpp index 1f6e1cc1bbe..8e285de2349 100644 --- a/src/graph/optimizer/rule/GeoPredicateIndexScanBaseRule.cpp +++ b/src/graph/optimizer/rule/GeoPredicateIndexScanBaseRule.cpp @@ -66,24 +66,39 @@ StatusOr GeoPredicateIndexScanBaseRule::transform( } auto condition = filter->condition(); - DCHECK(graph::ExpressionUtils::isGeoIndexAcceleratedPredicate(condition)); + if (!graph::ExpressionUtils::isGeoIndexAcceleratedPredicate(condition)) { + return TransformResult::noTransform(); + } auto* geoPredicate = static_cast(condition); - DCHECK_GE(geoPredicate->args()->numArgs(), 2); + if (geoPredicate->args()->numArgs() < 2) { + return TransformResult::noTransform(); + } std::string geoPredicateName = geoPredicate->name(); folly::toLowerAscii(geoPredicateName); auto* first = geoPredicate->args()->args()[0]; auto* second = geoPredicate->args()->args()[1]; - DCHECK(first->kind() == Expression::Kind::kTagProperty || - first->kind() == Expression::Kind::kEdgeProperty); - DCHECK(second->kind() == Expression::Kind::kConstant); - const auto& secondVal = static_cast(second)->value(); - DCHECK(secondVal.isGeography()); + if (first->kind() != Expression::Kind::kTagProperty && + first->kind() != Expression::Kind::kEdgeProperty) { + return TransformResult::noTransform(); + } + + if (!graph::ExpressionUtils::isEvaluableExpr(second, qctx)) { + return TransformResult::noTransform(); + } + + auto secondVal = second->eval(graph::QueryExpressionContext(qctx->ectx())()); + if (!secondVal.isGeography()) { + return TransformResult::noTransform(); + } + const auto& geog = secondVal.getGeography(); auto indexItem = indexItems.back(); const auto& fields = indexItem->get_fields(); - DCHECK_EQ(fields.size(), 1); // geo field + if (fields.size() != 1) { + return TransformResult::noTransform(); + } auto& geoField = fields.back(); auto& geoColumnTypeDef = geoField.get_type(); bool isPointColumn = geoColumnTypeDef.geo_shape_ref().has_value() && @@ -108,7 +123,9 @@ StatusOr GeoPredicateIndexScanBaseRule::transform( } else if (geoPredicateName == "st_coveredby") { scanRanges = geoIndex.covers(geog); } else if (geoPredicateName == "st_dwithin") { - DCHECK_GE(geoPredicate->args()->numArgs(), 3); + if (geoPredicate->args()->numArgs() < 3) { + return TransformResult::noTransform(); + } auto* third = geoPredicate->args()->args()[2]; if (!graph::ExpressionUtils::isEvaluableExpr(third, qctx)) { return TransformResult::noTransform(); diff --git a/src/graph/optimizer/rule/PushEFilterDownRule.cpp b/src/graph/optimizer/rule/PushEFilterDownRule.cpp index ac886530210..c97c42239ef 100644 --- a/src/graph/optimizer/rule/PushEFilterDownRule.cpp +++ b/src/graph/optimizer/rule/PushEFilterDownRule.cpp @@ -174,7 +174,7 @@ std::string PushEFilterDownRule::toString() const { ret = EdgePropertyExpression::make(pool, std::move(edgeNameResult).value(), exp->prop()); break; default: - DLOG(FATAL) << "Unexpected expr: " << exp->kind(); + DLOG(ERROR) << "Unexpected expr: " << exp->kind(); } return ret; } diff --git a/src/graph/optimizer/rule/PushFilterDownNodeRule.cpp b/src/graph/optimizer/rule/PushFilterDownNodeRule.cpp index 9c89cd16c63..3b611ec408d 100644 --- a/src/graph/optimizer/rule/PushFilterDownNodeRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownNodeRule.cpp @@ -55,7 +55,7 @@ StatusOr PushFilterDownNodeRule::transform( auto *append = static_cast(node); vFilter = append->vFilter()->clone(); } else { - DLOG(FATAL) << "Unsupported node kind: " << node->kind(); + DLOG(ERROR) << "Unsupported node kind: " << node->kind(); return TransformResult::noTransform(); } auto visitor = graph::ExtractFilterExprVisitor::makePushGetVertices(pool); @@ -83,7 +83,7 @@ StatusOr PushFilterDownNodeRule::transform( append->setVertexFilter(remainedExpr); append->setFilter(vFilter); } else { - DLOG(FATAL) << "Unsupported node kind: " << newExplore->kind(); + DLOG(ERROR) << "Unsupported node kind: " << newExplore->kind(); return TransformResult::noTransform(); } @@ -111,7 +111,7 @@ bool PushFilterDownNodeRule::match(OptContext *octx, const MatchedResult &matche return false; } } else { - DLOG(FATAL) << "Unexpected node kind: " << node->kind(); + DLOG(ERROR) << "Unexpected node kind: " << node->kind(); return false; } return true; diff --git a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp index cc1bb6a41fd..f148386ad4e 100644 --- a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp +++ b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp @@ -167,7 +167,7 @@ StatusOr UnionAllIndexScanBaseRule::transform(OptContext* ctx, break; } default: - DLOG(FATAL) << "Invalid expression kind: " << static_cast(conditionType); + DLOG(ERROR) << "Invalid expression kind: " << static_cast(conditionType); return TransformResult::noTransform(); } diff --git a/tests/tck/features/geo/GeoBase.feature b/tests/tck/features/geo/GeoBase.feature index 35861c365b4..ff8c2eb7aa6 100644 --- a/tests/tck/features/geo/GeoBase.feature +++ b/tests/tck/features/geo/GeoBase.feature @@ -20,6 +20,7 @@ Feature: Geo base CREATE EDGE any_shape_edge(geo geography); """ And wait 3 seconds + Given parameters: {"p4": {"s1":"test","s2":"2020-01-01 10:00:00","s3":[1,2,3,4,5],"longitude":[1.0,2.0,3.0],"latitude":[10.1,11.1,12.1]}} Scenario: test geo schema # Desc geo schema @@ -543,6 +544,12 @@ Feature: Geo base | "LINESTRING(3 8, 4.7 73.23)" | | "POLYGON((0 1, 1 2, 2 3, 0 1))" | | "POINT(72.3 84.6)" | + When executing query: + """ + LOOKUP ON any_shape WHERE ST_Distance(any_shape.geo, ST_Point($p4.longitude[0], $p4.latitude[1])) < $p4.latitude[2] YIELD id(vertex) + """ + Then the result should be, in any order: + | id(VERTEX) | When executing query: """ LOOKUP ON any_shape WHERE 8909524.383934560 > ST_Distance(any_shape.geo, ST_Point(3, 8)) YIELD ST_ASText(any_shape.geo); From 14e8d904825427ce69b1fd361ae6357156092f78 Mon Sep 17 00:00:00 2001 From: George <58841610+Shinji-IkariG@users.noreply.github.com> Date: Thu, 6 Apr 2023 11:25:58 +0800 Subject: [PATCH 06/20] fix miss arg $GITHUB_OUTPUT (#5478) --- .github/workflows/nightly.yml | 2 +- .github/workflows/rc.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index d0cb46d65e7..47321b2e120 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -46,7 +46,7 @@ jobs: find pkg-build/cpack_output -type f \( -iname \*.deb -o -iname \*.rpm -o -iname \*.tar.gz \) -exec bash -c "sha256sum {} > {}.sha256sum.txt" \; subdir=$(date -u +%Y.%m.%d) echo "subdir=$subdir" >> $GITHUB_OUTPUT - # - uses: actions/upload-artifact@v1 + # - uses: actions/upload-artifact@v3 # with: # name: ${{ matrix.os }}-nightly # path: pkg-build/cpack_output diff --git a/.github/workflows/rc.yml b/.github/workflows/rc.yml index d9f36d6981c..310364daf2f 100644 --- a/.github/workflows/rc.yml +++ b/.github/workflows/rc.yml @@ -199,7 +199,7 @@ jobs: run: sh -c "find . -mindepth 1 -delete" - uses: actions/checkout@v3 - id: tag - run: echo tagnum=${{ github.event.inputs.version }} + run: echo tagnum=${{ github.event.inputs.version }} >> $GITHUB_OUTPUT - id: oss_package run: | From 0790926494a9f87507d70b2bdc948af7eea9a36c Mon Sep 17 00:00:00 2001 From: "kyle.cao" Date: Fri, 7 Apr 2023 09:34:53 +0800 Subject: [PATCH 07/20] Split optimizer rules (#5470) Fix compile small rename Fix tck Fix tck fmt Fix tck Fix tck --- src/common/expression/Expression.h | 4 + src/common/expression/PropertyExpression.h | 4 + src/graph/optimizer/CMakeLists.txt | 1 + .../rule/PushFilterDownTraverseRule.cpp | 93 ++++++------- .../PushFilterThroughAppendVerticesRule.cpp | 123 ++++++++++++++++++ .../PushFilterThroughAppendVerticesRule.h | 46 +++++++ .../bugfix/LackFilterGetEdges.feature | 19 +++ tests/tck/features/match/SeekById.feature | 1 + .../optimizer/CollapseProjectRule.feature | 2 + .../optimizer/PrunePropertiesRule.feature | 1 + .../optimizer/PushFilterDownBugFixes.feature | 2 + .../PushFilterDownCrossJoinRule.feature | 25 ++-- .../PushFilterDownTraverseRule.feature | 4 +- .../PushLimitDownProjectRule.feature | 1 + .../RemoveAppendVerticesBelowJoinRule.feature | 4 +- 15 files changed, 259 insertions(+), 71 deletions(-) create mode 100644 src/graph/optimizer/rule/PushFilterThroughAppendVerticesRule.cpp create mode 100644 src/graph/optimizer/rule/PushFilterThroughAppendVerticesRule.h diff --git a/src/common/expression/Expression.h b/src/common/expression/Expression.h index a23340589fe..31b4dcfac73 100644 --- a/src/common/expression/Expression.h +++ b/src/common/expression/Expression.h @@ -167,6 +167,10 @@ class Expression { return false; } + virtual bool isPropertyExpr() const { + return false; + } + virtual bool isContainerExpr() const { return false; } diff --git a/src/common/expression/PropertyExpression.h b/src/common/expression/PropertyExpression.h index b2cf81b7343..ab267bd184f 100644 --- a/src/common/expression/PropertyExpression.h +++ b/src/common/expression/PropertyExpression.h @@ -32,6 +32,10 @@ class PropertyExpression : public Expression { public: bool operator==(const Expression& rhs) const override; + bool isPropertyExpr() const override { + return true; + } + const Value& eval(ExpressionContext& ctx) override; const std::string& ref() const { diff --git a/src/graph/optimizer/CMakeLists.txt b/src/graph/optimizer/CMakeLists.txt index 2d98c2c1a14..bd22e5204ed 100644 --- a/src/graph/optimizer/CMakeLists.txt +++ b/src/graph/optimizer/CMakeLists.txt @@ -59,6 +59,7 @@ nebula_add_library( rule/PushLimitDownScanEdgesRule.cpp rule/PushFilterDownTraverseRule.cpp rule/RemoveAppendVerticesBelowJoinRule.cpp + rule/PushFilterThroughAppendVerticesRule.cpp ) nebula_add_subdirectory(test) diff --git a/src/graph/optimizer/rule/PushFilterDownTraverseRule.cpp b/src/graph/optimizer/rule/PushFilterDownTraverseRule.cpp index 6c9b507763c..60dc074a414 100644 --- a/src/graph/optimizer/rule/PushFilterDownTraverseRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownTraverseRule.cpp @@ -32,9 +32,7 @@ PushFilterDownTraverseRule::PushFilterDownTraverseRule() { const Pattern& PushFilterDownTraverseRule::pattern() const { static Pattern pattern = - Pattern::create(PlanNode::Kind::kFilter, - {Pattern::create(PlanNode::Kind::kAppendVertices, - {Pattern::create(PlanNode::Kind::kTraverse)})}); + Pattern::create(PlanNode::Kind::kFilter, {Pattern::create(PlanNode::Kind::kTraverse)}); return pattern; } @@ -42,28 +40,23 @@ bool PushFilterDownTraverseRule::match(OptContext* ctx, const MatchedResult& mat if (!OptRule::match(ctx, matched)) { return false; } - DCHECK_EQ(matched.dependencies[0].dependencies[0].node->node()->kind(), - PlanNode::Kind::kTraverse); - auto traverse = - static_cast(matched.dependencies[0].dependencies[0].node->node()); + DCHECK_EQ(matched.dependencies[0].node->node()->kind(), PlanNode::Kind::kTraverse); + auto traverse = static_cast(matched.dependencies[0].node->node()); return traverse->isOneStep(); } StatusOr PushFilterDownTraverseRule::transform( - OptContext* ctx, const MatchedResult& matched) const { - auto* filterGroupNode = matched.node; - auto* filterGroup = filterGroupNode->group(); - auto* filter = static_cast(filterGroupNode->node()); + OptContext* octx, const MatchedResult& matched) const { + auto* filterGNode = matched.node; + auto* filterGroup = filterGNode->group(); + auto* filter = static_cast(filterGNode->node()); auto* condition = filter->condition(); - auto* avGroupNode = matched.dependencies[0].node; - auto* av = static_cast(avGroupNode->node()); + auto* tvGNode = matched.dependencies[0].node; + auto* tvNode = static_cast(tvGNode->node()); + auto& edgeAlias = tvNode->edgeAlias(); - auto* tvGroupNode = matched.dependencies[0].dependencies[0].node; - auto* tv = static_cast(tvGroupNode->node()); - auto& edgeAlias = tv->edgeAlias(); - - auto qctx = ctx->qctx(); + auto qctx = octx->qctx(); auto pool = qctx->objPool(); // Pick the expr looks like `$-.e[0].likeness @@ -105,49 +98,39 @@ StatusOr PushFilterDownTraverseRule::transform( } auto* newFilterPicked = graph::ExpressionUtils::rewriteEdgePropertyFilter(pool, edgeAlias, filterPicked->clone()); - - Filter* newFilter = nullptr; - OptGroupNode* newFilterGroupNode = nullptr; - if (filterUnpicked) { - newFilter = Filter::make(qctx, nullptr, filterUnpicked); - newFilter->setOutputVar(filter->outputVar()); - newFilter->setColNames(filter->colNames()); - newFilterGroupNode = OptGroupNode::create(ctx, newFilter, filterGroup); - } - - auto* newAv = static_cast(av->clone()); - - OptGroupNode* newAvGroupNode = nullptr; - if (newFilterGroupNode) { - auto* newAvGroup = OptGroup::create(ctx); - newAvGroupNode = newAvGroup->makeGroupNode(newAv); - newFilterGroupNode->dependsOn(newAvGroup); - newFilter->setInputVar(newAv->outputVar()); - } else { - newAvGroupNode = OptGroupNode::create(ctx, newAv, filterGroup); - newAv->setOutputVar(filter->outputVar()); - } - - auto* eFilter = tv->eFilter(); + auto* eFilter = tvNode->eFilter(); Expression* newEFilter = eFilter ? LogicalExpression::makeAnd(pool, newFilterPicked, eFilter->clone()) : newFilterPicked; - auto* newTv = static_cast(tv->clone()); - newAv->setInputVar(newTv->outputVar()); - newTv->setEdgeFilter(newEFilter); - - auto* newTvGroup = OptGroup::create(ctx); - newAvGroupNode->dependsOn(newTvGroup); - auto* newTvGroupNode = newTvGroup->makeGroupNode(newTv); - - for (auto dep : tvGroupNode->dependencies()) { - newTvGroupNode->dependsOn(dep); - } + // produce new Traverse node + auto* newTvNode = static_cast(tvNode->clone()); + newTvNode->setEdgeFilter(newEFilter); + newTvNode->setInputVar(tvNode->inputVar()); + newTvNode->setColNames(tvNode->outputVarPtr()->colNames); + // connect the optimized plan TransformResult result; - result.eraseCurr = true; - result.newGroupNodes.emplace_back(newFilterGroupNode ? newFilterGroupNode : newAvGroupNode); + result.eraseAll = true; + if (filterUnpicked) { + auto* newFilterNode = graph::Filter::make(qctx, newTvNode, filterUnpicked); + newFilterNode->setOutputVar(filter->outputVar()); + newFilterNode->setColNames(filter->colNames()); + auto newFilterGNode = OptGroupNode::create(octx, newFilterNode, filterGroup); + // assemble the new Traverse group below Filter + auto newTvGroup = OptGroup::create(octx); + auto newTvGNode = newTvGroup->makeGroupNode(newTvNode); + newTvGNode->setDeps(tvGNode->dependencies()); + newFilterGNode->setDeps({newTvGroup}); + newFilterNode->setInputVar(newTvNode->outputVar()); + result.newGroupNodes.emplace_back(newFilterGNode); + } else { + // replace the new Traverse node with the old Filter group + auto newTvGNode = OptGroupNode::create(octx, newTvNode, filterGroup); + newTvNode->setOutputVar(filter->outputVar()); + newTvGNode->setDeps(tvGNode->dependencies()); + result.newGroupNodes.emplace_back(newTvGNode); + } return result; } diff --git a/src/graph/optimizer/rule/PushFilterThroughAppendVerticesRule.cpp b/src/graph/optimizer/rule/PushFilterThroughAppendVerticesRule.cpp new file mode 100644 index 00000000000..0f16dc0bd46 --- /dev/null +++ b/src/graph/optimizer/rule/PushFilterThroughAppendVerticesRule.cpp @@ -0,0 +1,123 @@ +/* Copyright (c) 2023 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "graph/optimizer/rule/PushFilterThroughAppendVerticesRule.h" + +#include "common/expression/Expression.h" +#include "graph/optimizer/OptContext.h" +#include "graph/optimizer/OptGroup.h" +#include "graph/planner/plan/PlanNode.h" +#include "graph/planner/plan/Query.h" +#include "graph/util/ExpressionUtils.h" +#include "graph/visitor/ExtractFilterExprVisitor.h" + +using nebula::Expression; +using nebula::graph::AppendVertices; +using nebula::graph::Filter; +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; + +namespace nebula { +namespace opt { + +std::unique_ptr PushFilterThroughAppendVerticesRule::kInstance = + std::unique_ptr(new PushFilterThroughAppendVerticesRule()); + +PushFilterThroughAppendVerticesRule::PushFilterThroughAppendVerticesRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern& PushFilterThroughAppendVerticesRule::pattern() const { + static Pattern pattern = + Pattern::create(PlanNode::Kind::kFilter, {Pattern::create(PlanNode::Kind::kAppendVertices)}); + return pattern; +} + +StatusOr PushFilterThroughAppendVerticesRule::transform( + OptContext* octx, const MatchedResult& matched) const { + auto* oldFilterGroupNode = matched.node; + auto* oldFilterGroup = oldFilterGroupNode->group(); + auto* oldFilterNode = static_cast(oldFilterGroupNode->node()); + auto* condition = oldFilterNode->condition(); + auto* oldAvGroupNode = matched.dependencies[0].node; + auto* oldAvNode = static_cast(oldAvGroupNode->node()); + auto& dstNode = oldAvNode->nodeAlias(); + + auto inputColNames = oldAvNode->inputVars().front()->colNames; + auto qctx = octx->qctx(); + + auto picker = [&inputColNames, &dstNode](const Expression* expr) -> bool { + auto finder = [&inputColNames, &dstNode](const Expression* e) -> bool { + if (e->isPropertyExpr() && + std::find(inputColNames.begin(), + inputColNames.end(), + (static_cast(e)->prop())) == inputColNames.end()) { + return true; + } + if (e->kind() == Expression::Kind::kVar && + static_cast(e)->var() == dstNode) { + return true; + } + return false; + }; + graph::FindVisitor visitor(finder); + const_cast(expr)->accept(&visitor); + return visitor.results().empty(); + }; + Expression* filterPicked = nullptr; + Expression* filterUnpicked = nullptr; + graph::ExpressionUtils::splitFilter(condition, picker, &filterPicked, &filterUnpicked); + + if (!filterPicked) { + return TransformResult::noTransform(); + } + + // produce new Filter node below + auto* newBelowFilterNode = + graph::Filter::make(qctx, const_cast(oldAvNode->dep()), filterPicked); + newBelowFilterNode->setInputVar(oldAvNode->inputVar()); + newBelowFilterNode->setColNames(oldAvNode->inputVars().front()->colNames); + auto newBelowFilterGroup = OptGroup::create(octx); + auto newFilterGroupNode = newBelowFilterGroup->makeGroupNode(newBelowFilterNode); + newFilterGroupNode->setDeps(oldAvGroupNode->dependencies()); + + // produce new AppendVertices node + auto* newAvNode = static_cast(oldAvNode->clone()); + newAvNode->setInputVar(newBelowFilterNode->outputVar()); + + TransformResult result; + result.eraseAll = true; + if (filterUnpicked) { + // produce new Filter node above + auto* newAboveFilterNode = graph::Filter::make(octx->qctx(), newAvNode, filterUnpicked); + newAboveFilterNode->setOutputVar(oldFilterNode->outputVar()); + auto newAboveFilterGroupNode = OptGroupNode::create(octx, newAboveFilterNode, oldFilterGroup); + + auto newAvGroup = OptGroup::create(octx); + auto newAvGroupNode = newAvGroup->makeGroupNode(newAvNode); + newAvGroupNode->setDeps({newBelowFilterGroup}); + newAvNode->setInputVar(newBelowFilterNode->outputVar()); + newAboveFilterGroupNode->setDeps({newAvGroup}); + newAboveFilterNode->setInputVar(newAvNode->outputVar()); + result.newGroupNodes.emplace_back(newAboveFilterGroupNode); + } else { + newAvNode->setOutputVar(oldFilterNode->outputVar()); + // newAvNode's col names, on the hand, should inherit those of the oldAvNode + // since they are the same project. + newAvNode->setColNames(oldAvNode->outputVarPtr()->colNames); + auto newAvGroupNode = OptGroupNode::create(octx, newAvNode, oldFilterGroup); + newAvGroupNode->setDeps({newBelowFilterGroup}); + newAvNode->setInputVar(newBelowFilterNode->outputVar()); + result.newGroupNodes.emplace_back(newAvGroupNode); + } + return result; +} + +std::string PushFilterThroughAppendVerticesRule::toString() const { + return "PushFilterThroughAppendVerticesRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/graph/optimizer/rule/PushFilterThroughAppendVerticesRule.h b/src/graph/optimizer/rule/PushFilterThroughAppendVerticesRule.h new file mode 100644 index 00000000000..db2ce986fab --- /dev/null +++ b/src/graph/optimizer/rule/PushFilterThroughAppendVerticesRule.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2023 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#ifndef GRAPH_OPTIMIZER_RULE_PUSHFILTERTHROUGHAPPENDVERTICES_H_ +#define GRAPH_OPTIMIZER_RULE_PUSHFILTERTHROUGHAPPENDVERTICES_H_ + +#include "graph/optimizer/OptRule.h" + +namespace nebula { +namespace opt { + +/* + * Before: + * Filter(e.likeness > 78 and v.prop > 40) + * | + * AppendVertices(v) + * + * After : + * Filter(v.prop > 40) + * | + * AppendVertices(v) + * | + * Filter(e.likeness > 78) + * + */ + +class PushFilterThroughAppendVerticesRule final : public OptRule { + public: + const Pattern &pattern() const override; + + StatusOr transform(OptContext *ctx, const MatchedResult &matched) const override; + + std::string toString() const override; + + private: + PushFilterThroughAppendVerticesRule(); + + static std::unique_ptr kInstance; +}; + +} // namespace opt +} // namespace nebula + +#endif // GRAPH_OPTIMIZER_RULE_PUSHFILTERTHROUGHAPPENDVERTICES_H_ diff --git a/tests/tck/features/bugfix/LackFilterGetEdges.feature b/tests/tck/features/bugfix/LackFilterGetEdges.feature index b13a0ae0bf7..210f2148e79 100644 --- a/tests/tck/features/bugfix/LackFilterGetEdges.feature +++ b/tests/tck/features/bugfix/LackFilterGetEdges.feature @@ -8,6 +8,25 @@ Feature: Test lack filter of get edges transform # #5145 Scenario: Lack filter of get edges transform + When profiling query: + """ + match ()-[e*1]->() + where e[0].likeness > 78 or uuid() > 100 + return rank(e[0]) AS re limit 3 + """ + Then the result should be, in any order: + | re | + | 0 | + | 0 | + | 0 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 24 | Project | 20 | | + | 20 | Limit | 21 | | + | 21 | AppendVertices | 23 | | + | 23 | Traverse | 22 | { "edge filter": "((*.likeness>78) OR (uuid()>100))"} | + | 22 | ScanVertices | 3 | | + | 3 | Start | | | When profiling query: """ match ()-[e]->() diff --git a/tests/tck/features/match/SeekById.feature b/tests/tck/features/match/SeekById.feature index 5917f341d77..a325cbecf4a 100644 --- a/tests/tck/features/match/SeekById.feature +++ b/tests/tck/features/match/SeekById.feature @@ -860,6 +860,7 @@ Feature: Match seek by id | 11 | Project | 8 | | | 8 | Filter | 4 | | | 4 | AppendVertices | 10 | | + | 10 | Filter | 10 | | | 10 | Traverse | 2 | | | 2 | Dedup | 4 | | | 1 | PassThrough | 3 | | diff --git a/tests/tck/features/optimizer/CollapseProjectRule.feature b/tests/tck/features/optimizer/CollapseProjectRule.feature index 924b61635ba..a7b7afaf3ed 100644 --- a/tests/tck/features/optimizer/CollapseProjectRule.feature +++ b/tests/tck/features/optimizer/CollapseProjectRule.feature @@ -6,6 +6,7 @@ Feature: Collapse Project Rule Background: Given a graph with space named "nba" + @czp Scenario: Collapse Project When profiling query: """ @@ -99,6 +100,7 @@ Feature: Collapse Project Rule | 14 | Project | 12 | | | 12 | Filter | 6 | | | 6 | AppendVertices | 5 | | + | 5 | Filter | 5 | | | 5 | Traverse | 4 | | | 4 | Traverse | 2 | | | 2 | Dedup | 1 | | diff --git a/tests/tck/features/optimizer/PrunePropertiesRule.feature b/tests/tck/features/optimizer/PrunePropertiesRule.feature index 5731e68177c..b360166f8a3 100644 --- a/tests/tck/features/optimizer/PrunePropertiesRule.feature +++ b/tests/tck/features/optimizer/PrunePropertiesRule.feature @@ -579,6 +579,7 @@ Feature: Prune Properties rule | 21 | Project | 20 | | | 20 | Filter | 25 | | | 25 | AppendVertices | 24 | { "props": "[{ \"props\":[\"name\",\"_tag\"]}]" } | + | 24 | Filter | 24 | | | 24 | Traverse | 23 | {"vertexProps": "[{ \"props\":[\"age\"]}]", "edgeProps": "[{ \"props\": [\"_type\", \"_rank\", \"_dst\"]}]" } | | 23 | Traverse | 22 | {"vertexProps": "", "edgeProps": "[{ \"props\": [\"_type\", \"_rank\", \"_dst\"]}]" } | | 22 | Traverse | 2 | {"vertexProps": "", "edgeProps": "[{ \"props\": [\"_type\", \"_rank\", \"_dst\"]}, { \"props\": [\"_type\", \"_rank\", \"_dst\"]}]" } | diff --git a/tests/tck/features/optimizer/PushFilterDownBugFixes.feature b/tests/tck/features/optimizer/PushFilterDownBugFixes.feature index c52ee81cc16..1a71028e0f0 100644 --- a/tests/tck/features/optimizer/PushFilterDownBugFixes.feature +++ b/tests/tck/features/optimizer/PushFilterDownBugFixes.feature @@ -24,6 +24,7 @@ Feature: Bug fixes on filter push-down | 11 | Project | 10 | | | 10 | Filter | 6 | | | 6 | AppendVertices | 5 | | + | 5 | Filter | 5 | | | 5 | Traverse | 4 | | | 4 | Traverse | 2 | | | 2 | Dedup | 1 | | @@ -45,6 +46,7 @@ Feature: Bug fixes on filter push-down | 11 | Project | 10 | | | 10 | Filter | 6 | | | 6 | AppendVertices | 5 | | + | 5 | Filter | 5 | | | 5 | Traverse | 4 | | | 4 | Traverse | 2 | | | 2 | Dedup | 1 | | diff --git a/tests/tck/features/optimizer/PushFilterDownCrossJoinRule.feature b/tests/tck/features/optimizer/PushFilterDownCrossJoinRule.feature index ee2263c921c..ad578e635bf 100644 --- a/tests/tck/features/optimizer/PushFilterDownCrossJoinRule.feature +++ b/tests/tck/features/optimizer/PushFilterDownCrossJoinRule.feature @@ -18,15 +18,16 @@ Feature: Push Filter down HashInnerJoin rule | count(e) | | 8 | And the execution plan should be: - | id | name | dependencies | operator info | - | 11 | Aggregate | 14 | | - | 14 | CrossJoin | 1,16 | | - | 1 | Project | 2 | | - | 2 | Start | | | - | 16 | Project | 15 | | - | 15 | Filter | 18 | {"condition": "((id($-.v1) IN [\"Tim Duncan\",\"Tony Parker\"]) AND (id($-.v2) IN [\"Tim Duncan\",\"Tony Parker\"]))"} | - | 18 | AppendVertices | 17 | | - | 17 | Traverse | 4 | | - | 4 | Dedup | 3 | | - | 3 | PassThrough | 5 | | - | 5 | Start | | | + | id | name | dependencies | operator info | + | 11 | Aggregate | 14 | | + | 14 | CrossJoin | 1,16 | | + | 1 | Project | 2 | | + | 2 | Start | | | + | 16 | Project | 15 | | + | 15 | Filter | 18 | {"condition": "(id($-.v2) IN [\"Tim Duncan\",\"Tony Parker\"])"} | + | 18 | AppendVertices | 17 | | + | 17 | Filter | 17 | {"condition": "(id($-.v1) IN [\"Tim Duncan\",\"Tony Parker\"])"} | + | 17 | Traverse | 4 | | + | 4 | Dedup | 3 | | + | 3 | PassThrough | 5 | | + | 5 | Start | | | diff --git a/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature b/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature index ce9de5da759..d0ee360932d 100644 --- a/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature +++ b/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature @@ -137,9 +137,9 @@ Feature: Push Filter down Traverse rule | 17 | Aggregate | 16 | | | | 16 | HashLeftJoin | 10,15 | | | | 10 | Dedup | 28 | | | - | 28 | Project | 22 | | | - | 22 | Filter | 26 | | | + | 28 | Project | 26 | | | | 26 | AppendVertices | 25 | | | + | 25 | Filter | 25 | | | | 25 | Traverse | 24 | | {"filter": "(serve.start_year>2010)"} | | 24 | Traverse | 2 | | | | 2 | Dedup | 1 | | | diff --git a/tests/tck/features/optimizer/PushLimitDownProjectRule.feature b/tests/tck/features/optimizer/PushLimitDownProjectRule.feature index 5af5c3525ff..54dcf98fa2b 100644 --- a/tests/tck/features/optimizer/PushLimitDownProjectRule.feature +++ b/tests/tck/features/optimizer/PushLimitDownProjectRule.feature @@ -27,6 +27,7 @@ Feature: Push Limit down project rule | 16 | Limit | 11 | | | 11 | Filter | 4 | | | 4 | AppendVertices | 3 | | + | 3 | Filter | 3 | | | 3 | Traverse | 2 | | | 2 | Dedup | 1 | | | 1 | PassThrough | 0 | | diff --git a/tests/tck/features/optimizer/RemoveAppendVerticesBelowJoinRule.feature b/tests/tck/features/optimizer/RemoveAppendVerticesBelowJoinRule.feature index c04a130e4b6..bc5eb8b182d 100644 --- a/tests/tck/features/optimizer/RemoveAppendVerticesBelowJoinRule.feature +++ b/tests/tck/features/optimizer/RemoveAppendVerticesBelowJoinRule.feature @@ -50,9 +50,9 @@ Feature: Remove AppendVertices Below Join | 17 | Aggregate | 16 | | | 16 | HashLeftJoin | 10,15 | {"hashKeys": ["_joinkey($-.friendTeam)", "_joinkey($-.friend)"], "probeKeys": ["$-.friendTeam", "_joinkey($-.friend)"]} | | 10 | Dedup | 28 | | - | 28 | Project | 22 | | - | 22 | Filter | 26 | | + | 28 | Project | 26 | | | 26 | AppendVertices | 25 | | + | 25 | Filter | 25 | | | 25 | Traverse | 24 | | | 24 | Traverse | 2 | | | 2 | Dedup | 1 | | From 313f5eb5ec2528a1fffae1081f8a79f7e6c187d6 Mon Sep 17 00:00:00 2001 From: "kyle.cao" Date: Fri, 7 Apr 2023 20:21:37 +0800 Subject: [PATCH 08/20] Enhancement/optimize edge all predicate (#5481) --- src/common/meta/SchemaManager.h | 20 + src/graph/executor/query/TraverseExecutor.cpp | 8 +- src/graph/optimizer/CMakeLists.txt | 4 +- .../rule/EmbedEdgeAllPredIntoTraverseRule.cpp | 220 +++++++++++ .../rule/EmbedEdgeAllPredIntoTraverseRule.h | 39 ++ .../rule/GeoPredicateIndexScanBaseRule.cpp | 2 +- src/graph/util/ExpressionUtils.cpp | 40 ++ src/graph/util/ExpressionUtils.h | 4 + .../visitor/ExtractFilterExprVisitor.cpp | 83 +++- src/graph/visitor/ExtractFilterExprVisitor.h | 26 +- .../optimizer/CollapseProjectRule.feature | 1 - .../EmbedEdgeAllPredIntoTraverseRule.feature | 358 ++++++++++++++++++ 12 files changed, 788 insertions(+), 17 deletions(-) create mode 100644 src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.cpp create mode 100644 src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.h create mode 100644 tests/tck/features/optimizer/EmbedEdgeAllPredIntoTraverseRule.feature diff --git a/src/common/meta/SchemaManager.h b/src/common/meta/SchemaManager.h index 5e742ea0e20..78b0114b634 100644 --- a/src/common/meta/SchemaManager.h +++ b/src/common/meta/SchemaManager.h @@ -40,6 +40,16 @@ class SchemaManager { virtual StatusOr getPartsNum(GraphSpaceID space) = 0; + std::shared_ptr getTagSchema(GraphSpaceID space, + const std::string& tag, + SchemaVer ver = -1) { + auto tagId = toTagID(space, tag); + if (!tagId.ok()) { + return nullptr; + } + return getTagSchema(space, tagId.value(), ver); + } + virtual std::shared_ptr getTagSchema(GraphSpaceID space, TagID tag, SchemaVer ver = -1) = 0; @@ -47,6 +57,16 @@ class SchemaManager { // Returns a negative number when the schema does not exist virtual StatusOr getLatestTagSchemaVersion(GraphSpaceID space, TagID tag) = 0; + std::shared_ptr getEdgeSchema(GraphSpaceID space, + const std::string& edge, + SchemaVer ver = -1) { + auto edgeType = toEdgeType(space, edge); + if (!edgeType.ok()) { + return nullptr; + } + return getEdgeSchema(space, edgeType.value(), ver); + } + virtual std::shared_ptr getEdgeSchema(GraphSpaceID space, EdgeType edge, SchemaVer ver = -1) = 0; diff --git a/src/graph/executor/query/TraverseExecutor.cpp b/src/graph/executor/query/TraverseExecutor.cpp index 9425917b66d..899f851a92f 100644 --- a/src/graph/executor/query/TraverseExecutor.cpp +++ b/src/graph/executor/query/TraverseExecutor.cpp @@ -62,8 +62,12 @@ Status TraverseExecutor::buildRequestVids() { auto vidType = SchemaUtil::propTypeToValueType(metaVidType.get_type()); for (; iter->valid(); iter->next()) { const auto& vid = src->eval(ctx(iter)); - DCHECK_EQ(vid.type(), vidType) - << "Mismatched vid type: " << vid.type() << ", space vid type: " << vidType; + // FIXME(czp): Remove this DCHECK for now, we should check vid type at compile-time + if (vid.type() != vidType) { + return Status::Error("Vid type mismatched."); + } + // DCHECK_EQ(vid.type(), vidType) + // << "Mismatched vid type: " << vid.type() << ", space vid type: " << vidType; if (vid.type() == vidType) { vids_.emplace(vid); } diff --git a/src/graph/optimizer/CMakeLists.txt b/src/graph/optimizer/CMakeLists.txt index bd22e5204ed..aeb1b0f41da 100644 --- a/src/graph/optimizer/CMakeLists.txt +++ b/src/graph/optimizer/CMakeLists.txt @@ -35,6 +35,7 @@ nebula_add_library( rule/PushFilterDownInnerJoinRule.cpp rule/PushFilterDownNodeRule.cpp rule/PushFilterDownScanVerticesRule.cpp + rule/PushFilterDownTraverseRule.cpp rule/PushVFilterDownScanVerticesRule.cpp rule/OptimizeEdgeIndexScanByFilterRule.cpp rule/OptimizeTagIndexScanByFilterRule.cpp @@ -57,8 +58,9 @@ nebula_add_library( rule/PushLimitDownScanEdgesAppendVerticesRule.cpp rule/PushTopNDownIndexScanRule.cpp rule/PushLimitDownScanEdgesRule.cpp - rule/PushFilterDownTraverseRule.cpp + rule/PushFilterThroughAppendVerticesRule.cpp rule/RemoveAppendVerticesBelowJoinRule.cpp + rule/EmbedEdgeAllPredIntoTraverseRule.cpp rule/PushFilterThroughAppendVerticesRule.cpp ) diff --git a/src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.cpp b/src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.cpp new file mode 100644 index 00000000000..1a58da9c0b9 --- /dev/null +++ b/src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.cpp @@ -0,0 +1,220 @@ +/* Copyright (c) 2023 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.h" + +#include "common/expression/AttributeExpression.h" +#include "common/expression/ConstantExpression.h" +#include "common/expression/Expression.h" +#include "common/expression/PredicateExpression.h" +#include "common/expression/PropertyExpression.h" +#include "common/expression/VariableExpression.h" +#include "graph/optimizer/OptContext.h" +#include "graph/optimizer/OptGroup.h" +#include "graph/planner/plan/PlanNode.h" +#include "graph/planner/plan/Query.h" +#include "graph/util/ExpressionUtils.h" +#include "graph/visitor/RewriteVisitor.h" + +using nebula::Expression; +using nebula::graph::Filter; +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; +using nebula::graph::Traverse; + +namespace nebula { +namespace opt { + +std::unique_ptr EmbedEdgeAllPredIntoTraverseRule::kInstance = + std::unique_ptr(new EmbedEdgeAllPredIntoTraverseRule()); + +EmbedEdgeAllPredIntoTraverseRule::EmbedEdgeAllPredIntoTraverseRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern& EmbedEdgeAllPredIntoTraverseRule::pattern() const { + static Pattern pattern = + Pattern::create(PlanNode::Kind::kFilter, {Pattern::create(PlanNode::Kind::kTraverse)}); + return pattern; +} + +bool EmbedEdgeAllPredIntoTraverseRule::match(OptContext* ctx, const MatchedResult& matched) const { + return OptRule::match(ctx, matched); +} + +bool isEdgeAllPredicate(const Expression* e, + const std::string& edgeAlias, + std::string& innerEdgeVar) { + // reset the inner edge var name + innerEdgeVar = ""; + if (e->kind() != Expression::Kind::kPredicate) { + return false; + } + auto* pe = static_cast(e); + if (pe->name() != "all" || !pe->hasInnerVar()) { + return false; + } + auto var = pe->innerVar(); + if (!pe->collection()->isPropertyExpr()) { + return false; + } + // check edge collection expression + if (static_cast(pe->collection())->prop() != edgeAlias) { + return false; + } + auto ves = graph::ExpressionUtils::collectAll(pe->filter(), {Expression::Kind::kAttribute}); + for (const auto& ve : ves) { + auto iv = static_cast(ve)->left(); + + // check inner vars + if (iv->kind() != Expression::Kind::kVar) { + return false; + } + // only care inner edge vars + if (!static_cast(iv)->isInner()) { + // FIXME(czp): support parameter/variables in edge `all` predicate + return false; + } + + // edge property in AttributeExpression must be Constant string + auto ep = static_cast(ve)->right(); + if (ep->kind() != Expression::Kind::kConstant) { + return false; + } + if (!static_cast(ep)->value().isStr()) { + return false; + } + } + + innerEdgeVar = var; + return true; +} + +// Pick sub-predicate +// rewrite edge `all` predicates to single-hop edge predicate +Expression* rewriteEdgeAllPredicate(const Expression* expr, const std::string& edgeAlias) { + std::string innerEdgeVar; + auto matcher = [&edgeAlias, &innerEdgeVar](const Expression* e) -> bool { + return isEdgeAllPredicate(e, edgeAlias, innerEdgeVar); + }; + auto rewriter = [&innerEdgeVar](const Expression* e) -> Expression* { + DCHECK_EQ(e->kind(), Expression::Kind::kPredicate); + auto fe = static_cast(e)->filter(); + + auto innerMatcher = [&innerEdgeVar](const Expression* ae) { + if (ae->kind() != Expression::Kind::kAttribute) { + return false; + } + auto innerEdgeVarExpr = static_cast(ae)->left(); + if (innerEdgeVarExpr->kind() != Expression::Kind::kVar) { + return false; + } + return static_cast(innerEdgeVarExpr)->var() == innerEdgeVar; + }; + + auto innerRewriter = [](const Expression* ae) { + DCHECK_EQ(ae->kind(), Expression::Kind::kAttribute); + auto attributeExpr = static_cast(ae); + auto* right = attributeExpr->right(); + // edge property name expressions have been checked in the external matcher + DCHECK_EQ(right->kind(), Expression::Kind::kConstant); + auto& prop = static_cast(right)->value().getStr(); + return EdgePropertyExpression::make(ae->getObjPool(), "*", prop); + }; + // Rewrite all the inner var edge attribute expressions of `all` predicate's oldFilterNode to + // EdgePropertyExpression + return graph::RewriteVisitor::transform(fe, std::move(innerMatcher), std::move(innerRewriter)); + }; + return graph::RewriteVisitor::transform(expr, std::move(matcher), std::move(rewriter)); +} + +StatusOr EmbedEdgeAllPredIntoTraverseRule::transform( + OptContext* octx, const MatchedResult& matched) const { + auto* oldFilterGroupNode = matched.node; + auto* oldFilterGroup = oldFilterGroupNode->group(); + auto* oldFilterNode = static_cast(oldFilterGroupNode->node()); + auto* condition = oldFilterNode->condition(); + auto* oldTvGroupNode = matched.dependencies[0].node; + auto* oldTvNode = static_cast(oldTvGroupNode->node()); + auto& edgeAlias = oldTvNode->edgeAlias(); + auto qctx = octx->qctx(); + + // Pick all predicates containing edge `all` predicates under the AND semantics + auto picker = [&edgeAlias](const Expression* expr) -> bool { + bool neverPicked = false; + auto finder = [&neverPicked, &edgeAlias](const Expression* e) -> bool { + if (neverPicked) { + return false; + } + // UnaryNot change the semantics of `all` predicate to `any`, resulting in the inability to + // scatter the edge `all` predicate into a single-hop edge predicate(not cover double-not + // cases) + if (e->kind() == Expression::Kind::kUnaryNot) { + neverPicked = true; + return false; + } + // Not used, the picker only cares if there is an edge `all` predicate in the current operand + std::string innerVar; + return isEdgeAllPredicate(e, edgeAlias, innerVar); + }; + graph::FindVisitor visitor(finder); + const_cast(expr)->accept(&visitor); + return !visitor.results().empty(); + }; + Expression* filterPicked = nullptr; + Expression* filterUnpicked = nullptr; + graph::ExpressionUtils::splitFilter(condition, picker, &filterPicked, &filterUnpicked); + + if (!filterPicked) { + return TransformResult::noTransform(); + } + + // reconnect the existing edge filters + auto* edgeFilter = rewriteEdgeAllPredicate(filterPicked, edgeAlias); + auto* oldEdgeFilter = oldTvNode->eFilter(); + Expression* newEdgeFilter = + oldEdgeFilter ? LogicalExpression::makeAnd( + oldEdgeFilter->getObjPool(), edgeFilter, oldEdgeFilter->clone()) + : edgeFilter; + + // produce new Traverse node + auto* newTvNode = static_cast(oldTvNode->clone()); + newTvNode->setEdgeFilter(newEdgeFilter); + newTvNode->setInputVar(oldTvNode->inputVar()); + newTvNode->setColNames(oldTvNode->outputVarPtr()->colNames); + + // connect the optimized plan + TransformResult result; + result.eraseAll = true; + if (filterUnpicked) { + // assemble the new Filter node with the old Filter group + auto* newAboveFilterNode = graph::Filter::make(qctx, newTvNode, filterUnpicked); + newAboveFilterNode->setOutputVar(oldFilterNode->outputVar()); + newAboveFilterNode->setColNames(oldFilterNode->colNames()); + auto newAboveFilterGroupNode = OptGroupNode::create(octx, newAboveFilterNode, oldFilterGroup); + // assemble the new Traverse group below Filter + auto newTvGroup = OptGroup::create(octx); + auto newTvGroupNode = newTvGroup->makeGroupNode(newTvNode); + newTvGroupNode->setDeps(oldTvGroupNode->dependencies()); + newAboveFilterGroupNode->setDeps({newTvGroup}); + newAboveFilterNode->setInputVar(newTvNode->outputVar()); + result.newGroupNodes.emplace_back(newAboveFilterGroupNode); + } else { + // replace the new Traverse node with the old Filter group + auto newTvGroupNode = OptGroupNode::create(octx, newTvNode, oldFilterGroup); + newTvNode->setOutputVar(oldFilterNode->outputVar()); + newTvGroupNode->setDeps(oldTvGroupNode->dependencies()); + result.newGroupNodes.emplace_back(newTvGroupNode); + } + + return result; +} + +std::string EmbedEdgeAllPredIntoTraverseRule::toString() const { + return "EmbedEdgeAllPredIntoTraverseRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.h b/src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.h new file mode 100644 index 00000000000..3e917d51aea --- /dev/null +++ b/src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2023 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#pragma once + +#include "graph/optimizer/OptRule.h" + +namespace nebula { +namespace opt { + +/* + * Before: + * Filter(all(i in e where i.likeness > 78)) + * | + * Traverse + * + * After : + * Traverse(eFilter_: *.likeness > 78) + */ +class EmbedEdgeAllPredIntoTraverseRule final : public OptRule { + public: + const Pattern &pattern() const override; + + bool match(OptContext *ctx, const MatchedResult &matched) const override; + + StatusOr transform(OptContext *ctx, const MatchedResult &matched) const override; + + std::string toString() const override; + + private: + EmbedEdgeAllPredIntoTraverseRule(); + + static std::unique_ptr kInstance; +}; + +} // namespace opt +} // namespace nebula diff --git a/src/graph/optimizer/rule/GeoPredicateIndexScanBaseRule.cpp b/src/graph/optimizer/rule/GeoPredicateIndexScanBaseRule.cpp index 8e285de2349..cc0c5a443de 100644 --- a/src/graph/optimizer/rule/GeoPredicateIndexScanBaseRule.cpp +++ b/src/graph/optimizer/rule/GeoPredicateIndexScanBaseRule.cpp @@ -164,7 +164,7 @@ StatusOr GeoPredicateIndexScanBaseRule::transform( } TransformResult result; result.newGroupNodes.emplace_back(optScanNode); - result.eraseCurr = true; + result.eraseAll = true; return result; } diff --git a/src/graph/util/ExpressionUtils.cpp b/src/graph/util/ExpressionUtils.cpp index 4fa1bad8439..3217e6beee0 100644 --- a/src/graph/util/ExpressionUtils.cpp +++ b/src/graph/util/ExpressionUtils.cpp @@ -1038,6 +1038,11 @@ void ExpressionUtils::splitFilter(const Expression *expr, std::vector &operands = logicExpr->operands(); for (auto &operand : operands) { + // TODO(czp): Sink all NOTs to second layer [[Refactor]] + // TODO(czp): If find any not, dont pick this operand for now + if (ExpressionUtils::findAny(operand, {Expression::Kind::kUnaryNot})) { + filterUnpickedPtr->addOperand(operand->clone()); + } if (picker(operand)) { filterPickedPtr->addOperand(operand->clone()); } else { @@ -1668,5 +1673,40 @@ bool ExpressionUtils::isOneStepEdgeProp(const std::string &edgeAlias, const Expr return graph::RewriteVisitor::transform(expr, matcher, rewriter); } +// Transform Label Tag property expression like $-.v.player.name to Tag property like player.name +// for more friendly to push down +// \param pool object pool to hold ownership of objects alloacted +// \param node the name of node, i.e. v in pattern (v) +// \param expr the filter expression +/*static*/ Expression *ExpressionUtils::rewriteVertexPropertyFilter(ObjectPool *pool, + const std::string &node, + Expression *expr) { + graph::RewriteVisitor::Matcher matcher = [&node](const Expression *e) -> bool { + if (e->kind() != Expression::Kind::kLabelTagProperty) { + return false; + } + auto *ltpExpr = static_cast(e); + auto *labelExpr = ltpExpr->label(); + DCHECK(labelExpr->kind() == Expression::Kind::kInputProperty || + labelExpr->kind() == Expression::Kind::kVarProperty); + if (labelExpr->kind() != Expression::Kind::kInputProperty && + labelExpr->kind() != Expression::Kind::kVarProperty) { + return false; + } + auto *inputExpr = static_cast(labelExpr); + if (inputExpr->prop() != node) { + return false; + } + return true; + }; + graph::RewriteVisitor::Rewriter rewriter = [pool](const Expression *e) -> Expression * { + DCHECK_EQ(e->kind(), Expression::Kind::kLabelTagProperty); + auto *ltpExpr = static_cast(e); + auto *tagPropExpr = TagPropertyExpression::make(pool, ltpExpr->sym(), ltpExpr->prop()); + return tagPropExpr; + }; + return graph::RewriteVisitor::transform(expr, matcher, rewriter); +} + } // namespace graph } // namespace nebula diff --git a/src/graph/util/ExpressionUtils.h b/src/graph/util/ExpressionUtils.h index b1ff2bbc312..eba95c53e42 100644 --- a/src/graph/util/ExpressionUtils.h +++ b/src/graph/util/ExpressionUtils.h @@ -255,6 +255,10 @@ class ExpressionUtils { static Expression* rewriteEdgePropertyFilter(ObjectPool* pool, const std::string& edgeAlias, Expression* expr); + + static Expression* rewriteVertexPropertyFilter(ObjectPool* pool, + const std::string& node, + Expression* expr); }; } // namespace graph diff --git a/src/graph/visitor/ExtractFilterExprVisitor.cpp b/src/graph/visitor/ExtractFilterExprVisitor.cpp index 77692f4d526..f4a97e1ecc7 100644 --- a/src/graph/visitor/ExtractFilterExprVisitor.cpp +++ b/src/graph/visitor/ExtractFilterExprVisitor.cpp @@ -35,6 +35,19 @@ void ExtractFilterExprVisitor::visit(TagPropertyExpression *expr) { canBePushed_ = false; return; } + if (spaceId_ > 0) { + // Storage don't support nonexists tag or property + auto schema = schemaMng_->getTagSchema(spaceId_, expr->sym()); + if (schema == nullptr) { + canBePushed_ = false; + return; + } + auto field = schema->field(expr->prop()); + if (field == nullptr && (expr->prop() != kTag && expr->prop() != kVid)) { + canBePushed_ = false; + return; + } + } switch (pushType_) { case PushType::kGetNeighbors: canBePushed_ = false; @@ -48,7 +61,20 @@ void ExtractFilterExprVisitor::visit(TagPropertyExpression *expr) { } } -void ExtractFilterExprVisitor::visit(EdgePropertyExpression *) { +void ExtractFilterExprVisitor::visit(EdgePropertyExpression *expr) { + if (spaceId_ > 0) { + // Storage don't support nonexists edge or property + auto schema = schemaMng_->getEdgeSchema(spaceId_, expr->sym()); + if (schema == nullptr) { + canBePushed_ = false; + return; + } + auto field = schema->field(expr->prop()); + if (field == nullptr) { + canBePushed_ = false; + return; + } + } switch (pushType_) { case PushType::kGetNeighbors: case PushType::kGetEdges: @@ -88,7 +114,20 @@ void ExtractFilterExprVisitor::visit(DestPropertyExpression *) { } } -void ExtractFilterExprVisitor::visit(SourcePropertyExpression *) { +void ExtractFilterExprVisitor::visit(SourcePropertyExpression *expr) { + if (spaceId_ > 0) { + // Storage don't support nonexists tag or property + auto schema = schemaMng_->getTagSchema(spaceId_, expr->sym()); + if (schema == nullptr) { + canBePushed_ = false; + return; + } + auto field = schema->field(expr->prop()); + if (field == nullptr) { + canBePushed_ = false; + return; + } + } switch (pushType_) { case PushType::kGetNeighbors: canBePushed_ = true; @@ -100,7 +139,15 @@ void ExtractFilterExprVisitor::visit(SourcePropertyExpression *) { } } -void ExtractFilterExprVisitor::visit(EdgeSrcIdExpression *) { +void ExtractFilterExprVisitor::visit(EdgeSrcIdExpression *expr) { + if (spaceId_ > 0) { + // Storage don't support nonexists edge + auto schema = schemaMng_->getEdgeSchema(spaceId_, expr->sym()); + if (schema == nullptr) { + canBePushed_ = false; + return; + } + } switch (pushType_) { case PushType::kGetNeighbors: case PushType::kGetEdges: @@ -112,7 +159,15 @@ void ExtractFilterExprVisitor::visit(EdgeSrcIdExpression *) { } } -void ExtractFilterExprVisitor::visit(EdgeTypeExpression *) { +void ExtractFilterExprVisitor::visit(EdgeTypeExpression *expr) { + if (spaceId_ > 0) { + // Storage don't support nonexists edge + auto schema = schemaMng_->getEdgeSchema(spaceId_, expr->sym()); + if (schema == nullptr) { + canBePushed_ = false; + return; + } + } switch (pushType_) { case PushType::kGetNeighbors: case PushType::kGetEdges: @@ -124,7 +179,15 @@ void ExtractFilterExprVisitor::visit(EdgeTypeExpression *) { } } -void ExtractFilterExprVisitor::visit(EdgeRankExpression *) { +void ExtractFilterExprVisitor::visit(EdgeRankExpression *expr) { + if (spaceId_ > 0) { + // Storage don't support nonexists edge + auto schema = schemaMng_->getEdgeSchema(spaceId_, expr->sym()); + if (schema == nullptr) { + canBePushed_ = false; + return; + } + } switch (pushType_) { case PushType::kGetNeighbors: case PushType::kGetEdges: @@ -136,7 +199,15 @@ void ExtractFilterExprVisitor::visit(EdgeRankExpression *) { } } -void ExtractFilterExprVisitor::visit(EdgeDstIdExpression *) { +void ExtractFilterExprVisitor::visit(EdgeDstIdExpression *expr) { + if (spaceId_ > 0) { + // Storage don't support nonexists edge + auto schema = schemaMng_->getEdgeSchema(spaceId_, expr->sym()); + if (schema == nullptr) { + canBePushed_ = false; + return; + } + } switch (pushType_) { case PushType::kGetNeighbors: case PushType::kGetEdges: diff --git a/src/graph/visitor/ExtractFilterExprVisitor.h b/src/graph/visitor/ExtractFilterExprVisitor.h index ed917092997..67deec512ad 100644 --- a/src/graph/visitor/ExtractFilterExprVisitor.h +++ b/src/graph/visitor/ExtractFilterExprVisitor.h @@ -8,6 +8,7 @@ #include #include "common/expression/ExprVisitorImpl.h" +#include "common/meta/SchemaManager.h" namespace nebula { namespace graph { @@ -15,6 +16,11 @@ namespace graph { class ExtractFilterExprVisitor final : public ExprVisitorImpl { public: explicit ExtractFilterExprVisitor(ObjectPool *ObjPool) : pool_(ObjPool) {} + ExtractFilterExprVisitor(ObjectPool *ObjPool, + GraphSpaceID spaceId, + meta::SchemaManager *schemaMng) + : pool_(ObjPool), spaceId_(spaceId), schemaMng_(schemaMng) {} + explicit ExtractFilterExprVisitor(ObjectPool *ObjPool, std::vector colNames) : pool_(ObjPool), colNames_(std::move(colNames)) {} @@ -30,20 +36,26 @@ class ExtractFilterExprVisitor final : public ExprVisitorImpl { return remainedExpr_; } - static ExtractFilterExprVisitor makePushGetNeighbors(ObjectPool *pool) { - ExtractFilterExprVisitor visitor(pool); + static ExtractFilterExprVisitor makePushGetNeighbors(ObjectPool *pool, + GraphSpaceID spaceId = -1, + meta::SchemaManager *schemaMng = nullptr) { + ExtractFilterExprVisitor visitor(pool, spaceId, schemaMng); visitor.pushType_ = PushType::kGetNeighbors; return visitor; } - static ExtractFilterExprVisitor makePushGetVertices(ObjectPool *pool) { - ExtractFilterExprVisitor visitor(pool); + static ExtractFilterExprVisitor makePushGetVertices(ObjectPool *pool, + GraphSpaceID spaceId = -1, + meta::SchemaManager *schemaMng = nullptr) { + ExtractFilterExprVisitor visitor(pool, spaceId, schemaMng); visitor.pushType_ = PushType::kGetVertices; return visitor; } - static ExtractFilterExprVisitor makePushGetEdges(ObjectPool *pool) { - ExtractFilterExprVisitor visitor(pool); + static ExtractFilterExprVisitor makePushGetEdges(ObjectPool *pool, + GraphSpaceID spaceId = -1, + meta::SchemaManager *schemaMng = nullptr) { + ExtractFilterExprVisitor visitor(pool, spaceId, schemaMng); visitor.pushType_ = PushType::kGetEdges; return visitor; } @@ -97,6 +109,8 @@ class ExtractFilterExprVisitor final : public ExprVisitorImpl { bool remainedExprFromAnd_{false}; Expression *extractedExpr_{nullptr}; PushType pushType_{PushType::kGetNeighbors}; + GraphSpaceID spaceId_{-1}; + meta::SchemaManager *schemaMng_{nullptr}; std::vector colNames_; }; diff --git a/tests/tck/features/optimizer/CollapseProjectRule.feature b/tests/tck/features/optimizer/CollapseProjectRule.feature index a7b7afaf3ed..5e083827567 100644 --- a/tests/tck/features/optimizer/CollapseProjectRule.feature +++ b/tests/tck/features/optimizer/CollapseProjectRule.feature @@ -6,7 +6,6 @@ Feature: Collapse Project Rule Background: Given a graph with space named "nba" - @czp Scenario: Collapse Project When profiling query: """ diff --git a/tests/tck/features/optimizer/EmbedEdgeAllPredIntoTraverseRule.feature b/tests/tck/features/optimizer/EmbedEdgeAllPredIntoTraverseRule.feature new file mode 100644 index 00000000000..80398fca974 --- /dev/null +++ b/tests/tck/features/optimizer/EmbedEdgeAllPredIntoTraverseRule.feature @@ -0,0 +1,358 @@ +# Copyright (c) 2023 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +@czp +Feature: Embed edge all predicate into Traverse + + Background: + Given a graph with space named "nba" + + Scenario: Embed edge all predicate into Traverse + When profiling query: + """ + MATCH (v:player)-[e:like*1]->(n) + WHERE all(i in e where i.likeness>90) + RETURN [i in e | i.likeness] AS likeness, n.player.age AS nage + """ + Then the result should be, in any order: + | likeness | nage | + | [99] | 33 | + | [99] | 31 | + | [99] | 29 | + | [99] | 30 | + | [99] | 25 | + | [99] | 34 | + | [99] | 41 | + | [99] | 32 | + | [99] | 30 | + | [99] | 42 | + | [99] | 36 | + | [95] | 41 | + | [95] | 42 | + | [100] | 31 | + | [95] | 30 | + | [95] | 41 | + | [95] | 36 | + | [100] | 43 | + | [99] | 34 | + | [99] | 38 | + And the execution plan should be: + | id | name | dependencies | profiling data | operator info | + | 7 | Project | 11 | | | + | 11 | AppendVertices | 13 | | | + | 13 | Traverse | 1 | | {"filter": "(like.likeness>90)"} | + | 1 | IndexScan | 2 | | | + | 2 | Start | | | | + When profiling query: + """ + MATCH (v:player)-[e:like*1]->(n) + WHERE all(i in e where i.likeness>90 or i.likeness<0) + RETURN [i in e | i.likeness] AS likeness, n.player.age AS nage + """ + Then the result should be, in any order: + | likeness | nage | + | [99] | 33 | + | [99] | 31 | + | [99] | 29 | + | [99] | 30 | + | [99] | 25 | + | [99] | 34 | + | [99] | 41 | + | [99] | 32 | + | [99] | 30 | + | [99] | 42 | + | [99] | 36 | + | [95] | 41 | + | [95] | 42 | + | [100] | 31 | + | [95] | 30 | + | [95] | 41 | + | [95] | 36 | + | [100] | 43 | + | [99] | 34 | + | [99] | 38 | + | [-1] | 43 | + | [-1] | 33 | + And the execution plan should be: + | id | name | dependencies | profiling data | operator info | + | 7 | Project | 11 | | | + | 11 | AppendVertices | 13 | | | + | 13 | Traverse | 1 | | {"filter": "((like.likeness>90) OR (like.likeness<0))"} | + | 1 | IndexScan | 2 | | | + | 2 | Start | | | | + When profiling query: + """ + MATCH (v:player)-[e:like*1]->(n) + WHERE all(i in e where i.likeness>90 and v.player.name <> "x") + RETURN [i in e | i.likeness] AS likeness, n.player.age AS nage + """ + Then the result should be, in any order: + | likeness | nage | + | [99] | 33 | + | [99] | 31 | + | [99] | 29 | + | [99] | 30 | + | [99] | 25 | + | [99] | 34 | + | [99] | 41 | + | [99] | 32 | + | [99] | 30 | + | [99] | 42 | + | [99] | 36 | + | [95] | 41 | + | [95] | 42 | + | [100] | 31 | + | [95] | 30 | + | [95] | 41 | + | [95] | 36 | + | [100] | 43 | + | [99] | 34 | + | [99] | 38 | + And the execution plan should be: + | id | name | dependencies | profiling data | operator info | + | 7 | Project | 11 | | | + | 11 | Filter | 11 | | {"condition": "all(__VAR_0 IN $e WHERE (($__VAR_0.likeness>90) AND (v.player.name!=\"x\")))"} | + | 11 | AppendVertices | 13 | | | + | 13 | Traverse | 1 | | | + | 1 | IndexScan | 2 | | | + | 2 | Start | | | | + When profiling query: + """ + MATCH (v:player)-[e:like*2]->(n) + WHERE all(i in e where i.likeness>90) + RETURN [i in e | i.likeness] AS likeness, n.player.age AS nage + """ + Then the result should be, in any order: + | likeness | nage | + | [99, 100] | 43 | + | [99, 95] | 41 | + | [99, 95] | 36 | + | [99, 95] | 41 | + | [99, 95] | 42 | + | [95, 95] | 41 | + | [95, 95] | 36 | + | [95, 95] | 41 | + | [95, 95] | 42 | + | [99, 99] | 38 | + | [99, 99] | 34 | + And the execution plan should be: + | id | name | dependencies | profiling data | operator info | + | 7 | Project | 11 | | | + | 11 | AppendVertices | 13 | | | + | 13 | Traverse | 1 | | {"filter": "(like.likeness>90)"} | + | 1 | IndexScan | 2 | | | + | 2 | Start | | | | + When profiling query: + """ + MATCH (v:player)-[e:like*2..4]->(n) + WHERE all(i in e where i.likeness>90) + RETURN [i in e | i.likeness] AS likeness, n.player.age AS nage + """ + Then the result should be, in any order: + | likeness | nage | + | [99, 100] | 43 | + | [99, 95] | 41 | + | [99, 95] | 36 | + | [99, 95] | 41 | + | [99, 95] | 42 | + | [99, 95, 95] | 41 | + | [99, 95, 95] | 42 | + | [99, 95, 95] | 41 | + | [99, 95, 95] | 36 | + | [99, 95, 95, 95] | 41 | + | [99, 95, 95, 95] | 41 | + | [95, 95] | 41 | + | [95, 95] | 36 | + | [95, 95, 95] | 41 | + | [95, 95] | 41 | + | [95, 95] | 42 | + | [95, 95, 95] | 41 | + | [99, 99] | 38 | + | [99, 99] | 34 | + And the execution plan should be: + | id | name | dependencies | profiling data | operator info | + | 7 | Project | 11 | | | + | 11 | AppendVertices | 13 | | | + | 13 | Traverse | 1 | | {"filter": "(like.likeness>90)"} | + | 1 | IndexScan | 2 | | | + | 2 | Start | | | | + When profiling query: + """ + MATCH (v:player)-[e:like*0..5]->(n) + WHERE all(i in e where i.likeness>90) AND size(e)>0 AND (n.player.age>0) == true + RETURN [i in e | i.likeness] AS likeness, n.player.age AS nage + """ + Then the result should be, in any order: + | likeness | nage | + | [99, 99] | 34 | + | [99] | 38 | + | [99, 99] | 38 | + | [99] | 34 | + | [100] | 43 | + | [95, 95, 95] | 41 | + | [95, 95] | 42 | + | [95, 95] | 41 | + | [95] | 36 | + | [95] | 41 | + | [95] | 30 | + | [100] | 31 | + | [95, 95, 95] | 41 | + | [95, 95] | 36 | + | [95, 95] | 41 | + | [95] | 42 | + | [95] | 41 | + | [99, 95, 95, 95] | 41 | + | [99, 95, 95, 95] | 41 | + | [99, 95, 95] | 36 | + | [99, 95, 95] | 41 | + | [99, 95, 95] | 42 | + | [99, 95, 95] | 41 | + | [99, 95] | 42 | + | [99, 95] | 41 | + | [99, 95] | 36 | + | [99, 95] | 41 | + | [99, 100] | 43 | + | [99] | 36 | + | [99] | 42 | + | [99] | 30 | + | [99] | 32 | + | [99] | 41 | + | [99] | 34 | + | [99] | 25 | + | [99] | 30 | + | [99] | 29 | + | [99] | 31 | + | [99] | 33 | + And the execution plan should be: + | id | name | dependencies | profiling data | operator info | + | 7 | Project | 15 | | | + | 15 | Filter | 11 | | {"condition": "((n.player.age>0)==true)"} | + | 11 | AppendVertices | 14 | | | + | 14 | Filter | 13 | | {"condition": "(size($e)>0)"} | + | 13 | Traverse | 1 | | {"edge filter": "(*.likeness>90)"} | + | 1 | IndexScan | 2 | | | + | 2 | Start | | | | + When profiling query: + """ + MATCH (v:player)-[e:like*1..5]->(n) + WHERE all(i in e where i.likeness>90) AND size(e)>0 AND (n.player.age>0) == true + RETURN [i in e | i.likeness] AS likeness, n.player.age AS nage + """ + Then the result should be, in any order: + | likeness | nage | + | [99, 99] | 34 | + | [99] | 38 | + | [99, 99] | 38 | + | [99] | 34 | + | [100] | 43 | + | [95, 95, 95] | 41 | + | [95, 95] | 42 | + | [95, 95] | 41 | + | [95] | 36 | + | [95] | 41 | + | [95] | 30 | + | [100] | 31 | + | [95, 95, 95] | 41 | + | [95, 95] | 36 | + | [95, 95] | 41 | + | [95] | 42 | + | [95] | 41 | + | [99, 95, 95, 95] | 41 | + | [99, 95, 95, 95] | 41 | + | [99, 95, 95] | 36 | + | [99, 95, 95] | 41 | + | [99, 95, 95] | 42 | + | [99, 95, 95] | 41 | + | [99, 95] | 42 | + | [99, 95] | 41 | + | [99, 95] | 36 | + | [99, 95] | 41 | + | [99, 100] | 43 | + | [99] | 36 | + | [99] | 42 | + | [99] | 30 | + | [99] | 32 | + | [99] | 41 | + | [99] | 34 | + | [99] | 25 | + | [99] | 30 | + | [99] | 29 | + | [99] | 31 | + | [99] | 33 | + And the execution plan should be: + | id | name | dependencies | profiling data | operator info | + | 7 | Project | 15 | | | + | 15 | Filter | 11 | | {"condition": "((n.player.age>0)==true)"} | + | 11 | AppendVertices | 14 | | | + | 14 | Filter | 13 | | {"condition": "(size($e)>0)"} | + | 13 | Traverse | 1 | | {"filter": "(like.likeness>90)"} | + | 1 | IndexScan | 2 | | | + | 2 | Start | | | | + When profiling query: + """ + MATCH (v:player)-[e:like*0..5]->(n) + WHERE all(i in e where i.likeness>90) AND not all(i in e where i.likeness > 89) + RETURN [i in e | i.likeness] AS likeness, n.player.age AS nage + """ + Then the result should be, in any order: + | likeness | nage | + And the execution plan should be: + | id | name | dependencies | profiling data | operator info | + | 7 | Project | 11 | | | + | 11 | AppendVertices | 14 | | | + | 14 | Filter | 13 | | {"condition": "(!(all(__VAR_1 IN $e WHERE ($__VAR_1.likeness>89))) AND !(all(__VAR_1 IN $e WHERE ($__VAR_1.likeness>89))) AND !(all(__VAR_1 IN $e WHERE ($__VAR_1.likeness>89))))"} | + | 13 | Traverse | 1 | | {"edge filter": "(*.likeness>90)"} | + | 1 | IndexScan | 2 | | | + | 2 | Start | | | | + When profiling query: + """ + MATCH (person:player)-[e1:like*1..2]-(friend:player) + WHERE id(person) == "Tony Parker" AND id(friend) != "Tony Parker" AND all(i in e1 where i.likeness > 0) + MATCH (friend)-[served:serve]->(friendTeam:team) + WHERE served.start_year > 2010 AND all(i in e1 where i.likeness > 1) + WITH DISTINCT friend, friendTeam + OPTIONAL MATCH (friend)<-[e2:like*2..4]-(friend2:player)<-[:like]-(friendTeam) + WITH friendTeam, count(friend2) AS numFriends, e2 + WHERE e2 IS NULL OR all(i in e2 where i IS NULL) + RETURN + friendTeam.team.name AS teamName, + numFriends, + [i in e2 | i.likeness] AS likeness + ORDER BY teamName DESC + LIMIT 8 + """ + Then the result should be, in any order, with relax comparison: + | teamName | numFriends | likeness | + | "Warriors" | 0 | NULL | + | "Trail Blazers" | 0 | NULL | + | "Spurs" | 0 | NULL | + | "Rockets" | 0 | NULL | + | "Raptors" | 0 | NULL | + | "Pistons" | 0 | NULL | + | "Lakers" | 0 | NULL | + | "Kings" | 0 | NULL | + And the execution plan should be: + | id | name | dependencies | profiling data | operator info | + | 28 | TopN | 24 | | | + | 24 | Project | 30 | | | + | 30 | Aggregate | 29 | | | + | 29 | Filter | 44 | | {"condition": "($e2 IS NULL OR all(__VAR_2 IN $e2 WHERE $__VAR_2 IS NULL))"} | + | 44 | HashLeftJoin | 15,43 | | | + | 15 | Dedup | 14 | | | + | 14 | Project | 35 | | | + | 35 | HashInnerJoin | 53,50 | | | + | 53 | Filter | 52 | | {"condition": "(id($friend)!=\"Tony Parker\")"} | + | 52 | AppendVertices | 59 | | | + | 59 | Filter | 60 | | {"condition": "(id($person)==\"Tony Parker\")"} | + | 60 | Traverse | 2 | | {"filter": "((like.likeness>0) AND (like.likeness>1))"} | + | 2 | Dedup | 1 | | | + | 1 | PassThrough | 3 | | | + | 3 | Start | | | | + | 50 | Project | 55 | | | + | 55 | AppendVertices | 61 | | | + | 61 | Traverse | 8 | | | + | 8 | Argument | | | | + | 43 | Project | 39 | | | + | 39 | Traverse | 17 | | | + | 17 | Traverse | 16 | | | + | 16 | Argument | | | | From e637d9ad7f6e3a50b185e7765f0d4ad1f63b30ed Mon Sep 17 00:00:00 2001 From: codesigner Date: Tue, 11 Apr 2023 16:03:22 +0800 Subject: [PATCH 09/20] fix eval contains filter on storaged (#5485) * fix eval contains filter on storaged * add tck case * add tck case * fix tck * fix lint * fix lint --- .../expression/RelationalExpression.cpp | 18 ++++++---- .../features/bugfix/ContainsFilter.feature | 33 +++++++++++++++++++ 2 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 tests/tck/features/bugfix/ContainsFilter.feature diff --git a/src/common/expression/RelationalExpression.cpp b/src/common/expression/RelationalExpression.cpp index 887562f1cb3..645dbf4272d 100644 --- a/src/common/expression/RelationalExpression.cpp +++ b/src/common/expression/RelationalExpression.cpp @@ -115,7 +115,8 @@ const Value& RelationalExpression::eval(ExpressionContext& ctx) { case Kind::kContains: { if (lhs.isBadNull() || rhs.isBadNull()) { result_ = Value::kNullBadType; - } else if ((!lhs.isNull() && !lhs.isStr()) || (!rhs.isNull() && !rhs.isStr())) { + } else if ((!lhs.isNull() && !lhs.empty() && !lhs.isStr()) || + (!rhs.isNull() && !rhs.empty() && !rhs.isStr())) { result_ = Value::kNullBadType; } else if (lhs.isStr() && rhs.isStr()) { result_ = lhs.getStr().size() >= rhs.getStr().size() && @@ -128,7 +129,8 @@ const Value& RelationalExpression::eval(ExpressionContext& ctx) { case Kind::kNotContains: { if (lhs.isBadNull() || rhs.isBadNull()) { result_ = Value::kNullBadType; - } else if ((!lhs.isNull() && !lhs.isStr()) || (!rhs.isNull() && !rhs.isStr())) { + } else if ((!lhs.isNull() && !lhs.empty() && !lhs.isStr()) || + (!rhs.isNull() && !rhs.empty() && !rhs.isStr())) { result_ = Value::kNullBadType; } else if (lhs.isStr() && rhs.isStr()) { result_ = !(lhs.getStr().size() >= rhs.getStr().size() && @@ -141,7 +143,8 @@ const Value& RelationalExpression::eval(ExpressionContext& ctx) { case Kind::kStartsWith: { if (lhs.isBadNull() || rhs.isBadNull()) { result_ = Value::kNullBadType; - } else if ((!lhs.isNull() && !lhs.isStr()) || (!rhs.isNull() && !rhs.isStr())) { + } else if ((!lhs.isNull() && !lhs.empty() && !lhs.isStr()) || + (!rhs.isNull() && !rhs.empty() && !rhs.isStr())) { result_ = Value::kNullBadType; } else if (lhs.isStr() && rhs.isStr()) { result_ = @@ -154,7 +157,8 @@ const Value& RelationalExpression::eval(ExpressionContext& ctx) { case Kind::kNotStartsWith: { if (lhs.isBadNull() || rhs.isBadNull()) { result_ = Value::kNullBadType; - } else if ((!lhs.isNull() && !lhs.isStr()) || (!rhs.isNull() && !rhs.isStr())) { + } else if ((!lhs.isNull() && !lhs.empty() && !lhs.isStr()) || + (!rhs.isNull() && !rhs.empty() && !rhs.isStr())) { result_ = Value::kNullBadType; } else if (lhs.isStr() && rhs.isStr()) { result_ = @@ -167,7 +171,8 @@ const Value& RelationalExpression::eval(ExpressionContext& ctx) { case Kind::kEndsWith: { if (lhs.isBadNull() || rhs.isBadNull()) { result_ = Value::kNullBadType; - } else if ((!lhs.isNull() && !lhs.isStr()) || (!rhs.isNull() && !rhs.isStr())) { + } else if ((!lhs.isNull() && !lhs.empty() && !lhs.isStr()) || + (!rhs.isNull() && !rhs.empty() && !rhs.isStr())) { result_ = Value::kNullBadType; } else if (lhs.isStr() && rhs.isStr()) { result_ = @@ -182,7 +187,8 @@ const Value& RelationalExpression::eval(ExpressionContext& ctx) { case Kind::kNotEndsWith: { if (lhs.isBadNull() || rhs.isBadNull()) { result_ = Value::kNullBadType; - } else if ((!lhs.isNull() && !lhs.isStr()) || (!rhs.isNull() && !rhs.isStr())) { + } else if ((!lhs.isNull() && !lhs.empty() && !lhs.isStr()) || + (!rhs.isNull() && !rhs.empty() && !rhs.isStr())) { result_ = Value::kNullBadType; } else if (lhs.isStr() && rhs.isStr()) { result_ = !(lhs.getStr().size() >= rhs.getStr().size() && diff --git a/tests/tck/features/bugfix/ContainsFilter.feature b/tests/tck/features/bugfix/ContainsFilter.feature new file mode 100644 index 00000000000..fc898c53a3f --- /dev/null +++ b/tests/tck/features/bugfix/ContainsFilter.feature @@ -0,0 +1,33 @@ +# Copyright (c) 2022 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +@tag1 +Feature: contains filter + + Background: + Given a graph with space named "nba" + + Scenario: contains filter + When executing query: + """ + MATCH (n:player{name:"Tim Duncan"})-[e]->(m) where m.player.name contains "Tony Parker" RETURN n,e,m ORDER BY m; + """ + Then the result should be, in order: + | n | e | m | + | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | [:teammate "Tim Duncan"->"Tony Parker" @0 {end_year: 2016, start_year: 2001}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + When executing query: + """ + MATCH (n:player{name:"Tim Duncan"})-[e]->(m) where m.player.name starts with "Manu" RETURN n,e,m ORDER BY m; + """ + Then the result should be, in order: + | n | e | m | + | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | [:teammate "Tim Duncan"->"Manu Ginobili" @0 {end_year: 2016, start_year: 2002}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + When executing query: + """ + MATCH (n:player{name:"Tim Duncan"})-[e]->(m) where m.team.name ends with "urs" RETURN n,e,m ORDER BY m; + """ + Then the result should be, in order: + | n | e | m | + | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | [:serve "Tim Duncan"->"Spurs" @0 {end_year: 2016, start_year: 1997}] | ("Spurs" :team{name: "Spurs"}) | From c0ab9644fda6268db67ef310d30207150fe78b45 Mon Sep 17 00:00:00 2001 From: "kyle.cao" Date: Tue, 11 Apr 2023 16:39:19 +0800 Subject: [PATCH 10/20] Fix expression util function (#5487) fmt Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/graph/util/ExpressionUtils.cpp | 1 + .../EmbedEdgeAllPredIntoTraverseRule.feature | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/graph/util/ExpressionUtils.cpp b/src/graph/util/ExpressionUtils.cpp index 3217e6beee0..ad7fa5b3d56 100644 --- a/src/graph/util/ExpressionUtils.cpp +++ b/src/graph/util/ExpressionUtils.cpp @@ -1042,6 +1042,7 @@ void ExpressionUtils::splitFilter(const Expression *expr, // TODO(czp): If find any not, dont pick this operand for now if (ExpressionUtils::findAny(operand, {Expression::Kind::kUnaryNot})) { filterUnpickedPtr->addOperand(operand->clone()); + continue; } if (picker(operand)) { filterPickedPtr->addOperand(operand->clone()); diff --git a/tests/tck/features/optimizer/EmbedEdgeAllPredIntoTraverseRule.feature b/tests/tck/features/optimizer/EmbedEdgeAllPredIntoTraverseRule.feature index 80398fca974..e4e5c1abc09 100644 --- a/tests/tck/features/optimizer/EmbedEdgeAllPredIntoTraverseRule.feature +++ b/tests/tck/features/optimizer/EmbedEdgeAllPredIntoTraverseRule.feature @@ -1,7 +1,6 @@ # Copyright (c) 2023 vesoft inc. All rights reserved. # # This source code is licensed under Apache 2.0 License. -@czp Feature: Embed edge all predicate into Traverse Background: @@ -297,13 +296,13 @@ Feature: Embed edge all predicate into Traverse Then the result should be, in any order: | likeness | nage | And the execution plan should be: - | id | name | dependencies | profiling data | operator info | - | 7 | Project | 11 | | | - | 11 | AppendVertices | 14 | | | - | 14 | Filter | 13 | | {"condition": "(!(all(__VAR_1 IN $e WHERE ($__VAR_1.likeness>89))) AND !(all(__VAR_1 IN $e WHERE ($__VAR_1.likeness>89))) AND !(all(__VAR_1 IN $e WHERE ($__VAR_1.likeness>89))))"} | - | 13 | Traverse | 1 | | {"edge filter": "(*.likeness>90)"} | - | 1 | IndexScan | 2 | | | - | 2 | Start | | | | + | id | name | dependencies | profiling data | operator info | + | 7 | Project | 11 | | | + | 11 | AppendVertices | 14 | | | + | 14 | Filter | 13 | | {"condition": "!(all(__VAR_1 IN $e WHERE ($__VAR_1.likeness>89)))"} | + | 13 | Traverse | 1 | | {"edge filter": "(*.likeness>90)"} | + | 1 | IndexScan | 2 | | | + | 2 | Start | | | | When profiling query: """ MATCH (person:player)-[e1:like*1..2]-(friend:player) From eec8deb7bcba27fd4a685087cfcea19b261beabc Mon Sep 17 00:00:00 2001 From: codesigner Date: Wed, 12 Apr 2023 11:18:36 +0800 Subject: [PATCH 11/20] fix ContainsFilter random fail (#5489) --- tests/tck/features/bugfix/ContainsFilter.feature | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tck/features/bugfix/ContainsFilter.feature b/tests/tck/features/bugfix/ContainsFilter.feature index fc898c53a3f..b5af177c63e 100644 --- a/tests/tck/features/bugfix/ContainsFilter.feature +++ b/tests/tck/features/bugfix/ContainsFilter.feature @@ -10,7 +10,7 @@ Feature: contains filter Scenario: contains filter When executing query: """ - MATCH (n:player{name:"Tim Duncan"})-[e]->(m) where m.player.name contains "Tony Parker" RETURN n,e,m ORDER BY m; + MATCH (n:player{name:"Tim Duncan"})-[e]->(m) where m.player.name contains "Tony Parker" RETURN n,e,m ORDER BY e; """ Then the result should be, in order: | n | e | m | @@ -18,7 +18,7 @@ Feature: contains filter | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | [:teammate "Tim Duncan"->"Tony Parker" @0 {end_year: 2016, start_year: 2001}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | When executing query: """ - MATCH (n:player{name:"Tim Duncan"})-[e]->(m) where m.player.name starts with "Manu" RETURN n,e,m ORDER BY m; + MATCH (n:player{name:"Tim Duncan"})-[e]->(m) where m.player.name starts with "Manu" RETURN n,e,m ORDER BY e; """ Then the result should be, in order: | n | e | m | From b562814512429adb72b91c52686b9518bb5eba2c Mon Sep 17 00:00:00 2001 From: dutor <440396+dutor@users.noreply.github.com> Date: Thu, 13 Apr 2023 18:09:08 +0800 Subject: [PATCH 12/20] Fixed graphd startup issue (#5493) --- src/graph/service/GraphServer.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/graph/service/GraphServer.cpp b/src/graph/service/GraphServer.cpp index 9f211dae343..0529d30314b 100644 --- a/src/graph/service/GraphServer.cpp +++ b/src/graph/service/GraphServer.cpp @@ -66,13 +66,23 @@ bool GraphServer::start() { } catch (const std::exception &e) { FLOG_ERROR("Exception thrown while starting the graph RPC server: %s", e.what()); } - serverStatus_.store(STATUS_STOPPED); + { + std::unique_lock lkStop(muStop_); + serverStatus_.store(STATUS_STOPPED); + cvStop_.notify_one(); + } FLOG_INFO("nebula-graphd on %s:%d has been stopped", localHost_.host.c_str(), localHost_.port); }); while (serverStatus_ == STATUS_UNINITIALIZED) { - std::this_thread::sleep_for(std::chrono::microseconds(100)); + std::this_thread::sleep_for(std::chrono::microseconds(1000)); + } + // In case `thriftServer_->serve()` fails, we wait a short while + std::this_thread::sleep_for(std::chrono::microseconds(1000)); + if (serverStatus_ != STATUS_RUNNING) { + return false; } + return true; } @@ -89,10 +99,8 @@ void GraphServer::waitUntilStop() { void GraphServer::notifyStop() { std::unique_lock lkStop(muStop_); - if (serverStatus_ == STATUS_RUNNING) { - serverStatus_ = STATUS_STOPPED; - cvStop_.notify_one(); - } + serverStatus_ = STATUS_STOPPED; + cvStop_.notify_one(); } // Stop the server. From 214e6ee85a9db8025585b495436523a979677610 Mon Sep 17 00:00:00 2001 From: jimingquan Date: Fri, 14 Apr 2023 10:40:44 +0800 Subject: [PATCH 13/20] fix prunproperties (#5494) --- src/graph/visitor/PropertyTrackerVisitor.cpp | 16 ++++----- src/graph/visitor/PrunePropertiesVisitor.cpp | 5 +++ tests/tck/features/match/MatchGroupBy.feature | 36 +++++++++++++++++++ 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/graph/visitor/PropertyTrackerVisitor.cpp b/src/graph/visitor/PropertyTrackerVisitor.cpp index 3e20f09f8e7..8fd0e42513f 100644 --- a/src/graph/visitor/PropertyTrackerVisitor.cpp +++ b/src/graph/visitor/PropertyTrackerVisitor.cpp @@ -51,7 +51,8 @@ void PropertyTracker::insertVertexProp(const std::string &name, vertexPropsMap[name][tagId].emplace(propName); } else { auto propIter = iter->second.find(tagId); - if (propIter == iter->second.end()) { + if (propIter == iter->second.end() || propName == "*") { + iter->second.erase(tagId); std::unordered_set temp({propName}); iter->second.emplace(tagId, std::move(temp)); } else { @@ -209,18 +210,17 @@ void PropertyTrackerVisitor::visit(AttributeExpression *expr) { switch (lhs->kind()) { case Expression::Kind::kInputProperty: case Expression::Kind::kVarProperty: { - // maybe: $e.prop auto *varPropExpr = static_cast(lhs); auto &entityAlias = varPropExpr->prop(); - propsUsed_.insertEdgeProp(entityAlias, unknownType_, propName); - // maybe: $v.tag + // maybe : $v.tag auto ret = qctx_->schemaMng()->toTagID(space_, propName); - if (!ret.ok()) { - status_ = std::move(ret).status(); + if (ret.ok()) { + auto tagId = ret.value(); + propsUsed_.insertVertexProp(entityAlias, tagId, "*"); break; } - auto tagId = ret.value(); - propsUsed_.insertVertexProp(entityAlias, tagId, "*"); + // maybe: $e.prop + propsUsed_.insertEdgeProp(entityAlias, unknownType_, propName); break; } case Expression::Kind::kCase: { // (case xxx).name diff --git a/src/graph/visitor/PrunePropertiesVisitor.cpp b/src/graph/visitor/PrunePropertiesVisitor.cpp index 8cae8f06e1a..648e749bf9c 100644 --- a/src/graph/visitor/PrunePropertiesVisitor.cpp +++ b/src/graph/visitor/PrunePropertiesVisitor.cpp @@ -286,6 +286,11 @@ void PrunePropertiesVisitor::pruneCurrent(Traverse *node) { } auto tagIter = usedVertexProps.find(tagId); if (tagIter != usedVertexProps.end()) { + auto &tagProps = tagIter->second; + if (tagProps.find("*") != tagProps.end()) { + prunedVertexProps->emplace_back(vertexProp); + continue; + } usedProps.insert(tagIter->second.begin(), tagIter->second.end()); } if (usedProps.empty()) { diff --git a/tests/tck/features/match/MatchGroupBy.feature b/tests/tck/features/match/MatchGroupBy.feature index 6fe3ae0c320..74020ea8815 100644 --- a/tests/tck/features/match/MatchGroupBy.feature +++ b/tests/tck/features/match/MatchGroupBy.feature @@ -200,3 +200,39 @@ Feature: Match GroupBy | "Danny Green" | 36 | 1 | | "Dejounte Murray" | 33 | 1 | | "Dirk Nowitzki" | 42 | 1 | + + Scenario: [9] Match GroupBy + When executing query: + """ + MATCH p = (a)-[b:like]->(c)-[:serve]->(d:team) + WHERE id(a) == 'Tim Duncan' + WITH + a, + collect( + CASE c.player.name + WHEN null THEN null + ELSE [c.player.name, b.likeness, d.team.name] + END + ) AS res + RETURN res + """ + Then the result should be, in order, with relax comparison: + | res | + | [["Tony Parker", 95, "Hornets"], ["Tony Parker", 95, "Spurs"], ["Manu Ginobili", 95, "Spurs"]] | + When executing query: + """ + MATCH p = (a)-[b:like]->(c)-[:serve]->(d:team) + WHERE id(a) == 'Tim Duncan' + WITH + a, + collect( + CASE c.player.name + WHEN null THEN null + ELSE [c.player, b.likeness, d.team.name] + END + ) AS res + RETURN res + """ + Then the result should be, in order, with relax comparison: + | res | + | [[{age: 36, name: "Tony Parker"}, 95, "Hornets"], [{age: 36, name: "Tony Parker"}, 95, "Spurs"], [{age: 41, name: "Manu Ginobili"}, 95, "Spurs"]] | From ae3c5d251d16213e73c2b048cbd0deedb1042db4 Mon Sep 17 00:00:00 2001 From: Cheng Xuntao <7731943+xtcyclist@users.noreply.github.com> Date: Fri, 14 Apr 2023 18:19:02 +0800 Subject: [PATCH 14/20] stop the pushing down of not expressions that are not rewritten to proper forms. (#5502) --- src/graph/visitor/ExtractFilterExprVisitor.cpp | 15 +++++++++++++++ src/graph/visitor/ExtractFilterExprVisitor.h | 1 + 2 files changed, 16 insertions(+) diff --git a/src/graph/visitor/ExtractFilterExprVisitor.cpp b/src/graph/visitor/ExtractFilterExprVisitor.cpp index f4a97e1ecc7..6a36564ba21 100644 --- a/src/graph/visitor/ExtractFilterExprVisitor.cpp +++ b/src/graph/visitor/ExtractFilterExprVisitor.cpp @@ -231,6 +231,21 @@ void ExtractFilterExprVisitor::visit(ColumnExpression *) { canBePushed_ = false; } +void ExtractFilterExprVisitor::visit(UnaryExpression *expr) { + if (expr->kind() == Expression::Kind::kUnaryNot && + (expr->operand()->kind() == Expression::Kind::kLogicalAnd || + expr->operand()->kind() == Expression::Kind::kLogicalOr)) { + // The NOT operation in this kind of expressions should had been reduced. + // In case it had not been reduced, pushing it down would cause wrong results. + canBePushed_ = false; + return; + } + if (expr->operand() != nullptr) { + expr->operand()->accept(this); + return; + } +} + // @return: whether this logical expr satisfies split condition bool ExtractFilterExprVisitor::visitLogicalAnd(LogicalExpression *expr, std::vector &flags) { DCHECK_EQ(expr->kind(), Expression::Kind::kLogicalAnd); diff --git a/src/graph/visitor/ExtractFilterExprVisitor.h b/src/graph/visitor/ExtractFilterExprVisitor.h index 67deec512ad..40bccf7b46a 100644 --- a/src/graph/visitor/ExtractFilterExprVisitor.h +++ b/src/graph/visitor/ExtractFilterExprVisitor.h @@ -84,6 +84,7 @@ class ExtractFilterExprVisitor final : public ExprVisitorImpl { void visit(LogicalExpression *) override; void visit(ColumnExpression *) override; void visit(SubscriptRangeExpression *) override; + void visit(UnaryExpression *) override; private: enum class PushType { From 9ce7a6fcdd53b0ba48bd2ac3c701c9915591ab79 Mon Sep 17 00:00:00 2001 From: "kyle.cao" Date: Mon, 17 Apr 2023 11:08:58 +0800 Subject: [PATCH 15/20] Fix edge all predicate with rank function (#5503) Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- .../rule/EmbedEdgeAllPredIntoTraverseRule.cpp | 4 ++++ .../features/match/MultiLineMultiQueryParts.feature | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.cpp b/src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.cpp index 1a58da9c0b9..d66bd1d5d77 100644 --- a/src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.cpp +++ b/src/graph/optimizer/rule/EmbedEdgeAllPredIntoTraverseRule.cpp @@ -65,6 +65,10 @@ bool isEdgeAllPredicate(const Expression* e, return false; } auto ves = graph::ExpressionUtils::collectAll(pe->filter(), {Expression::Kind::kAttribute}); + if (ves.empty()) { + // innerVar.prop not exists + return false; + } for (const auto& ve : ves) { auto iv = static_cast(ve)->left(); diff --git a/tests/tck/features/match/MultiLineMultiQueryParts.feature b/tests/tck/features/match/MultiLineMultiQueryParts.feature index 1f0030c2831..60b1163777e 100644 --- a/tests/tck/features/match/MultiLineMultiQueryParts.feature +++ b/tests/tck/features/match/MultiLineMultiQueryParts.feature @@ -298,6 +298,17 @@ Feature: Multi Line Multi Query Parts Then the result should be, in order: | scount | lcount | | 19 | 110 | + When executing query: + """ + MATCH (m:player{name:"Tim Duncan"})-[e:like*2..3]-(n)--() + WHERE all(i in e where rank(i)==0) + WITH m,count(*) AS lcount + MATCH (m)--(n) + RETURN count(*) AS scount, lcount + """ + Then the result should be, in order: + | scount | lcount | + | 19 | 2888 | When executing query: """ MATCH (m:player{name:"Tim Duncan"})-[:like]-(n)--() From fbdf275bb2f44127aef73f85200a4595136232fe Mon Sep 17 00:00:00 2001 From: jimingquan Date: Mon, 17 Apr 2023 14:34:56 +0800 Subject: [PATCH 16/20] rewrite param in subgraph & path (#5500) * check param in subgraph * rewrite param in path --------- Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/graph/validator/FindPathValidator.cpp | 21 +++++++++-- src/graph/validator/GetSubgraphValidator.cpp | 24 +++++++++--- tests/tck/features/yield/parameter.feature | 39 +++++++++++++++++++- 3 files changed, 73 insertions(+), 11 deletions(-) diff --git a/src/graph/validator/FindPathValidator.cpp b/src/graph/validator/FindPathValidator.cpp index fbd6244f39a..6b45ae37467 100644 --- a/src/graph/validator/FindPathValidator.cpp +++ b/src/graph/validator/FindPathValidator.cpp @@ -37,15 +37,28 @@ Status FindPathValidator::validateWhere(WhereClause* where) { return Status::OK(); } // Not Support $-、$var、$$.tag.prop、$^.tag.prop、agg - auto expr = where->filter(); - if (ExpressionUtils::findAny(expr, + auto filterExpr = where->filter(); + if (ExpressionUtils::findAny(filterExpr, {Expression::Kind::kSrcProperty, Expression::Kind::kDstProperty, Expression::Kind::kVarProperty, Expression::Kind::kInputProperty})) { - return Status::SemanticError("Not support `%s' in where sentence.", expr->toString().c_str()); + return Status::SemanticError("Not support `%s' in where sentence.", + filterExpr->toString().c_str()); } - where->setFilter(ExpressionUtils::rewriteLabelAttr2EdgeProp(expr)); + + auto undefinedParams = graph::ExpressionUtils::ExtractInnerVars(filterExpr, qctx_); + if (!undefinedParams.empty()) { + return Status::SemanticError( + "Undefined parameters: " + + std::accumulate(++undefinedParams.begin(), + undefinedParams.end(), + *undefinedParams.begin(), + [](auto& lhs, auto& rhs) { return lhs + ", " + rhs; })); + } + auto* newFilter = graph::ExpressionUtils::rewriteParameter(filterExpr, qctx_); + + where->setFilter(ExpressionUtils::rewriteLabelAttr2EdgeProp(newFilter)); auto filter = where->filter(); auto typeStatus = deduceExprType(filter); diff --git a/src/graph/validator/GetSubgraphValidator.cpp b/src/graph/validator/GetSubgraphValidator.cpp index 4e2989fe20d..91e4e4d8f6d 100644 --- a/src/graph/validator/GetSubgraphValidator.cpp +++ b/src/graph/validator/GetSubgraphValidator.cpp @@ -126,17 +126,29 @@ Status GetSubgraphValidator::validateWhere(WhereClause* where) { if (where == nullptr) { return Status::OK(); } - auto* expr = where->filter(); - if (ExpressionUtils::findAny(expr, + auto* filterExpr = where->filter(); + if (ExpressionUtils::findAny(filterExpr, {Expression::Kind::kAggregate, Expression::Kind::kSrcProperty, Expression::Kind::kVarProperty, Expression::Kind::kInputProperty, Expression::Kind::kLogicalOr})) { - return Status::SemanticError("Not support `%s' in where sentence.", expr->toString().c_str()); + return Status::SemanticError("Not support `%s' in where sentence.", + filterExpr->toString().c_str()); } - where->setFilter(ExpressionUtils::rewriteLabelAttr2EdgeProp(expr)); + auto undefinedParams = graph::ExpressionUtils::ExtractInnerVars(filterExpr, qctx_); + if (!undefinedParams.empty()) { + return Status::SemanticError( + "Undefined parameters: " + + std::accumulate(++undefinedParams.begin(), + undefinedParams.end(), + *undefinedParams.begin(), + [](auto& lhs, auto& rhs) { return lhs + ", " + rhs; })); + } + auto* newFilter = graph::ExpressionUtils::rewriteParameter(filterExpr, qctx_); + + where->setFilter(ExpressionUtils::rewriteLabelAttr2EdgeProp(newFilter)); auto filter = where->filter(); auto typeStatus = deduceExprType(filter); NG_RETURN_IF_ERROR(typeStatus); @@ -169,11 +181,11 @@ Status GetSubgraphValidator::validateWhere(WhereClause* where) { } auto condition = filter->clone(); - if (ExpressionUtils::findAny(expr, {Expression::Kind::kDstProperty})) { + if (ExpressionUtils::findAny(filter, {Expression::Kind::kDstProperty})) { auto visitor = ExtractFilterExprVisitor::makePushGetVertices(qctx_->objPool()); filter->accept(&visitor); if (!visitor.ok()) { - return Status::SemanticError("Push target vertices filter error: " + expr->toString()); + return Status::SemanticError("Push target vertices filter error: " + filter->toString()); } subgraphCtx_->edgeFilter = visitor.remainedExpr(); auto tagFilter = visitor.extractedExpr() ? visitor.extractedExpr() : filter; diff --git a/tests/tck/features/yield/parameter.feature b/tests/tck/features/yield/parameter.feature index 854a402ecc8..df32e3b31c1 100644 --- a/tests/tck/features/yield/parameter.feature +++ b/tests/tck/features/yield/parameter.feature @@ -6,7 +6,7 @@ Feature: Parameter Background: Given an empty graph And load "nba" csv data to a new space - Given parameters: {"p1":1,"p2":true,"p3":"Tim Duncan","p4":3.3,"p5":[1,true,3],"p6":{"a":3,"b":false,"c":"Tim Duncan"},"p7":{"a":{"b":{"c":"Tim Duncan","d":[1,2,3,true,"Tim Duncan"]}}},"p8":"Manu Ginobili", "p9":["Tim Duncan","Tony Parker"]} + Given parameters: {"p1":1,"p2":true,"p3":"Tim Duncan","p4":3.3,"p5":[1,true,3],"p6":{"a":3,"b":false,"c":"Tim Duncan"},"p7":{"a":{"b":{"c":"Tim Duncan","d":[1,2,3,true,"Tim Duncan"]}}},"p8":"Manu Ginobili", "p9":["Tim Duncan","Tony Parker"], "p10":90} Scenario: [param-test-001] without define param When executing query: @@ -269,6 +269,21 @@ Feature: Parameter MATCH (v:player) where v.player.age < $unknown_distance RETURN v """ Then a SemanticError should be raised at runtime: Undefined parameters: unknown_distance + When executing query: + """ + GET SUBGRAPH FROM 'Tim Duncan' WHERE like.likeness < $unknown_distance YIELD edges as e + """ + Then a SemanticError should be raised at runtime: Undefined parameters: unknown_distance + When executing query: + """ + FIND ALL PATH FROM 'Tim Duncan' TO 'Tony Parker' OVER like WHERE like.likeness > $unknown_distance YIELD path as p + """ + Then a SemanticError should be raised at runtime: Undefined parameters: unknown_distance + When executing query: + """ + FIND SHORTEST PATH FROM 'Tim Duncan' TO 'Tony Parker' OVER like WHERE like.likeness > $unknown_distance YIELD path as p + """ + Then a SemanticError should be raised at runtime: Undefined parameters: unknown_distance When executing query: """ MATCH (v:player) RETURN v LIMIT $p6 @@ -345,6 +360,28 @@ Feature: Parameter | v | | BAD_TYPE | | BAD_TYPE | + When executing query: + """ + GET SUBGRAPH FROM 'Tim Duncan' WHERE like.likeness > $p10 YIELD edges AS e + """ + Then the result should be, in any order: + | e | + | [[:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}], [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}], [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}], [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}]] | + | [[:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}], [:like "Dejounte Murray"->"Manu Ginobili" @0 {likeness: 99}], [:like "Dejounte Murray"->"Tony Parker" @0 {likeness: 99}]] | + When executing query: + """ + FIND ALL PATH FROM 'Tim Duncan' TO 'Tony Parker' OVER like WHERE like.likeness > $p10-1 YIELD path AS p + """ + Then the result should be, in any order, with relax comparison: + | p | + | <("Tim Duncan")-[:like@0 {likeness: 95}]->("Tony Parker")> | + | <("Tim Duncan")-[:like@0 {likeness: 95}]->("Manu Ginobili")-[:like@0 {likeness: 90}]->("Tim Duncan")-[:like@0 {likeness: 95}]->("Tony Parker")> | + When executing query: + """ + FIND ALL PATH FROM 'Tim Duncan' TO 'Tony Parker' OVER like WHERE like.likeness > $p5[10] YIELD path AS p + """ + Then the result should be, in any order: + | p | Scenario: [param-test-013] DML Given an empty graph From 1ecbdbaa2d9cd6d09ebc4952ca85d3bf38e7aa47 Mon Sep 17 00:00:00 2001 From: Yee <2520865+yixinglu@users.noreply.github.com> Date: Mon, 17 Apr 2023 17:32:09 +0800 Subject: [PATCH 17/20] Fix concurrent bug about session count (#5496) --- src/common/session/SessionManager.h | 42 +++++++++++++++++++++-- src/graph/session/GraphSessionManager.cpp | 25 ++------------ src/graph/session/GraphSessionManager.h | 6 ---- 3 files changed, 42 insertions(+), 31 deletions(-) diff --git a/src/common/session/SessionManager.h b/src/common/session/SessionManager.h index 00ef5fe63ce..c1d0f32ad61 100644 --- a/src/common/session/SessionManager.h +++ b/src/common/session/SessionManager.h @@ -6,6 +6,7 @@ #ifndef COMMON_SESSION_SESSIONMANAGER_H_ #define COMMON_SESSION_SESSIONMANAGER_H_ +#include #include #include "clients/meta/MetaClient.h" @@ -23,7 +24,7 @@ namespace nebula { class SessionCount { private: - std::atomic count_ = 1; + std::atomic count_{0}; public: int fetch_add(int step) { @@ -75,11 +76,48 @@ class SessionManager { protected: using SessionPtr = std::shared_ptr; using SessionCountPtr = std::shared_ptr; + + // Get session count pointer according to key + SessionCountPtr sessionCnt(const std::string& key) { + folly::RWSpinLock::ReadHolder rh(&sessCntLock_); + auto iter = userIpSessionCount_.find(key); + if (iter != userIpSessionCount_.end()) { + return iter->second; + } + return nullptr; + } + + // add sessionCount + void addSessionCount(std::string& key) { + auto sessCntPtr = sessionCnt(key); + if (!sessCntPtr) { + folly::RWSpinLock::WriteHolder wh(&sessCntLock_); + auto iter = userIpSessionCount_.emplace(key, std::make_shared()); + sessCntPtr = iter.first->second; + } + sessCntPtr->fetch_add(1); + } + + // sub sessionCount + void subSessionCount(std::string& key) { + auto countFindPtr = sessionCnt(key); + if (countFindPtr) { + auto count = countFindPtr->fetch_sub(1); + if (count == 1) { + folly::RWSpinLock::WriteHolder wh(&sessCntLock_); + userIpSessionCount_.erase(key); + } + } + } + folly::ConcurrentHashMap activeSessions_; - folly::ConcurrentHashMap userIpSessionCount_; std::unique_ptr scavenger_; meta::MetaClient* metaClient_{nullptr}; HostAddr myAddr_; + + private: + folly::RWSpinLock sessCntLock_; + std::unordered_map userIpSessionCount_; }; } // namespace nebula diff --git a/src/graph/session/GraphSessionManager.cpp b/src/graph/session/GraphSessionManager.cpp index fbd7d02fd43..b8f0d3f5b96 100644 --- a/src/graph/session/GraphSessionManager.cpp +++ b/src/graph/session/GraphSessionManager.cpp @@ -114,9 +114,8 @@ folly::Future>> GraphSessionManager::cre // check the number of sessions per user per ip std::string key = userName + clientIp; auto maxSessions = FLAGS_max_sessions_per_ip_per_user; - auto uiscFindPtr = userIpSessionCount_.find(key); - if (uiscFindPtr != userIpSessionCount_.end() && maxSessions > 0 && - uiscFindPtr->second.get()->get() > maxSessions - 1) { + auto uiscFindPtr = sessionCnt(key); + if (maxSessions > 0 && uiscFindPtr && uiscFindPtr->get() > maxSessions - 1) { return Status::Error( "Create Session failed: Too many sessions created from %s by user %s. " "the threshold is %d. You can change it by modifying '%s' in nebula-graphd.conf", @@ -362,25 +361,5 @@ void GraphSessionManager::removeSessionFromLocalCache(const std::vectorsecond.get()->fetch_add(1); - } else { - userIpSessionCount_.insert_or_assign(key, std::make_shared()); - } -} - -void GraphSessionManager::subSessionCount(std::string& key) { - auto countFindPtr = userIpSessionCount_.find(key); - if (countFindPtr == userIpSessionCount_.end()) { - VLOG(1) << "Session count not found for key: " << key; - return; - } - auto count = countFindPtr->second.get()->fetch_sub(1); - if (count <= 0) { - userIpSessionCount_.erase(countFindPtr); - } -} } // namespace graph } // namespace nebula diff --git a/src/graph/session/GraphSessionManager.h b/src/graph/session/GraphSessionManager.h index cc13f1a956e..045403b877e 100644 --- a/src/graph/session/GraphSessionManager.h +++ b/src/graph/session/GraphSessionManager.h @@ -112,12 +112,6 @@ class GraphSessionManager final : public SessionManager { // Updates session info locally. // session: ClientSession which will be updated. void updateSessionInfo(ClientSession* session); - - // add sessionCount - void addSessionCount(std::string& key); - - // sub sessionCount - void subSessionCount(std::string& key); }; } // namespace graph From a0e8493195e5015d4b416a0ad5f243913cd6851c Mon Sep 17 00:00:00 2001 From: "kyle.cao" Date: Tue, 18 Apr 2023 14:09:02 +0800 Subject: [PATCH 18/20] Fix regex expression (#5507) --- src/common/expression/RelationalExpression.cpp | 3 ++- .../features/expression/RelationalExpr.feature | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/common/expression/RelationalExpression.cpp b/src/common/expression/RelationalExpression.cpp index 645dbf4272d..70342658130 100644 --- a/src/common/expression/RelationalExpression.cpp +++ b/src/common/expression/RelationalExpression.cpp @@ -37,7 +37,8 @@ const Value& RelationalExpression::eval(ExpressionContext& ctx) { case Kind::kRelREG: { if (lhs.isBadNull() || rhs.isBadNull()) { result_ = Value::kNullBadType; - } else if ((!lhs.isNull() && !lhs.isStr()) || (!rhs.isNull() && !rhs.isStr())) { + } else if ((!lhs.isNull() && !lhs.empty() && !lhs.isStr()) || + (!rhs.isNull() && !rhs.empty() && !rhs.isStr())) { result_ = Value::kNullBadType; } else if (lhs.isStr() && rhs.isStr()) { try { diff --git a/tests/tck/features/expression/RelationalExpr.feature b/tests/tck/features/expression/RelationalExpr.feature index 16662f0b5b8..bcf77a3a125 100644 --- a/tests/tck/features/expression/RelationalExpr.feature +++ b/tests/tck/features/expression/RelationalExpr.feature @@ -233,3 +233,21 @@ Feature: RelationalExpression | 2 | AppendVertices | 6 | | | 6 | IndexScan | 0 | {"indexCtx": {"columnHints":{"scanType":"RANGE"}}} | | 0 | Start | | | + + Scenario: Transform Relational expr in MATCH clause + When profiling query: + """ + MATCH (v:player{name: "Tim Duncan"})-[e]->(m) WHERE m.player.name =~ 'Tony.*' RETURN id(m) AS id + """ + Then the result should be, in any order: + | id | + | "Tony Parker" | + | "Tony Parker" | + And the execution plan should be: + | id | name | dependencies | operator info | + | 13 | Project | 12 | | + | 12 | Filter | 12 | | + | 12 | AppendVertices | 11 | | + | 11 | Traverse | 8 | | + | 8 | IndexScan | 0 | | + | 0 | Start | | | From a3d1bea26a66977258ac46846f91ca2d79582e44 Mon Sep 17 00:00:00 2001 From: George <58841610+Shinji-IkariG@users.noreply.github.com> Date: Wed, 19 Apr 2023 14:32:17 +0800 Subject: [PATCH 19/20] Update requirements.txt (#5512) Solidified tomli version to solve centos7 compatibility issues --- tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index b04b8c5a448..c51118515e5 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -15,3 +15,4 @@ ply==3.10 pyyaml==5.4 fastcov==1.13 deepdiff==5.7.0 +tomli==1.2.3 From abc9f35f4efb2fba9e0277389bc3600825064e02 Mon Sep 17 00:00:00 2001 From: Yichen Wang <18348405+Aiee@users.noreply.github.com> Date: Wed, 19 Apr 2023 18:15:52 +0800 Subject: [PATCH 20/20] Update cluster id (#5514) --- src/meta/KVBasedClusterIdMan.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/meta/KVBasedClusterIdMan.h b/src/meta/KVBasedClusterIdMan.h index 78c853d3c9b..d74292be542 100644 --- a/src/meta/KVBasedClusterIdMan.h +++ b/src/meta/KVBasedClusterIdMan.h @@ -29,8 +29,17 @@ class ClusterIdMan { * @return */ static ClusterID create(const std::string& metaAddrs) { + // Generate random salt + std::random_device rd; + std::mt19937 gen(rd()); + std::string randomBytes(16, ' '); + std::generate_n(randomBytes.begin(), 16, std::ref(gen)); + + // Concatenate salt with input string + std::string saltedInput = metaAddrs + randomBytes; + std::hash hash_fn; - auto clusterId = hash_fn(metaAddrs); + auto clusterId = hash_fn(saltedInput); uint64_t mask = 0x7FFFFFFFFFFFFFFF; clusterId &= mask; LOG(INFO) << "Create ClusterId " << clusterId;