Skip to content

Commit

Permalink
Fix multiple match joined on edge produce wrong result (#4923)
Browse files Browse the repository at this point in the history
* Fix multiple match joined on edge produce wrong result

* refine

* refine

* address comments

* fix lint

* fix build

* fix unwind type inference

* add some code comment

* rename to _joinkey

* minor refine
  • Loading branch information
codesigner committed Nov 24, 2022
1 parent 819b84d commit 8842efb
Show file tree
Hide file tree
Showing 11 changed files with 168 additions and 8 deletions.
26 changes: 26 additions & 0 deletions src/common/datatypes/Edge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* This source code is licensed under Apache 2.0 License.
*/

#include <common/base/Logging.h>
#include <common/datatypes/Edge.h>
#include <folly/String.h>
#include <folly/hash/Hash.h>
Expand Down Expand Up @@ -141,6 +142,31 @@ bool Edge::keyEqual(const Edge& rhs) const {
return src == rhs.dst && dst == rhs.src && ranking == rhs.ranking;
}

std::string Edge::id() const {
std::string s;
if (src.type() == Value::Type::INT) {
EdgeType t = type > 0 ? type : -type;
const int64_t& srcId = type > 0 ? src.getInt() : dst.getInt();
const int64_t& dstId = type > 0 ? dst.getInt() : src.getInt();
s.reserve(sizeof(srcId) + sizeof(dstId) + sizeof(type) + sizeof(ranking));
s.append(reinterpret_cast<const char*>(&srcId), sizeof(srcId));
s.append(reinterpret_cast<const char*>(&dstId), sizeof(dstId));
s.append(reinterpret_cast<const char*>(&t), sizeof(t));
s.append(reinterpret_cast<const char*>(&ranking), sizeof(ranking));
} else {
DCHECK(src.type() == Value::Type::STRING);
EdgeType t = type > 0 ? type : -type;
const std::string& srcId = type > 0 ? src.getStr() : dst.getStr();
const std::string& dstId = type > 0 ? dst.getStr() : src.getStr();
s.reserve(srcId.size() + dstId.size() + sizeof(t) + sizeof(ranking));
s.append(srcId.data(), srcId.size());
s.append(dstId.data(), dstId.size());
s.append(reinterpret_cast<const char*>(&t), sizeof(t));
s.append(reinterpret_cast<const char*>(&ranking), sizeof(ranking));
}
return s;
}

} // namespace nebula

namespace std {
Expand Down
3 changes: 3 additions & 0 deletions src/common/datatypes/Edge.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ struct Edge {
const Value& value(const std::string& key) const;

bool keyEqual(const Edge& rhs) const;

// Return this edge's id encoded in string
std::string id() const;
};

inline std::ostream& operator<<(std::ostream& os, const Edge& v) {
Expand Down
39 changes: 39 additions & 0 deletions src/common/datatypes/test/EdgeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,43 @@ TEST(Edge, hashEdge) {
EXPECT_NE(edge1, edge5);
}

TEST(Edge, id) {
{
Edge edge1(0, 1, 1, "like", 100, {});

Edge edge2(0, 1, 1, "like", 100, {});
EXPECT_EQ(edge1.id(), edge2.id());

Edge edge3(1, 1, 1, "like", 100, {});
EXPECT_NE(edge1.id(), edge3.id());

Edge edge4(0, 2, 1, "like", 100, {});
EXPECT_NE(edge1.id(), edge4.id());

Edge edge5(0, 1, -1, "like", 100, {});
EXPECT_NE(edge1.id(), edge5.id());

Edge edge6(0, 1, 1, "like", 101, {});
EXPECT_NE(edge1.id(), edge6.id());
}
{
Edge edge1("aaa", "bbb", 1, "like", 100, {});

Edge edge2("aaa", "bbb", 1, "like", 100, {});
EXPECT_EQ(edge1.id(), edge2.id());

Edge edge3("aab", "bbb", 1, "like", 100, {});
EXPECT_NE(edge1.id(), edge3.id());

Edge edge4("aaa", "bba", 1, "like", 100, {});
EXPECT_NE(edge1.id(), edge4.id());

Edge edge5("aaa", "bbb", 2, "like", 100, {});
EXPECT_NE(edge1.id(), edge5.id());

Edge edge6("aaa", "bbb", 1, "like", 99, {});
EXPECT_NE(edge1.id(), edge6.id());
}
}

} // namespace nebula
27 changes: 27 additions & 0 deletions src/common/function/FunctionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1841,6 +1841,33 @@ FunctionManager::FunctionManager() {
}
};
}
{
auto &attr = functions_["_joinkey"];
attr.minArity_ = 1;
attr.maxArity_ = 1;
attr.isAlwaysPure_ = true;
attr.body_ = [](const auto &args) -> Value {
const Value &value = args[0].get();
switch (value.type()) {
case Value::Type::NULLVALUE: {
return Value::kNullValue;
}
case Value::Type::VERTEX: {
return value.getVertex().vid;
}
// NOTE:
// id() on Edge is designed to be used get a Join key when
// Join operator performed on edge, the returned id is a
// string encoded the {src, dst, type, ranking} tuple
case Value::Type::EDGE: {
return value.getEdge().id();
}
default: {
return Value::kNullBadType;
}
}
};
}
{
auto &attr = functions_["tags"];
attr.minArity_ = 1;
Expand Down
2 changes: 1 addition & 1 deletion src/graph/context/ast/CypherAstContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ struct EdgeInfo {
Expression* filter{nullptr};
};

enum class AliasType : int8_t { kNode, kEdge, kPath, kDefault };
enum class AliasType : int8_t { kNode, kEdge, kPath, kEdgeList, kDefault };

struct ScanInfo {
Expression* filter{nullptr};
Expand Down
2 changes: 1 addition & 1 deletion src/graph/planner/match/MatchPathPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ Status MatchPathPlanner::leftExpandFromNode(
auto* pool = qctx->objPool();
auto args = ArgumentList::make(pool);
args->addArgument(InputPropertyExpression::make(pool, nodeInfos[startIndex].alias));
nextTraverseStart = FunctionCallExpression::make(pool, "id", args);
nextTraverseStart = FunctionCallExpression::make(pool, "_joinkey", args);
}
bool reversely = true;
for (size_t i = startIndex; i > 0; --i) {
Expand Down
15 changes: 14 additions & 1 deletion src/graph/planner/match/MatchPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,20 @@ Status MatchPlanner::connectMatchPlan(SubPlan& queryPlan, MatchClauseContext* ma
}
std::unordered_set<std::string> intersectedAliases;
for (auto& alias : matchCtx->aliasesGenerated) {
if (matchCtx->aliasesAvailable.find(alias.first) != matchCtx->aliasesAvailable.end()) {
auto it = matchCtx->aliasesAvailable.find(alias.first);
if (it != matchCtx->aliasesAvailable.end()) {
// Joined type should be same
if (it->second != alias.second) {
return Status::SemanticError(fmt::format("{} binding to different type: {} vs {}",
alias.first,
AliasTypeName[static_cast<int>(alias.second)],
AliasTypeName[static_cast<int>(it->second)]));
}
// Joined On EdgeList is not supported
if (alias.second == AliasType::kEdgeList) {
return Status::SemanticError(alias.first +
" defined with type EdgeList, which cannot be joined on");
}
intersectedAliases.insert(alias.first);
}
}
Expand Down
1 change: 1 addition & 0 deletions src/graph/planner/match/MatchPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class MatchPlanner final : public Planner {
StatusOr<SubPlan> transform(AstContext* astCtx) override;

private:
static constexpr std::array AliasTypeName = {"Node", "Edge", "Path", "EdgeList", "Default"};
bool tailConnected_{false};
StatusOr<SubPlan> genPlan(CypherClauseContextBase* clauseCtx);
Status connectMatchPlan(SubPlan& queryPlan, MatchClauseContext* matchCtx);
Expand Down
4 changes: 2 additions & 2 deletions src/graph/planner/match/SegmentsConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ SubPlan SegmentsConnector::innerJoin(QueryContext* qctx,
for (auto& alias : intersectedAliases) {
auto* args = ArgumentList::make(pool);
args->addArgument(InputPropertyExpression::make(pool, alias));
auto* expr = FunctionCallExpression::make(pool, "id", args);
auto* expr = FunctionCallExpression::make(pool, "_joinkey", args);
hashKeys.emplace_back(expr);
probeKeys.emplace_back(expr->clone());
}
Expand All @@ -46,7 +46,7 @@ SubPlan SegmentsConnector::leftJoin(QueryContext* qctx,
for (auto& alias : intersectedAliases) {
auto* args = ArgumentList::make(pool);
args->addArgument(InputPropertyExpression::make(pool, alias));
auto* expr = FunctionCallExpression::make(pool, "id", args);
auto* expr = FunctionCallExpression::make(pool, "_joinkey", args);
hashKeys.emplace_back(expr);
probeKeys.emplace_back(expr->clone());
}
Expand Down
25 changes: 22 additions & 3 deletions src/graph/validator/MatchValidator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,16 +283,21 @@ Status MatchValidator::buildEdgeInfo(const MatchPath *path,
edgeInfos[i].types.emplace_back(typeName.value());
}
}
AliasType aliasType = AliasType::kEdge;
auto *stepRange = edge->range();
if (stepRange != nullptr) {
NG_RETURN_IF_ERROR(validateStepRange(stepRange));
edgeInfos[i].range = stepRange;
// Type of [e*1..2], [e*2] should be inference to EdgeList
if (stepRange->max() > stepRange->min() || stepRange->min() > 1) {
aliasType = AliasType::kEdgeList;
}
}
if (alias.empty()) {
anonymous = true;
alias = vctx_->anonVarGen()->getVar();
} else {
if (!aliases.emplace(alias, AliasType::kEdge).second) {
if (!aliases.emplace(alias, aliasType).second) {
return Status::SemanticError("`%s': Redefined alias", alias.c_str());
}
}
Expand Down Expand Up @@ -601,14 +606,28 @@ Status MatchValidator::validateUnwind(const UnwindClause *unwindClause,
}

auto labelExprs = ExpressionUtils::collectAll(unwindCtx.unwindExpr, {Expression::Kind::kLabel});
std::vector<AliasType> types;
for (auto *labelExpr : labelExprs) {
DCHECK_EQ(labelExpr->kind(), Expression::Kind::kLabel);
auto label = static_cast<const LabelExpression *>(labelExpr)->name();
if (!unwindCtx.aliasesAvailable.count(label)) {
auto it = unwindCtx.aliasesAvailable.find(label);
if (it == unwindCtx.aliasesAvailable.end()) {
return Status::SemanticError("Variable `%s` not defined", label.c_str());
}
types.push_back(it->second);
}
// UNWIND Type Inference:
// Example: UNWIND x,y AS z
// if x,y have same type
// set z to the same type
// else
// set z to default
AliasType aliasType = AliasType::kDefault;
if (types.size() > 0 &&
std::adjacent_find(types.begin(), types.end(), std::not_equal_to<>()) == types.end()) {
aliasType = types[0];
}
unwindCtx.aliasesGenerated.emplace(unwindCtx.alias, AliasType::kDefault);
unwindCtx.aliasesGenerated.emplace(unwindCtx.alias, aliasType);
if (unwindCtx.aliasesAvailable.count(unwindCtx.alias) > 0) {
return Status::SemanticError("Variable `%s` already declared", unwindCtx.alias.c_str());
}
Expand Down
32 changes: 32 additions & 0 deletions tests/tck/features/bugfix/MatchJoinOnEdge.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) 2020 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License.
Feature: Test match used in pipe

Background:
Given a graph with space named "nba"

Scenario: Multiple Match joined on edge
When executing query:
"""
MATCH (v:player)-[e:like*1..2]->(u) WHERE v.player.name=="Tim Duncan" MATCH (vv:player)-[e:like]->() WHERE vv.player.name=="Tony Parker"return v, u
"""
Then a SemanticError should be raised at runtime: e binding to different type: Edge vs EdgeList
When executing query:
"""
MATCH (v:player)-[e:like*2]->(u) WHERE v.player.name=="Tim Duncan" MATCH (vv:player)-[e:like]->() WHERE vv.player.name=="Tony Parker"return v, u
"""
Then a SemanticError should be raised at runtime: e binding to different type: Edge vs EdgeList
When executing query:
"""
MATCH (v:player)-[e:like]->() WHERE v.player.name=="Tim Duncan" MATCH (u:player)-[e:like]->() WHERE u.player.name=="Tony Parker"return v
"""
Then the result should be, in any order:
| v |
When executing query:
"""
MATCH (v:player)-[e:like]->() WHERE v.player.name=="Tim Duncan" MATCH ()-[e:like]->(u:player) WHERE u.player.name=="Tony Parker"return v, u
"""
Then the result should be, in any order:
| v | u |
| ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) |

0 comments on commit 8842efb

Please sign in to comment.