From 219d000a30ea1d0f4f41a4f835aca7a90d54df7a Mon Sep 17 00:00:00 2001
From: "hs.zhang" <22708345+cangfengzhs@users.noreply.github.com>
Date: Mon, 8 Nov 2021 17:18:43 +0800
Subject: [PATCH] Refactor storage index (#3196)

* stash
* commit IndexEdge/VertexScanNode
* commit IndexSelectionNode
* commit Projection/Dedup Node
* commit IndexLimitNode
* stash
* stash
* stash
* stash
* add index int test
* add double/string1 test
* add string2/string3 test
* finish index_scan_node unittest
* add index node test
* pass lookupindex test
* pass ttl test
* pass all unittest
* remove debug log
* fix bug
* fix bug
* fix bug
* clear file
* clear some useless code
* add comments for IndexNode
* add comments to IndexScanNode
* add comment to Selection/Projection/DedupNode
* fix some init bug
* fix bug to support geo
* converge qualified strategy define together
* address comments
* address some comments; Modify IndexNode::next return Type
* fix bug
* add some comments
* Add blank lines between function definitions
* fix compile error
* modify new file license
* Modify test to avoid the bug mentioned in issue3191
* modify license error
---
 src/common/utils/IndexKeyUtils.h            |   55 +-
 src/common/utils/test/CMakeLists.txt        |    3 +
 src/daemons/CMakeLists.txt                  |    2 +
 src/graph/optimizer/OptimizerUtils.cpp      |   13 +-
 src/interface/storage.thrift                |    4 +
 src/kvstore/test/CMakeLists.txt             |    1 +
 src/storage/CMakeLists.txt                  |    9 +
 src/storage/CommonUtils.h                   |    3 +
 src/storage/ExprVisitorBase.cpp             |   85 +
 src/storage/ExprVisitorBase.h               |   71 +
 src/storage/exec/IndexDedupNode.cpp         |   93 +
 src/storage/exec/IndexDedupNode.h           |   79 +
 src/storage/exec/IndexEdgeNode.h            |  107 -
 src/storage/exec/IndexEdgeScanNode.cpp      |  145 ++
 src/storage/exec/IndexEdgeScanNode.h        |   49 +
 src/storage/exec/IndexFilterNode.h          |  170 --
 src/storage/exec/IndexLimitNode.cpp         |   51 +
 src/storage/exec/IndexLimitNode.h           |   26 +
 src/storage/exec/IndexNode.cpp              |   30 +
 src/storage/exec/IndexNode.h                |  192 ++
 src/storage/exec/IndexOutputNode.h          |  356 ----
 src/storage/exec/IndexProjectionNode.cpp    |   63 +
 src/storage/exec/IndexProjectionNode.h      |   46 +
 src/storage/exec/IndexScanNode.cpp          |  547 ++++++
 src/storage/exec/IndexScanNode.h            |  566 ++++--
 src/storage/exec/IndexSelectionNode.cpp     |   78 +
 src/storage/exec/IndexSelectionNode.h       |  136 ++
 src/storage/exec/IndexVertexNode.h          |   99 -
 src/storage/exec/IndexVertexScanNode.cpp    |  117 ++
 src/storage/exec/IndexVertexScanNode.h      |   57 +
 src/storage/exec/StorageIterator.h          |    2 +-
 src/storage/index/LookupBaseProcessor-inl.h |  472 -----
 src/storage/index/LookupBaseProcessor.h     |   92 -
 src/storage/index/LookupProcessor.cpp       |  307 ++-
 src/storage/index/LookupProcessor.h         |   52 +-
 src/storage/test/CMakeLists.txt             |   14 +
 src/storage/test/IndexScanTest.cpp          |    3 +-
 src/storage/test/IndexTest.cpp              | 1937 +++++++++++++++++++
 src/storage/test/IndexTestUtil.h            |  611 ++++++
 src/storage/test/LookupIndexTest.cpp        |    4 +
 src/tools/db-dump/CMakeLists.txt            |    1 +
 src/tools/meta-dump/CMakeLists.txt          |    1 +
 tests/tck/features/lookup/LookUp.feature    |    1 -
 43 files changed, 5143 insertions(+), 1607 deletions(-)
 create mode 100644 src/storage/ExprVisitorBase.cpp
 create mode 100644 src/storage/ExprVisitorBase.h
 create mode 100644 src/storage/exec/IndexDedupNode.cpp
 create mode 100644 src/storage/exec/IndexDedupNode.h
 delete mode 100644 src/storage/exec/IndexEdgeNode.h
 create mode 100644 src/storage/exec/IndexEdgeScanNode.cpp
 create mode 100644 src/storage/exec/IndexEdgeScanNode.h
 delete mode 100644 src/storage/exec/IndexFilterNode.h
 create mode 100644 src/storage/exec/IndexLimitNode.cpp
 create mode 100644
src/storage/exec/IndexLimitNode.h create mode 100644 src/storage/exec/IndexNode.cpp create mode 100644 src/storage/exec/IndexNode.h delete mode 100644 src/storage/exec/IndexOutputNode.h create mode 100644 src/storage/exec/IndexProjectionNode.cpp create mode 100644 src/storage/exec/IndexProjectionNode.h create mode 100644 src/storage/exec/IndexScanNode.cpp create mode 100644 src/storage/exec/IndexSelectionNode.cpp create mode 100644 src/storage/exec/IndexSelectionNode.h delete mode 100644 src/storage/exec/IndexVertexNode.h create mode 100644 src/storage/exec/IndexVertexScanNode.cpp create mode 100644 src/storage/exec/IndexVertexScanNode.h delete mode 100644 src/storage/index/LookupBaseProcessor-inl.h delete mode 100644 src/storage/index/LookupBaseProcessor.h create mode 100644 src/storage/test/IndexTest.cpp create mode 100644 src/storage/test/IndexTestUtil.h diff --git a/src/common/utils/IndexKeyUtils.h b/src/common/utils/IndexKeyUtils.h index 23288827422..5bb6c5c07a8 100644 --- a/src/common/utils/IndexKeyUtils.h +++ b/src/common/utils/IndexKeyUtils.h @@ -6,13 +6,14 @@ #ifndef COMMON_UTILS_INDEXKEYUTILS_H_ #define COMMON_UTILS_INDEXKEYUTILS_H_ +#include + #include "codec/RowReader.h" #include "common/base/Base.h" #include "common/base/StatusOr.h" #include "common/geo/GeoIndex.h" #include "common/utils/Types.h" #include "interface/gen-cpp2/meta_types.h" - namespace nebula { using PropertyType = nebula::cpp2::PropertyType; @@ -197,39 +198,39 @@ class IndexKeyUtils final { */ static std::string encodeDouble(double v) { - if (v < 0) { - /** - * TODO : now, the -(std::numeric_limits::min()) - * have a problem of precision overflow. current return value is -nan. - */ - auto* c1 = reinterpret_cast(&v); - auto i = *reinterpret_cast(c1); - i = -(std::numeric_limits::max() + i); - auto* c2 = reinterpret_cast(&i); - v = *reinterpret_cast(c2); + if (std::isnan(v)) { + return std::string(sizeof(double), '\xFF'); + } else if (v >= 0) { + auto val = folly::Endian::big(v); + auto* c = reinterpret_cast(&val); + c[0] |= 0x80; + std::string raw; + raw.reserve(sizeof(double)); + raw.append(c, sizeof(double)); + return raw; + } else { + int64_t* x = reinterpret_cast(&v); + *x = ~(*x); + auto val = folly::Endian::big(v); + auto* c = reinterpret_cast(&val); + std::string raw; + raw.reserve(sizeof(double)); + raw.append(c, sizeof(double)); + return raw; } - auto val = folly::Endian::big(v); - auto* c = reinterpret_cast(&val); - c[0] ^= 0x80; - std::string raw; - raw.reserve(sizeof(double)); - raw.append(c, sizeof(double)); - return raw; } static double decodeDouble(const folly::StringPiece& raw) { - char* v = const_cast(raw.data()); - v[0] ^= 0x80; - auto val = *reinterpret_cast(v); + int64_t val = *reinterpret_cast(raw.data()); val = folly::Endian::big(val); if (val < 0) { - auto* c1 = reinterpret_cast(&val); - auto i = *reinterpret_cast(c1); - i = -(std::numeric_limits::max() + i); - auto* c2 = reinterpret_cast(&i); - val = *reinterpret_cast(c2); + val &= 0x7fffffffffffffff; + } else { + val = ~val; } - return val; + double ret; + ::memcpy(&ret, &val, 8); + return ret; } static std::string encodeTime(const nebula::Time& t) { diff --git a/src/common/utils/test/CMakeLists.txt b/src/common/utils/test/CMakeLists.txt index 5e78743cf97..7cbc4c2b045 100644 --- a/src/common/utils/test/CMakeLists.txt +++ b/src/common/utils/test/CMakeLists.txt @@ -11,6 +11,7 @@ nebula_add_test( $ $ $ + $ $ LIBRARIES gtest @@ -30,6 +31,7 @@ nebula_add_test( $ $ $ + $ $ LIBRARIES gtest @@ -52,6 +54,7 @@ nebula_add_test( $ $ $ + $ $ 
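The rewritten encodeDouble/decodeDouble above use a standard order-preserving encoding: NaN becomes all 0xFF bytes, non-negative doubles get their sign bit forced to 1, negative doubles have every bit inverted, and the result is serialized big-endian so that bytewise (memcmp) order matches numeric order. The standalone sketch below is not part of the patch; it drops the folly helpers and the NaN special case, and its function names are illustrative, but it demonstrates the same encoding idea and round trip.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>

// Encode a double into 8 bytes whose lexicographic (memcmp) order
// matches the numeric order of the original values.
std::string encodeDoubleSketch(double v) {
  uint64_t bits;
  std::memcpy(&bits, &v, sizeof(bits));
  if ((bits >> 63) == 0) {
    bits |= 0x8000000000000000ULL;  // non-negative: set the sign bit
  } else {
    bits = ~bits;                   // negative: invert all bits
  }
  std::string out(sizeof(bits), '\0');
  for (size_t i = 0; i < sizeof(bits); ++i) {
    out[i] = static_cast<char>(bits >> (56 - 8 * i));  // big-endian byte order
  }
  return out;
}

double decodeDoubleSketch(const std::string& raw) {
  uint64_t bits = 0;
  for (size_t i = 0; i < sizeof(bits); ++i) {
    bits = (bits << 8) | static_cast<uint8_t>(raw[i]);
  }
  if (bits >> 63) {
    bits &= 0x7fffffffffffffffULL;  // was non-negative: clear the forced sign bit
  } else {
    bits = ~bits;                   // was negative: invert back
  }
  double v;
  std::memcpy(&v, &bits, sizeof(v));
  return v;
}

int main() {
  assert(encodeDoubleSketch(-1.5) < encodeDoubleSketch(-0.5));
  assert(encodeDoubleSketch(-0.5) < encodeDoubleSketch(0.0));
  assert(encodeDoubleSketch(0.0) < encodeDoubleSketch(2.25));
  assert(decodeDoubleSketch(encodeDoubleSketch(-1.5)) == -1.5);
  return 0;
}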
LIBRARIES gtest diff --git a/src/daemons/CMakeLists.txt b/src/daemons/CMakeLists.txt index 67c57580366..ac98b67ff03 100644 --- a/src/daemons/CMakeLists.txt +++ b/src/daemons/CMakeLists.txt @@ -45,6 +45,7 @@ set(storage_meta_deps $ $ $ + $ $ ) @@ -128,6 +129,7 @@ nebula_add_executable( $ $ $ + $ $ ${common_deps} LIBRARIES diff --git a/src/graph/optimizer/OptimizerUtils.cpp b/src/graph/optimizer/OptimizerUtils.cpp index 3c0de166552..0537bd2fd1b 100644 --- a/src/graph/optimizer/OptimizerUtils.cpp +++ b/src/graph/optimizer/OptimizerUtils.cpp @@ -464,18 +464,7 @@ Value OptimizerUtils::normalizeValue(const meta::cpp2::ColumnDef& col, const Val if (!col.type.type_length_ref().has_value()) { return Value::kNullBadType; } - if (!v.isStr()) { - return v; - } - auto len = static_cast(*col.get_type().get_type_length()); - if (v.getStr().size() > len) { - return Value(v.getStr().substr(0, len)); - } else { - std::string s; - s.reserve(len); - s.append(v.getStr()).append(len - v.getStr().size(), '\0'); - return Value(std::move(s)); - } + return v; } case Value::Type::__EMPTY__: case Value::Type::NULLVALUE: diff --git a/src/interface/storage.thrift b/src/interface/storage.thrift index 9099ec610e2..be82a0ca2e3 100644 --- a/src/interface/storage.thrift +++ b/src/interface/storage.thrift @@ -506,6 +506,10 @@ struct IndexColumnHint { 2: ScanType scan_type, 3: common.Value begin_value, 4: common.Value end_value, + // When `columnhint` means ` >= begin_value`, `include_begin` is true + // and include_end is similar + 5: bool include_begin = true, + 6: bool include_end = false, } struct IndexQueryContext { diff --git a/src/kvstore/test/CMakeLists.txt b/src/kvstore/test/CMakeLists.txt index 75a807479cb..7c872b29eda 100644 --- a/src/kvstore/test/CMakeLists.txt +++ b/src/kvstore/test/CMakeLists.txt @@ -34,6 +34,7 @@ set(KVSTORE_TEST_LIBS $ $ $ + $ $ ) diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index a1b081f3d43..c182f8b4f99 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -27,6 +27,7 @@ nebula_add_library( nebula_add_library( graph_storage_service_handler OBJECT GraphStorageServiceHandler.cpp + ExprVisitorBase.cpp context/StorageExpressionContext.cpp mutate/AddVerticesProcessor.cpp mutate/DeleteVerticesProcessor.cpp @@ -40,6 +41,14 @@ nebula_add_library( query/ScanVertexProcessor.cpp query/ScanEdgeProcessor.cpp index/LookupProcessor.cpp + exec/IndexNode.cpp + exec/IndexDedupNode.cpp + exec/IndexEdgeScanNode.cpp + exec/IndexLimitNode.cpp + exec/IndexProjectionNode.cpp + exec/IndexScanNode.cpp + exec/IndexSelectionNode.cpp + exec/IndexVertexScanNode.cpp ) nebula_add_library( diff --git a/src/storage/CommonUtils.h b/src/storage/CommonUtils.h index 009680c6476..66479abb766 100644 --- a/src/storage/CommonUtils.h +++ b/src/storage/CommonUtils.h @@ -200,6 +200,9 @@ struct RuntimeContext { ObjectPool* objPool() { return &planContext_->objPool_; } bool isPlanKilled() { + if (env() == nullptr) { + return false; + } return env()->metaClient_ && env()->metaClient_->checkIsPlanKilled(planContext_->sessionId_, planContext_->planId_); } diff --git a/src/storage/ExprVisitorBase.cpp b/src/storage/ExprVisitorBase.cpp new file mode 100644 index 00000000000..9f56e8a977b --- /dev/null +++ b/src/storage/ExprVisitorBase.cpp @@ -0,0 +1,85 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
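The two new IndexColumnHint fields let a range hint state whether each bound is inclusive; with the defaults added above, begin_value/end_value describe the half-open interval [begin, end). The following sketch is not part of the patch, and ColumnHintSketch/inRange are made-up stand-ins for the generated thrift struct and the scan-node logic that consumes it; it only illustrates the flag semantics.

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for the generated IndexColumnHint thrift struct;
// only the fields relevant to range scans are modeled here.
struct ColumnHintSketch {
  int64_t begin_value;
  int64_t end_value;
  bool include_begin = true;   // default per the thrift change: >= begin_value
  bool include_end = false;    // default per the thrift change: <  end_value
};

// True if `v` falls inside the hinted range under the inclusive flags.
bool inRange(const ColumnHintSketch& hint, int64_t v) {
  bool lowerOk = hint.include_begin ? v >= hint.begin_value : v > hint.begin_value;
  bool upperOk = hint.include_end ? v <= hint.end_value : v < hint.end_value;
  return lowerOk && upperOk;
}

int main() {
  // WHERE col >= 5 AND col < 10  ->  begin=5, end=10, defaults apply
  ColumnHintSketch hint{5, 10};
  assert(inRange(hint, 5));
  assert(inRange(hint, 9));
  assert(!inRange(hint, 10));
  return 0;
}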
+ */ +#include "storage/ExprVisitorBase.h" +namespace nebula { +namespace storage { +void ExprVisitorBase::visit(ConstantExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(UnaryExpression *expr) { expr->operand()->accept(this); } +void ExprVisitorBase::visit(TypeCastingExpression *expr) { expr->operand()->accept(this); } +void ExprVisitorBase::visit(LabelExpression *expr) { fatal(expr); } +void ExprVisitorBase::visit(LabelAttributeExpression *expr) { fatal(expr); } +// binary expression +void ExprVisitorBase::visit(ArithmeticExpression *expr) { + expr->left()->accept(this); + expr->right()->accept(this); +} +void ExprVisitorBase::visit(RelationalExpression *expr) { + expr->left()->accept(this); + expr->right()->accept(this); +} +void ExprVisitorBase::visit(SubscriptExpression *expr) { + expr->left()->accept(this); + expr->right()->accept(this); +} +void ExprVisitorBase::visit(AttributeExpression *expr) { fatal(expr); } +void ExprVisitorBase::visit(LogicalExpression *expr) { + for (auto operand : expr->operands()) { + operand->accept(this); + } +} +// function call +void ExprVisitorBase::visit(FunctionCallExpression *expr) { + for (auto arg : expr->args()->args()) { + arg->accept(this); + } +} +void ExprVisitorBase::visit(AggregateExpression *expr) { fatal(expr); } +void ExprVisitorBase::visit(UUIDExpression *expr) { UNUSED(expr); } +// variable expression +void ExprVisitorBase::visit(VariableExpression *expr) { fatal(expr); } +void ExprVisitorBase::visit(VersionedVariableExpression *expr) { fatal(expr); } +// container expression +void ExprVisitorBase::visit(ListExpression *expr) { + for (auto item : expr->items()) { + item->accept(this); + } +} +void ExprVisitorBase::visit(SetExpression *expr) { + for (auto item : expr->items()) { + item->accept(this); + } +} +void ExprVisitorBase::visit(MapExpression *expr) { UNUSED(expr); } +// property Expression +void ExprVisitorBase::visit(TagPropertyExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(EdgePropertyExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(InputPropertyExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(VariablePropertyExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(DestPropertyExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(SourcePropertyExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(EdgeSrcIdExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(EdgeTypeExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(EdgeRankExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(EdgeDstIdExpression *expr) { UNUSED(expr); } +// vertex/edge expression +void ExprVisitorBase::visit(VertexExpression *expr) { UNUSED(expr); } +void ExprVisitorBase::visit(EdgeExpression *expr) { UNUSED(expr); } +// case expression +void ExprVisitorBase::visit(CaseExpression *expr) { UNUSED(expr); } +// path build expression +void ExprVisitorBase::visit(PathBuildExpression *expr) { fatal(expr); } +// column expression +void ExprVisitorBase::visit(ColumnExpression *expr) { fatal(expr); } +// predicate expression +void ExprVisitorBase::visit(PredicateExpression *expr) { fatal(expr); } +// list comprehension expression +void ExprVisitorBase::visit(ListComprehensionExpression *expr) { fatal(expr); } +// reduce expression +void ExprVisitorBase::visit(ReduceExpression *expr) { fatal(expr); } +// subscript range expression +void ExprVisitorBase::visit(SubscriptRangeExpression *expr) { fatal(expr); } + +} // namespace storage +} // 
namespace nebula diff --git a/src/storage/ExprVisitorBase.h b/src/storage/ExprVisitorBase.h new file mode 100644 index 00000000000..015f02d5aca --- /dev/null +++ b/src/storage/ExprVisitorBase.h @@ -0,0 +1,71 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#pragma once +#include "common/expression/ExprVisitor.h" +namespace nebula { +namespace storage { + +class ExprVisitorBase : public ::nebula::ExprVisitor { + public: + void visit(ConstantExpression *expr) override; + void visit(UnaryExpression *expr) override; + void visit(TypeCastingExpression *expr) override; + void visit(LabelExpression *expr) override; + void visit(LabelAttributeExpression *expr) override; + // binary expression + void visit(ArithmeticExpression *expr) override; + void visit(RelationalExpression *expr) override; + void visit(SubscriptExpression *expr) override; + void visit(AttributeExpression *expr) override; + void visit(LogicalExpression *expr) override; + // function call + void visit(FunctionCallExpression *expr) override; + void visit(AggregateExpression *expr) override; + void visit(UUIDExpression *expr) override; + // variable expression + void visit(VariableExpression *expr) override; + void visit(VersionedVariableExpression *expr) override; + // container expression + void visit(ListExpression *expr) override; + void visit(SetExpression *expr) override; + void visit(MapExpression *expr) override; + // property Expression + void visit(TagPropertyExpression *expr) override; + void visit(EdgePropertyExpression *expr) override; + void visit(InputPropertyExpression *expr) override; + void visit(VariablePropertyExpression *expr) override; + void visit(DestPropertyExpression *expr) override; + void visit(SourcePropertyExpression *expr) override; + void visit(EdgeSrcIdExpression *expr) override; + void visit(EdgeTypeExpression *expr) override; + void visit(EdgeRankExpression *expr) override; + void visit(EdgeDstIdExpression *expr) override; + // vertex/edge expression + void visit(VertexExpression *expr) override; + void visit(EdgeExpression *expr) override; + // case expression + void visit(CaseExpression *expr) override; + // path build expression + void visit(PathBuildExpression *expr) override; + // column expression + void visit(ColumnExpression *expr) override; + // predicate expression + void visit(PredicateExpression *expr) override; + // list comprehension expression + void visit(ListComprehensionExpression *expr) override; + // reduce expression + void visit(ReduceExpression *expr) override; + // subscript range expression + void visit(SubscriptRangeExpression *expr) override; + + private: + using ::nebula::ExprVisitor::visit; + inline void fatal(Expression *expr) { + LOG(FATAL) << "Unexpect expression kind " << static_cast(expr->kind()) << " at storage"; + } +}; +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexDedupNode.cpp b/src/storage/exec/IndexDedupNode.cpp new file mode 100644 index 00000000000..c27faf11eec --- /dev/null +++ b/src/storage/exec/IndexDedupNode.cpp @@ -0,0 +1,93 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
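ExprVisitorBase above gives storage-side visitors a common skeleton: composite expressions (unary, arithmetic, relational, logical, function calls, lists) recurse into their operands by default, leaf expressions are no-ops, and expression kinds that should never reach storage hit fatal. A derived visitor therefore only overrides the leaf cases it cares about. The sketch below is a self-contained analogue of that pattern, not the real nebula expression classes; every type name in it is illustrative.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Expr;

// Base visitor: supplies default recursion over composite expressions.
struct Visitor {
  virtual ~Visitor() = default;
  virtual void visitProperty(const std::string& /*name*/) {}  // leaf: no-op by default
  virtual void visitLogical(const std::vector<std::unique_ptr<Expr>>& operands);
};

struct Expr {
  virtual ~Expr() = default;
  virtual void accept(Visitor* v) const = 0;
};

struct PropertyExpr : Expr {
  explicit PropertyExpr(std::string n) : name(std::move(n)) {}
  void accept(Visitor* v) const override { v->visitProperty(name); }
  std::string name;
};

struct LogicalExpr : Expr {
  void accept(Visitor* v) const override { v->visitLogical(operands); }
  std::vector<std::unique_ptr<Expr>> operands;
};

void Visitor::visitLogical(const std::vector<std::unique_ptr<Expr>>& operands) {
  for (const auto& op : operands) {
    op->accept(this);  // default behaviour: just recurse into children
  }
}

// Derived visitor: collect every property referenced by a filter expression.
struct PropertyCollector : Visitor {
  void visitProperty(const std::string& name) override { props.push_back(name); }
  std::vector<std::string> props;
};

int main() {
  auto filter = std::make_unique<LogicalExpr>();  // models (a AND b)
  filter->operands.push_back(std::make_unique<PropertyExpr>("a"));
  filter->operands.push_back(std::make_unique<PropertyExpr>("b"));
  PropertyCollector collector;
  filter->accept(&collector);
  for (const auto& p : collector.props) {
    std::cout << p << "\n";  // prints: a b
  }
  return 0;
}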
+ */ +#include "storage/exec/IndexDedupNode.h" +namespace nebula { +namespace storage { +IndexDedupNode::IndexDedupNode(const IndexDedupNode& node) + : IndexNode(node), dedupColumns_(node.dedupColumns_), dedupPos_(node.dedupPos_) {} + +IndexDedupNode::IndexDedupNode(RuntimeContext* context, const std::vector& dedupColumn) + : IndexNode(context, "IndexDedupNode"), dedupColumns_(dedupColumn) {} +::nebula::cpp2::ErrorCode IndexDedupNode::init(InitContext& ctx) { + for (auto& col : dedupColumns_) { + ctx.requiredColumns.insert(col); + } + // The contents of `ctx` should be the same when all child nodes are initialized, and `ctx` should + // be the same after initialization. + for (size_t i = 0; i < children_.size() - 1; i++) { + auto tmp = ctx; // + auto ret = children_[i]->init(tmp); + if (ret != ::nebula::cpp2::ErrorCode::SUCCEEDED) { + return ret; + } + } + auto ret = children_.back()->init(ctx); + if (ret != ::nebula::cpp2::ErrorCode::SUCCEEDED) { + return ret; + } + for (auto& col : dedupColumns_) { + dedupPos_.push_back(ctx.retColMap[col]); + } + return ::nebula::cpp2::ErrorCode::SUCCEEDED; +} + +::nebula::cpp2::ErrorCode IndexDedupNode::doExecute(PartitionID partId) { + currentChild_ = 0; + dedupSet_.clear(); + return IndexNode::doExecute(partId); +} + +IndexNode::Result IndexDedupNode::iterateCurrentChild(size_t currentChild) { + auto& child = *children_[currentChild]; + Result result; + do { + result = child.next(); + // error or meet end + if (!result.hasData()) { + return result; + } + auto dedupResult = dedup(result.row()); + if (!dedupResult) { + continue; + } + return result; + } while (true); +} + +IndexNode::Result IndexDedupNode::doNext() { + Result result; + while (currentChild_ < children_.size()) { + result = iterateCurrentChild(currentChild_); + // error + if (!result.success()) { + return result; + } + // finish iterate one child + if (!result.hasData()) { + currentChild_++; + continue; + } + return result; + } + return Result(); +} + +IndexDedupNode::RowWrapper::RowWrapper(const Row& row, const std::vector& posList) { + values_.reserve(posList.size()); + for (auto p : posList) { + values_.emplace_back(row[p]); + } +} + +std::unique_ptr IndexDedupNode::copy() { + return std::make_unique(*this); +} + +std::string IndexDedupNode::identify() { + return fmt::format("{}(dedup=[{}])", name_, folly::join(',', dedupColumns_)); +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexDedupNode.h b/src/storage/exec/IndexDedupNode.h new file mode 100644 index 00000000000..8025c08124b --- /dev/null +++ b/src/storage/exec/IndexDedupNode.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#pragma once +#include "common/datatypes/DataSet.h" +#include "folly/container/F14Set.h" +#include "storage/exec/IndexNode.h" +namespace nebula { +namespace storage { +/** + * + * IndexDedupNode + * + * reference: IndexNode + * + * `IndexDedupNode` is the class which is used to eliminate duplicate rows of data returned by + * multiple child nodes. 
+ * ┌───────────┐ + * │ IndexNode │ + * └─────┬─────┘ + * │ + * ┌────────┴───────┐ + * │ IndexDedupNode │ + * └────────────────┘ + * Member: + * `dedupColumns_`: columns' name which are used to dedup + * `dedupPos_` : dedup columns' position in child return row + * `dedupSet_` : the set which record the rows have been return to parent + * `currentChild_`: current iterate child + */ + +class IndexDedupNode : public IndexNode { + public: + IndexDedupNode(const IndexDedupNode& node); + IndexDedupNode(RuntimeContext* context, const std::vector& dedupColumn); + ::nebula::cpp2::ErrorCode init(InitContext& ctx) override; + std::unique_ptr copy() override; + std::string identify() override; + + private: + inline bool dedup(const Row& row); + ::nebula::cpp2::ErrorCode doExecute(PartitionID partId) override; + Result doNext() override; + Result iterateCurrentChild(size_t currentChild); + // Define RowWrapper for dedup + class RowWrapper { + public: + RowWrapper(const Row& row, const std::vector& posList); + inline const List& values() const { return values_; } + + private: + List values_; + }; + // End of RowWrapper + struct Hasher { + size_t operator()(const RowWrapper& wrapper) const { + return std::hash()(wrapper.values()); + } + }; + struct Equal { + bool operator()(const RowWrapper& a, const RowWrapper& b) const { + return a.values() == b.values(); + } + }; + std::vector dedupColumns_; + std::vector dedupPos_; + folly::F14FastSet dedupSet_; + size_t currentChild_ = 0; +}; + +/* Definition of inline function */ +inline bool IndexDedupNode::dedup(const Row& row) { + auto result = dedupSet_.emplace(row, dedupPos_); + return result.second; +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexEdgeNode.h b/src/storage/exec/IndexEdgeNode.h deleted file mode 100644 index 74dea76db87..00000000000 --- a/src/storage/exec/IndexEdgeNode.h +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. 
- */ -#ifndef STORAGE_EXEC_INDEXEDGENODE_H_ -#define STORAGE_EXEC_INDEXEDGENODE_H_ - -#include "common/base/Base.h" -#include "storage/exec/IndexScanNode.h" -#include "storage/exec/RelNode.h" - -namespace nebula { -namespace storage { - -template -class IndexEdgeNode final : public RelNode { - public: - using RelNode::doExecute; - - IndexEdgeNode(RuntimeContext* context, - IndexScanNode* indexScanNode, - const std::vector>& schemas, - const std::string& schemaName, - int64_t limit = -1) - : context_(context), - indexScanNode_(indexScanNode), - schemas_(schemas), - schemaName_(schemaName), - limit_(limit) { - RelNode::name_ = "IndexEdgeNode"; - } - - nebula::cpp2::ErrorCode doExecute(PartitionID partId) override { - auto ret = RelNode::doExecute(partId); - if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - return ret; - } - - auto ttlProp = CommonUtils::ttlProps(context_->edgeSchema_); - - data_.clear(); - std::vector edges; - auto* iter = static_cast(indexScanNode_->iterator()); - while (iter && iter->valid()) { - if (context_->isPlanKilled()) { - return nebula::cpp2::ErrorCode::E_PLAN_IS_KILLED; - } - if (!iter->val().empty() && ttlProp.first) { - auto v = IndexKeyUtils::parseIndexTTL(iter->val()); - if (CommonUtils::checkDataExpiredForTTL( - context_->edgeSchema_, std::move(v), ttlProp.second.second, ttlProp.second.first)) { - iter->next(); - continue; - } - } - storage::cpp2::EdgeKey edge; - edge.set_src(iter->srcId()); - edge.set_edge_type(context_->edgeType_); - edge.set_ranking(iter->ranking()); - edge.set_dst(iter->dstId()); - edges.emplace_back(std::move(edge)); - iter->next(); - } - int64_t count = 0; - for (const auto& edge : edges) { - auto key = NebulaKeyUtils::edgeKey(context_->vIdLen(), - partId, - (*edge.src_ref()).getStr(), - context_->edgeType_, - edge.get_ranking(), - (*edge.dst_ref()).getStr()); - std::string val; - ret = context_->env()->kvstore_->get(context_->spaceId(), partId, key, &val); - if (ret == nebula::cpp2::ErrorCode::SUCCEEDED) { - data_.emplace_back(std::move(key), std::move(val)); - } else if (ret == nebula::cpp2::ErrorCode::E_KEY_NOT_FOUND) { - continue; - } else { - return ret; - } - if (limit_ > 0 && ++count >= limit_) { - break; - } - } - return nebula::cpp2::ErrorCode::SUCCEEDED; - } - - std::vector moveData() { return std::move(data_); } - - const std::vector>& getSchemas() { - return schemas_; - } - - const std::string& getSchemaName() { return schemaName_; } - - private: - RuntimeContext* context_; - IndexScanNode* indexScanNode_; - const std::vector>& schemas_; - const std::string& schemaName_; - int64_t limit_; - std::vector data_; -}; - -} // namespace storage -} // namespace nebula -#endif // STORAGE_EXEC_INDEXEDGENODE_H_ diff --git a/src/storage/exec/IndexEdgeScanNode.cpp b/src/storage/exec/IndexEdgeScanNode.cpp new file mode 100644 index 00000000000..e9242e6e556 --- /dev/null +++ b/src/storage/exec/IndexEdgeScanNode.cpp @@ -0,0 +1,145 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +#include "storage/exec/IndexEdgeScanNode.h" +namespace nebula { +namespace storage { +IndexEdgeScanNode::IndexEdgeScanNode(const IndexEdgeScanNode& node) + : IndexScanNode(node), edge_(node.edge_) {} +IndexEdgeScanNode::IndexEdgeScanNode(RuntimeContext* context, + IndexID indexId, + const std::vector& columnHint, + ::nebula::kvstore::KVStore* kvstore) + : IndexScanNode(context, "IndexEdgeScanNode", indexId, columnHint, kvstore) { + getIndex = std::function([this](std::shared_ptr& index) { + auto env = this->context_->env(); + auto indexMgr = env->indexMan_; + auto indexVal = indexMgr->getEdgeIndex(this->spaceId_, this->indexId_); + if (!indexVal.ok()) { + return ::nebula::cpp2::ErrorCode::E_INDEX_NOT_FOUND; + } + index = indexVal.value(); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }); + getEdge = std::function([this](EdgeSchemas& edge) { + auto env = this->context_->env(); + auto schemaMgr = env->schemaMan_; + auto allSchema = schemaMgr->getAllVerEdgeSchema(this->spaceId_); + auto edgeType = this->index_->get_schema_id().get_edge_type(); + if (!allSchema.ok() || !allSchema.value().count(edgeType)) { + return ::nebula::cpp2::ErrorCode::E_EDGE_NOT_FOUND; + } + edge = allSchema.value().at(edgeType); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }); +} + +::nebula::cpp2::ErrorCode IndexEdgeScanNode::init(InitContext& ctx) { + if (auto ret = getIndex(this->index_); UNLIKELY(ret != ::nebula::cpp2::ErrorCode::SUCCEEDED)) { + return ret; + } + if (auto ret = getEdge(edge_); UNLIKELY(ret != ::nebula::cpp2::ErrorCode::SUCCEEDED)) { + return ret; + } + return IndexScanNode::init(ctx); +} + +Row IndexEdgeScanNode::decodeFromIndex(folly::StringPiece key) { + std::vector values(requiredColumns_.size()); + if (colPosMap_.count(kSrc)) { + auto vId = IndexKeyUtils::getIndexSrcId(context_->vIdLen(), key); + if (context_->isIntId()) { + values[colPosMap_[kSrc]] = Value(*reinterpret_cast(vId.data())); + } else { + values[colPosMap_[kSrc]] = Value(vId.subpiece(0, vId.find_first_of('\0')).toString()); + } + } + if (colPosMap_.count(kDst)) { + auto vId = IndexKeyUtils::getIndexDstId(context_->vIdLen(), key); + if (context_->isIntId()) { + values[colPosMap_[kDst]] = Value(*reinterpret_cast(vId.data())); + } else { + values[colPosMap_[kDst]] = Value(vId.subpiece(0, vId.find_first_of('\0')).toString()); + } + } + if (colPosMap_.count(kRank)) { + auto rank = IndexKeyUtils::getIndexRank(context_->vIdLen(), key); + values[colPosMap_[kRank]] = Value(rank); + } + if (colPosMap_.count(kType)) { + values[colPosMap_[kType]] = Value(context_->edgeType_); + } + // Truncate the src/rank/dst at the end to facilitate obtaining the two bytes representing the + // nullableBit directly at the end when needed + key.subtract(context_->vIdLen() * 2 + sizeof(EdgeRanking)); + decodePropFromIndex(key, colPosMap_, values); + return Row(std::move(values)); +} + +nebula::cpp2::ErrorCode IndexEdgeScanNode::getBaseData(folly::StringPiece key, + std::pair& kv) { + auto vIdLen = context_->vIdLen(); + kv.first = NebulaKeyUtils::edgeKey(vIdLen, + partId_, + IndexKeyUtils::getIndexSrcId(vIdLen, key).str(), + context_->edgeType_, + IndexKeyUtils::getIndexRank(vIdLen, key), + IndexKeyUtils::getIndexDstId(vIdLen, key).str()); + return kvstore_->get(context_->spaceId(), partId_, kv.first, &kv.second); +} + +Map IndexEdgeScanNode::decodeFromBase(const std::string& key, + const std::string& value) { + Map values; + auto reader = RowReaderWrapper::getRowReader(edge_, value); + for (auto& col : requiredAndHintColumns_) { + switch 
(QueryUtils::toReturnColType(col)) { + case QueryUtils::ReturnColType::kType: { + values[col] = Value(context_->edgeType_); + } break; + case QueryUtils::ReturnColType::kSrc: { + auto vId = NebulaKeyUtils::getSrcId(context_->vIdLen(), key); + if (context_->isIntId()) { + values[col] = Value(*reinterpret_cast(vId.data())); + } else { + values[col] = Value(vId.subpiece(0, vId.find_first_of('\0')).toString()); + } + } break; + case QueryUtils::ReturnColType::kDst: { + auto vId = NebulaKeyUtils::getDstId(context_->vIdLen(), key); + if (context_->isIntId()) { + values[col] = Value(*reinterpret_cast(vId.data())); + } else { + values[col] = Value(vId.subpiece(0, vId.find_first_of('\0')).toString()); + } + } break; + case QueryUtils::ReturnColType::kRank: { + values[col] = Value(NebulaKeyUtils::getRank(context_->vIdLen(), key)); + } break; + case QueryUtils::ReturnColType::kOther: { + auto retVal = QueryUtils::readValue(reader.get(), col, edge_.back()->field(col)); + if (!retVal.ok()) { + LOG(FATAL) << "Bad value for field" << col; + } + values[col] = std::move(retVal.value()); + } break; + default: + LOG(FATAL) << "Unexpect column name:" << col; + } + } + return values; +} + +const std::vector>& +IndexEdgeScanNode::getSchema() { + return edge_; +} + +std::unique_ptr IndexEdgeScanNode::copy() { + return std::make_unique(*this); +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexEdgeScanNode.h b/src/storage/exec/IndexEdgeScanNode.h new file mode 100644 index 00000000000..d21ef95faa5 --- /dev/null +++ b/src/storage/exec/IndexEdgeScanNode.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#pragma once +#include "common/base/Base.h" +#include "common/utils/NebulaKeyUtils.h" +#include "storage/exec/IndexScanNode.h" +#include "storage/exec/QueryUtils.h" +#include "storage/exec/StorageIterator.h" +namespace nebula { +namespace storage { + +/** + * IndexEdgeScanNode + * + * reference: IndexScanNode + */ + +class IndexEdgeScanNode : public IndexScanNode { + public: + IndexEdgeScanNode(const IndexEdgeScanNode& node); + IndexEdgeScanNode(RuntimeContext* context, + IndexID indexId, + const std::vector& columnHint, + ::nebula::kvstore::KVStore* kvstore); + ::nebula::cpp2::ErrorCode init(InitContext& ctx) override; + std::unique_ptr copy() override; + + private: + Row decodeFromIndex(folly::StringPiece key) override; + nebula::cpp2::ErrorCode getBaseData(folly::StringPiece key, + std::pair& kv) override; + Map decodeFromBase(const std::string& key, const std::string& value) override; + + using EdgeSchemas = std::vector>; + using IndexItem = ::nebula::meta::cpp2::IndexItem; + const EdgeSchemas& getSchema() override; + EdgeSchemas edge_; + + // Convenient for testing + std::function<::nebula::cpp2::ErrorCode(std::shared_ptr&)> getIndex; + std::function<::nebula::cpp2::ErrorCode(EdgeSchemas&)> getEdge; + + FRIEND_TEST(IndexScanTest, Edge); + friend class IndexScanTestHelper; +}; +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexFilterNode.h b/src/storage/exec/IndexFilterNode.h deleted file mode 100644 index 5cb73ced098..00000000000 --- a/src/storage/exec/IndexFilterNode.h +++ /dev/null @@ -1,170 +0,0 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. 
- */ -#ifndef STORAGE_EXEC_INDEXFILTERNODE_H_ -#define STORAGE_EXEC_INDEXFILTERNODE_H_ - -#include "common/base/Base.h" -#include "common/context/ExpressionContext.h" -#include "common/expression/Expression.h" -#include "storage/exec/IndexEdgeNode.h" -#include "storage/exec/IndexScanNode.h" -#include "storage/exec/IndexVertexNode.h" -#include "storage/exec/RelNode.h" - -namespace nebula { -namespace storage { - -template -class IndexFilterNode final : public RelNode { - public: - using RelNode::doExecute; - - // evalExprByIndex_ is true, all fileds in filter is in index. No need to read - // data anymore. - IndexFilterNode(RuntimeContext* context, - IndexScanNode* indexScanNode, - StorageExpressionContext* exprCtx, - Expression* exp, - bool isEdge, - int64_t limit = -1) - : context_(context), - indexScanNode_(indexScanNode), - exprCtx_(exprCtx), - filterExp_(exp), - isEdge_(isEdge), - limit_(limit) { - evalExprByIndex_ = true; - RelNode::name_ = "IndexFilterNode"; - } - - // evalExprByIndex_ is false, some fileds in filter is out of index, which - // need to read data. - IndexFilterNode(RuntimeContext* context, - IndexEdgeNode* indexEdgeNode, - StorageExpressionContext* exprCtx, - Expression* exp, - int64_t limit = -1) - : context_(context), - indexEdgeNode_(indexEdgeNode), - exprCtx_(exprCtx), - filterExp_(exp), - limit_(limit) { - evalExprByIndex_ = false; - isEdge_ = true; - } - - // evalExprByIndex_ is false, some fileds in filter is out of index, which - // need to read data. - IndexFilterNode(RuntimeContext* context, - IndexVertexNode* indexVertexNode, - StorageExpressionContext* exprCtx, - Expression* exp, - int64_t limit = -1) - : context_(context), - indexVertexNode_(indexVertexNode), - exprCtx_(exprCtx), - filterExp_(exp), - limit_(limit) { - evalExprByIndex_ = false; - isEdge_ = false; - } - - nebula::cpp2::ErrorCode doExecute(PartitionID partId) override { - data_.clear(); - auto ret = RelNode::doExecute(partId); - if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - return ret; - } - std::vector data; - if (evalExprByIndex_) { - data = indexScanNode_->moveData(); - } else if (isEdge_) { - data = indexEdgeNode_->moveData(); - } else { - data = indexVertexNode_->moveData(); - } - int64_t count = 0; - for (const auto& k : data) { - if (context_->isPlanKilled()) { - return nebula::cpp2::ErrorCode::E_PLAN_IS_KILLED; - } - if (evalExprByIndex_) { - if (check(k.first)) { - data_.emplace_back(k.first, k.second); - count++; - } - } else { - const auto& schemas = - isEdge_ ? indexEdgeNode_->getSchemas() : indexVertexNode_->getSchemas(); - auto reader = RowReaderWrapper::getRowReader(schemas, k.second); - if (!reader) { - continue; - } - if (check(reader.get(), k.first)) { - data_.emplace_back(k.first, k.second); - count++; - } - } - if (limit_ > 0 && count >= limit_) { - break; - } - } - return nebula::cpp2::ErrorCode::SUCCEEDED; - } - - std::vector moveData() { return std::move(data_); } - - const std::vector>& getSchemas() { - return isEdge_ ? 
indexEdgeNode_->getSchemas() : indexVertexNode_->getSchemas(); - } - - bool hasNullableCol() const { return exprCtx_->hasNullableCol(); } - - const std::vector& indexCols() const { return exprCtx_->indexCols(); } - - private: - bool check(const std::string& raw) { - if (filterExp_ != nullptr) { - exprCtx_->reset(raw); - auto result = filterExp_->eval(*exprCtx_); - if (result.type() == Value::Type::BOOL) { - return result.getBool(); - } else { - return false; - } - } - return false; - } - - bool check(RowReader* reader, const std::string& raw) { - if (filterExp_ != nullptr) { - exprCtx_->reset(reader, raw); - auto result = filterExp_->eval(*exprCtx_); - if (result.type() == Value::Type::BOOL) { - return result.getBool(); - } else { - return false; - } - } - return false; - } - - private: - RuntimeContext* context_; - IndexScanNode* indexScanNode_{nullptr}; - IndexEdgeNode* indexEdgeNode_{nullptr}; - IndexVertexNode* indexVertexNode_{nullptr}; - StorageExpressionContext* exprCtx_; - Expression* filterExp_; - bool isEdge_; - bool evalExprByIndex_; - int64_t limit_; - std::vector data_{}; -}; - -} // namespace storage -} // namespace nebula - -#endif // STORAGE_EXEC_INDEXFILTERNODE_H_ diff --git a/src/storage/exec/IndexLimitNode.cpp b/src/storage/exec/IndexLimitNode.cpp new file mode 100644 index 00000000000..df5afed4130 --- /dev/null +++ b/src/storage/exec/IndexLimitNode.cpp @@ -0,0 +1,51 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#include "storage/exec/IndexLimitNode.h" +namespace nebula { +namespace storage { +IndexLimitNode::IndexLimitNode(const IndexLimitNode& node) + : IndexNode(node), offset_(node.offset_), limit_(node.limit_) {} + +IndexLimitNode::IndexLimitNode(RuntimeContext* context, uint64_t offset, uint64_t limit) + : IndexNode(context, "IndexLimitNode"), offset_(offset), limit_(limit) {} +IndexLimitNode::IndexLimitNode(RuntimeContext* context, uint64_t limit) + : IndexLimitNode(context, 0, limit) {} +nebula::cpp2::ErrorCode IndexLimitNode::doExecute(PartitionID partId) { + currentOffset_ = 0; + return children_[0]->execute(partId); +} + +IndexNode::Result IndexLimitNode::doNext() { + DCHECK_EQ(children_.size(), 1); + auto& child = *children_[0]; + while (UNLIKELY(currentOffset_ < offset_)) { + auto result = child.next(); + if (!result.hasData()) { + return result; + } + currentOffset_++; + } + if (currentOffset_ < offset_ + limit_) { + currentOffset_++; + return child.next(); + } else { + return Result(); + } +} + +std::unique_ptr IndexLimitNode::copy() { + return std::make_unique(*this); +} + +std::string IndexLimitNode::identify() { + if (offset_ > 0) { + return fmt::format("{}(offset={}, limit={})", name_, offset_, limit_); + } else { + return fmt::format("{}(limit={})", name_, limit_); + } +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexLimitNode.h b/src/storage/exec/IndexLimitNode.h new file mode 100644 index 00000000000..8811ec77a87 --- /dev/null +++ b/src/storage/exec/IndexLimitNode.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
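IndexLimitNode::doNext above first drains offset_ rows from its single child and then forwards rows until offset_ + limit_ have been consumed, with currentOffset_ reset in doExecute for every partition. The following is a minimal standalone sketch of that skip-then-cap loop; it is not part of the patch and uses simplified stand-in types rather than the real IndexNode interfaces.

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

class LimitSketch {
 public:
  LimitSketch(std::vector<int> upstream, uint64_t offset, uint64_t limit)
      : upstream_(std::move(upstream)), offset_(offset), limit_(limit) {}

  std::optional<int> next() {
    // Skip rows until the offset is consumed, mirroring the while-loop above.
    while (current_ < offset_) {
      if (pos_ >= upstream_.size()) return std::nullopt;
      ++pos_;
      ++current_;
    }
    if (current_ < offset_ + limit_ && pos_ < upstream_.size()) {
      ++current_;
      return upstream_[pos_++];
    }
    return std::nullopt;  // limit reached or upstream exhausted
  }

 private:
  std::vector<int> upstream_;
  uint64_t offset_, limit_;
  uint64_t current_ = 0;
  size_t pos_ = 0;
};

int main() {
  LimitSketch node({1, 2, 3, 4, 5}, /*offset=*/1, /*limit=*/2);
  assert(node.next() == 2);
  assert(node.next() == 3);
  assert(!node.next().has_value());  // limit of 2 rows reached
  return 0;
}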
+ */ +#pragma once +#include "folly/Likely.h" +#include "storage/exec/IndexNode.h" +namespace nebula { +namespace storage { +class IndexLimitNode : public IndexNode { + public: + IndexLimitNode(const IndexLimitNode& node); + IndexLimitNode(RuntimeContext* context, uint64_t offset, uint64_t limit); + IndexLimitNode(RuntimeContext* context, uint64_t limit); + std::unique_ptr copy() override; + std::string identify() override; + + private: + nebula::cpp2::ErrorCode doExecute(PartitionID partId) override; + Result doNext() override; + const uint64_t offset_, limit_; + uint64_t currentOffset_ = 0; +}; +} // namespace storage + +} // namespace nebula diff --git a/src/storage/exec/IndexNode.cpp b/src/storage/exec/IndexNode.cpp new file mode 100644 index 00000000000..0ce1ab3f401 --- /dev/null +++ b/src/storage/exec/IndexNode.cpp @@ -0,0 +1,30 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "storage/exec/IndexNode.h" + +#include "folly/Likely.h" +namespace nebula { +namespace storage { +IndexNode::IndexNode(RuntimeContext* context, const std::string& name) + : context_(context), name_(name) { + spaceId_ = context_->spaceId(); +} + +IndexNode::IndexNode(const IndexNode& node) + : context_(node.context_), spaceId_(node.spaceId_), name_(node.name_) {} + +nebula::cpp2::ErrorCode IndexNode::doExecute(PartitionID partId) { + for (auto& child : children_) { + auto ret = child->execute(partId); + if (UNLIKELY(ret != ::nebula::cpp2::ErrorCode::SUCCEEDED)) { + return ret; + } + } + return ::nebula::cpp2::ErrorCode::SUCCEEDED; +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexNode.h b/src/storage/exec/IndexNode.h new file mode 100644 index 00000000000..0db70ec8030 --- /dev/null +++ b/src/storage/exec/IndexNode.h @@ -0,0 +1,192 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#pragma once +#include "common/base/ErrorOr.h" +#include "common/datatypes/DataSet.h" +#include "common/time/Duration.h" +#include "folly/AtomicLinkedList.h" +#include "folly/container/F14Map.h" +#include "interface/gen-cpp2/common_types.h" +#include "storage/CommonUtils.h" +namespace nebula { +namespace storage { +/** + * IndexNode + * + * Indexnode is the base class for each node that makes up the plan tree. + * + * Member: + * `children_` : all children of the node. + * `context_` : runtime context of plan. + * `name_` : node name which should be set in derive node. + * `duration_` : used to record execution time(exclude children node's time). + * `profileDetail_` : whether record execution time or not. + * + * Function: + * The functions is divided into three parts. + * + * First part is used to build node. This part include constructor/destructor, and + * `IndexNode(const IndexNode& node)` is used to cooperate with `copy` to realize + * the deep copy of node.`copy` function needs to be implemented by the derived + * class itself. + * In fact, the build process is divided into two stages. 
First, the user needs to + * make various derived classes and nd organize them into a plan tree(by + * `children_`).After that, the root node of plan tree needs to call the init + * function and recursively call the init function of all child nodes, `Initcontext` + * will pass parameters between nodes to determine the data format or other + * information to be returned between nodes during execution.Note that `init` needs + * to be executed before `copy`. + * + * Second part is used to access data. + * `execute` is used to initialize some variables at the beginning of each part(e.g + * dedup set, kv iterator, etc.) + * `next` is used to iterate data. Row format has been determined during `init`. + * Batch processing and loop unrolling can be used to optimize performance if + * necessary, but there are no serious performance problems at present. + * `end` and `finish` are used to release resources at the end of execute or plan + * (e.g, external sort free disk,release schema lease if support Online DDL, commit + * write, etc.). + * However, there are no relevant requirements, so it will not be implemented for + * the time being. + * `xxx` is the interface function.It will recursive call child node's `xxx`. `doXxx` + * is the actual execution logic, and the derived class needs to override this + * function + * + * The third part is used to assist in obtaining some detailed information + */ + +using ErrorCode = ::nebula::cpp2::ErrorCode; +template +using Map = folly::F14FastMap; +template +using Set = folly::F14FastSet; +struct InitContext { + // Column required by parent node + Set requiredColumns; + // The format of the row returned to the parent node + std::vector returnColumns; + // The index of name in `returncolumns` + Map retColMap; +}; + +class IndexNode { + public: + /* Iterate result*/ + class Result { + public: + Result() : code_(ErrorCode::SUCCEEDED), empty_(true) {} + Result(const Result& result) : code_(result.code_), row_(result.row_), empty_(result.empty_) {} + Result(Result&& result) + : code_(result.code_), row_(std::move(result.row_)), empty_(result.empty_) {} + explicit Result(ErrorCode code) : code_(code), empty_(true) {} + explicit Result(Row&& row) : row_(row), empty_(false) {} + Result& operator=(Result&& result) { + this->code_ = result.code_; + this->row_ = std::move(result.row_); + this->empty_ = result.empty_; + return *this; + } + inline bool success() { return code_ == ErrorCode::SUCCEEDED; } + inline bool hasData() { return success() && empty_ == false; } + inline Row row() && { return std::move(row_); } + inline Row& row() & { return row_; } + ErrorCode code() { return code_; } + + private: + ErrorCode code_{ErrorCode::SUCCEEDED}; + Row row_; + bool empty_{true}; + }; + /* build */ + IndexNode(const IndexNode& node); + explicit IndexNode(RuntimeContext* context, const std::string& name); + virtual ~IndexNode() = default; + virtual std::unique_ptr copy() = 0; + void addChild(std::unique_ptr child) { children_.emplace_back(std::move(child)); } + const std::vector>& children() { return children_; } + virtual ::nebula::cpp2::ErrorCode init(InitContext& initCtx) { + DCHECK_EQ(children_.size(), 1); + return children_[0]->init(initCtx); + } + /* execution */ + inline nebula::cpp2::ErrorCode execute(PartitionID partId); + inline Result next(); + // inline nebula::cpp2::ErrorCode finish(); + + /* assist */ + const std::string& name() { return name_; } + void enableProfileDetail(); + virtual std::string identify() = 0; + inline const time::Duration& duration(); + + 
protected: + virtual Result doNext() = 0; + void beforeNext(); + void afterNext(); + virtual nebula::cpp2::ErrorCode doExecute(PartitionID partId); + void beforeExecute(); + void afterExecute(); + + RuntimeContext* context_; + GraphSpaceID spaceId_; + std::vector> children_; + std::string name_; + time::Duration duration_; + bool profileDetail_{false}; +}; + +/* Defination of inline function */ +inline IndexNode::Result IndexNode::next() { + beforeNext(); + if (context_->isPlanKilled()) { + return Result(::nebula::cpp2::ErrorCode::E_PLAN_IS_KILLED); + } + Result ret = doNext(); + afterNext(); + return ret; +} + +inline void IndexNode::beforeNext() { + if (UNLIKELY(profileDetail_)) { + duration_.resume(); + } +} + +inline void IndexNode::afterNext() { + if (UNLIKELY(profileDetail_)) { + duration_.pause(); + } +} + +inline nebula::cpp2::ErrorCode IndexNode::execute(PartitionID partId) { + beforeExecute(); + auto ret = doExecute(partId); + afterExecute(); + return ret; +} + +inline void IndexNode::beforeExecute() { + if (UNLIKELY(profileDetail_)) { + duration_.resume(); + } +} + +inline void IndexNode::afterExecute() { + if (UNLIKELY(profileDetail_)) { + duration_.pause(); + } +} + +inline void IndexNode::enableProfileDetail() { + profileDetail_ = true; + for (auto& child : children_) { + child->enableProfileDetail(); + } +} + +inline const time::Duration& IndexNode::duration() { return duration_; } + +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexOutputNode.h b/src/storage/exec/IndexOutputNode.h deleted file mode 100644 index d462c9dff42..00000000000 --- a/src/storage/exec/IndexOutputNode.h +++ /dev/null @@ -1,356 +0,0 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ -#ifndef STORAGE_EXEC_INDEXOUTPUTNODE_H_ -#define STORAGE_EXEC_INDEXOUTPUTNODE_H_ - -#include "common/base/Base.h" -#include "storage/exec/IndexEdgeNode.h" -#include "storage/exec/IndexFilterNode.h" -#include "storage/exec/IndexScanNode.h" -#include "storage/exec/IndexVertexNode.h" -#include "storage/exec/RelNode.h" - -namespace nebula { -namespace storage { - -template -class IndexOutputNode final : public RelNode { - public: - using RelNode::doExecute; - - enum class IndexResultType : int8_t { - kEdgeFromIndexScan, - kEdgeFromIndexFilter, - kEdgeFromDataScan, - kEdgeFromDataFilter, - kVertexFromIndexScan, - kVertexFromIndexFilter, - kVertexFromDataScan, - kVertexFromDataFilter, - }; - - IndexOutputNode(nebula::DataSet* result, - RuntimeContext* context, - IndexScanNode* indexScanNode, - bool hasNullableCol, - const std::vector& fields) - : result_(result), - context_(context), - indexScanNode_(indexScanNode), - hasNullableCol_(hasNullableCol), - fields_(fields) { - type_ = context_->isEdge() ? 
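The IndexNode comment above describes a three-phase contract: build the tree with addChild and call init() once on the root so column requirements flow down and the output row layout flows back up, then for each partition call execute(partId) to reset per-partition state, and finally pull rows with next() until no more data comes back. The sketch below mimics that driving pattern with deliberately simplified interfaces; it is not part of the patch and none of the classes are the real nebula types.

#include <iostream>
#include <memory>

struct NodeSketch {
  virtual ~NodeSketch() = default;
  virtual void init() { if (child) child->init(); }                        // phase 1: one-time setup
  virtual void execute(int partId) { if (child) child->execute(partId); }  // phase 2: per-partition reset
  virtual bool next(int& row) { return child && child->next(row); }        // phase 3: pull rows
  std::unique_ptr<NodeSketch> child;
};

// Leaf "scan" node: produces three rows per partition.
struct ScanSketch : NodeSketch {
  void execute(int partId) override { cursor_ = 0; base_ = partId * 10; }
  bool next(int& row) override {
    if (cursor_ >= 3) return false;
    row = base_ + cursor_++;
    return true;
  }
  int cursor_ = 0, base_ = 0;
};

// "Limit 2" node layered on top, showing how a parent simply pulls from its child.
struct Limit2Sketch : NodeSketch {
  void execute(int partId) override { returned_ = 0; NodeSketch::execute(partId); }
  bool next(int& row) override {
    if (returned_ >= 2) return false;
    if (!child->next(row)) return false;
    ++returned_;
    return true;
  }
  int returned_ = 0;
};

int main() {
  auto plan = std::make_unique<Limit2Sketch>();
  plan->child = std::make_unique<ScanSketch>();
  plan->init();
  for (int part = 1; part <= 2; ++part) {  // the processor drives the plan per partition
    plan->execute(part);
    int row;
    while (plan->next(row)) {
      std::cout << "part " << part << " row " << row << "\n";
    }
  }
  return 0;
}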
IndexResultType::kEdgeFromIndexScan - : IndexResultType::kVertexFromIndexScan; - RelNode::name_ = "IndexOpuputNode"; - } - - IndexOutputNode(nebula::DataSet* result, RuntimeContext* context, IndexEdgeNode* indexEdgeNode) - : result_(result), context_(context), indexEdgeNode_(indexEdgeNode) { - type_ = IndexResultType::kEdgeFromDataScan; - RelNode::name_ = "IndexOpuputNode"; - } - - IndexOutputNode(nebula::DataSet* result, - RuntimeContext* context, - IndexVertexNode* indexVertexNode) - : result_(result), context_(context), indexVertexNode_(indexVertexNode) { - type_ = IndexResultType::kVertexFromDataScan; - RelNode::name_ = "IndexOpuputNode"; - } - - IndexOutputNode(nebula::DataSet* result, - RuntimeContext* context, - IndexFilterNode* indexFilterNode, - bool indexFilter = false) - : result_(result), context_(context), indexFilterNode_(indexFilterNode) { - hasNullableCol_ = indexFilterNode->hasNullableCol(); - fields_ = indexFilterNode_->indexCols(); - if (indexFilter) { - type_ = context_->isEdge() ? IndexResultType::kEdgeFromIndexFilter - : IndexResultType::kVertexFromIndexFilter; - } else { - type_ = context_->isEdge() ? IndexResultType::kEdgeFromDataFilter - : IndexResultType::kVertexFromDataFilter; - } - RelNode::name_ = "IndexOpuputNode"; - } - - nebula::cpp2::ErrorCode doExecute(PartitionID partId) override { - auto ret = RelNode::doExecute(partId); - if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - return ret; - } - - switch (type_) { - case IndexResultType::kEdgeFromIndexScan: { - ret = collectResult(indexScanNode_->moveData()); - break; - } - case IndexResultType::kEdgeFromIndexFilter: { - ret = collectResult(indexFilterNode_->moveData()); - break; - } - case IndexResultType::kEdgeFromDataScan: { - ret = collectResult(indexEdgeNode_->moveData()); - break; - } - case IndexResultType::kEdgeFromDataFilter: { - ret = collectResult(indexFilterNode_->moveData()); - break; - } - case IndexResultType::kVertexFromIndexScan: { - ret = collectResult(indexScanNode_->moveData()); - break; - } - case IndexResultType::kVertexFromIndexFilter: { - ret = collectResult(indexFilterNode_->moveData()); - break; - } - case IndexResultType::kVertexFromDataScan: { - ret = collectResult(indexVertexNode_->moveData()); - break; - } - case IndexResultType::kVertexFromDataFilter: { - ret = collectResult(indexFilterNode_->moveData()); - break; - } - } - return ret; - } - - private: - nebula::cpp2::ErrorCode collectResult(const std::vector& data) { - if (context_->isPlanKilled()) { - return nebula::cpp2::ErrorCode::E_PLAN_IS_KILLED; - } - auto ret = nebula::cpp2::ErrorCode::SUCCEEDED; - switch (type_) { - case IndexResultType::kEdgeFromIndexScan: - case IndexResultType::kEdgeFromIndexFilter: { - ret = edgeRowsFromIndex(data); - break; - } - case IndexResultType::kEdgeFromDataScan: - case IndexResultType::kEdgeFromDataFilter: { - ret = edgeRowsFromData(data); - break; - } - case IndexResultType::kVertexFromIndexScan: - case IndexResultType::kVertexFromIndexFilter: { - ret = vertexRowsFromIndex(data); - break; - } - case IndexResultType::kVertexFromDataScan: - case IndexResultType::kVertexFromDataFilter: { - ret = vertexRowsFromData(data); - break; - } - } - return ret; - } - - nebula::cpp2::ErrorCode vertexRowsFromData(const std::vector& data) { - const auto& schemas = type_ == IndexResultType::kVertexFromDataScan - ? 
indexVertexNode_->getSchemas() - : indexFilterNode_->getSchemas(); - if (schemas.empty()) { - return nebula::cpp2::ErrorCode::E_TAG_NOT_FOUND; - } - for (const auto& val : data) { - Row row; - auto reader = RowReaderWrapper::getRowReader(schemas, val.second); - if (!reader) { - VLOG(1) << "Can't get tag reader"; - return nebula::cpp2::ErrorCode::E_TAG_NOT_FOUND; - } - for (const auto& col : result_->colNames) { - auto ret = addIndexValue(row, reader.get(), val, col, schemas.back().get()); - if (!ret.ok()) { - return nebula::cpp2::ErrorCode::E_INVALID_DATA; - } - } - result_->rows.emplace_back(std::move(row)); - } - return nebula::cpp2::ErrorCode::SUCCEEDED; - } - - nebula::cpp2::ErrorCode vertexRowsFromIndex(const std::vector& data) { - for (const auto& val : data) { - Row row; - for (const auto& col : result_->colNames) { - auto ret = addIndexValue(row, val, col); - if (!ret.ok()) { - return nebula::cpp2::ErrorCode::E_INVALID_DATA; - } - } - result_->rows.emplace_back(std::move(row)); - } - return nebula::cpp2::ErrorCode::SUCCEEDED; - } - - nebula::cpp2::ErrorCode edgeRowsFromData(const std::vector& data) { - const auto& schemas = type_ == IndexResultType::kEdgeFromDataScan - ? indexEdgeNode_->getSchemas() - : indexFilterNode_->getSchemas(); - if (schemas.empty()) { - return nebula::cpp2::ErrorCode::E_EDGE_NOT_FOUND; - } - for (const auto& val : data) { - Row row; - auto reader = RowReaderWrapper::getRowReader(schemas, val.second); - if (!reader) { - VLOG(1) << "Can't get tag reader"; - return nebula::cpp2::ErrorCode::E_EDGE_NOT_FOUND; - } - for (const auto& col : result_->colNames) { - auto ret = addIndexValue(row, reader.get(), val, col, schemas.back().get()); - if (!ret.ok()) { - return nebula::cpp2::ErrorCode::E_INVALID_DATA; - } - } - result_->rows.emplace_back(std::move(row)); - } - return nebula::cpp2::ErrorCode::SUCCEEDED; - } - - nebula::cpp2::ErrorCode edgeRowsFromIndex(const std::vector& data) { - for (const auto& val : data) { - Row row; - for (const auto& col : result_->colNames) { - auto ret = addIndexValue(row, val, col); - if (!ret.ok()) { - return nebula::cpp2::ErrorCode::E_INVALID_DATA; - } - } - result_->rows.emplace_back(std::move(row)); - } - return nebula::cpp2::ErrorCode::SUCCEEDED; - } - - // Add the value by data val - Status addIndexValue(Row& row, - RowReader* reader, - const kvstore::KV& data, - const std::string& col, - const meta::NebulaSchemaProvider* schema) { - switch (QueryUtils::toReturnColType(col)) { - case QueryUtils::ReturnColType::kVid: { - auto vId = NebulaKeyUtils::getVertexId(context_->vIdLen(), data.first); - if (context_->isIntId()) { - row.emplace_back(*reinterpret_cast(vId.data())); - } else { - row.emplace_back(vId.subpiece(0, vId.find_first_of('\0')).toString()); - } - break; - } - case QueryUtils::ReturnColType::kTag: { - row.emplace_back(NebulaKeyUtils::getTagId(context_->vIdLen(), data.first)); - break; - } - case QueryUtils::ReturnColType::kSrc: { - auto src = NebulaKeyUtils::getSrcId(context_->vIdLen(), data.first); - if (context_->isIntId()) { - row.emplace_back(*reinterpret_cast(src.data())); - } else { - row.emplace_back(src.subpiece(0, src.find_first_of('\0')).toString()); - } - break; - } - case QueryUtils::ReturnColType::kType: { - row.emplace_back(NebulaKeyUtils::getEdgeType(context_->vIdLen(), data.first)); - break; - } - case QueryUtils::ReturnColType::kRank: { - row.emplace_back(NebulaKeyUtils::getRank(context_->vIdLen(), data.first)); - break; - } - case QueryUtils::ReturnColType::kDst: { - auto dst = 
NebulaKeyUtils::getDstId(context_->vIdLen(), data.first); - if (context_->isIntId()) { - row.emplace_back(*reinterpret_cast(dst.data())); - } else { - row.emplace_back(dst.subpiece(0, dst.find_first_of('\0')).toString()); - } - break; - } - default: { - auto retVal = QueryUtils::readValue(reader, col, schema); - if (!retVal.ok()) { - VLOG(3) << "Bad value for field : " << col; - return retVal.status(); - } - row.emplace_back(std::move(retVal.value())); - } - } - return Status::OK(); - } - - // Add the value by index key - Status addIndexValue(Row& row, const kvstore::KV& data, const std::string& col) { - switch (QueryUtils::toReturnColType(col)) { - case QueryUtils::ReturnColType::kVid: { - auto vId = IndexKeyUtils::getIndexVertexID(context_->vIdLen(), data.first); - if (context_->isIntId()) { - row.emplace_back(*reinterpret_cast(vId.data())); - } else { - row.emplace_back(vId.subpiece(0, vId.find_first_of('\0')).toString()); - } - break; - } - case QueryUtils::ReturnColType::kTag: { - row.emplace_back(context_->tagId_); - break; - } - case QueryUtils::ReturnColType::kSrc: { - auto src = IndexKeyUtils::getIndexSrcId(context_->vIdLen(), data.first); - if (context_->isIntId()) { - row.emplace_back(*reinterpret_cast(src.data())); - } else { - row.emplace_back(src.subpiece(0, src.find_first_of('\0')).toString()); - } - break; - } - case QueryUtils::ReturnColType::kType: { - row.emplace_back(context_->edgeType_); - break; - } - case QueryUtils::ReturnColType::kRank: { - row.emplace_back(IndexKeyUtils::getIndexRank(context_->vIdLen(), data.first)); - break; - } - case QueryUtils::ReturnColType::kDst: { - auto dst = IndexKeyUtils::getIndexDstId(context_->vIdLen(), data.first); - if (context_->isIntId()) { - row.emplace_back(*reinterpret_cast(dst.data())); - } else { - row.emplace_back(dst.subpiece(0, dst.find_first_of('\0')).toString()); - } - break; - } - default: { - auto v = IndexKeyUtils::getValueFromIndexKey( - context_->vIdLen(), data.first, col, fields_, context_->isEdge(), hasNullableCol_); - row.emplace_back(std::move(v)); - } - } - return Status::OK(); - } - - private: - nebula::DataSet* result_; - RuntimeContext* context_; - IndexResultType type_; - IndexScanNode* indexScanNode_{nullptr}; - IndexEdgeNode* indexEdgeNode_{nullptr}; - IndexVertexNode* indexVertexNode_{nullptr}; - IndexFilterNode* indexFilterNode_{nullptr}; - bool hasNullableCol_{}; - std::vector fields_; -}; - -} // namespace storage -} // namespace nebula - -#endif // STORAGE_EXEC_INDEXOUTPUTNODE_H_ diff --git a/src/storage/exec/IndexProjectionNode.cpp b/src/storage/exec/IndexProjectionNode.cpp new file mode 100644 index 00000000000..946edafb418 --- /dev/null +++ b/src/storage/exec/IndexProjectionNode.cpp @@ -0,0 +1,63 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ +#include "storage/exec/IndexProjectionNode.h" +namespace nebula { +namespace storage { +IndexProjectionNode::IndexProjectionNode(const IndexProjectionNode& node) + : IndexNode(node), requiredColumns_(node.requiredColumns_), colPos_(node.colPos_) {} +IndexProjectionNode::IndexProjectionNode(RuntimeContext* context, + const std::vector& requiredColumns) + : IndexNode(context, "IndexProjectionNode"), requiredColumns_(requiredColumns) {} +nebula::cpp2::ErrorCode IndexProjectionNode::init(InitContext& ctx) { + DCHECK_EQ(children_.size(), 1); + for (auto& col : requiredColumns_) { + ctx.requiredColumns.insert(col); + } + auto ret = children_[0]->init(ctx); + if (UNLIKELY(ret != ::nebula::cpp2::ErrorCode::SUCCEEDED)) { + return ret; + } + for (auto& col : requiredColumns_) { + auto iter = ctx.retColMap.find(col); + DCHECK(iter != ctx.retColMap.end()); + colPos_[col] = iter->second; + } + ctx.returnColumns = requiredColumns_; + ctx.retColMap.clear(); + for (size_t i = 0; i < ctx.returnColumns.size(); i++) { + ctx.retColMap[ctx.returnColumns[i]] = i; + } + return ::nebula::cpp2::ErrorCode::SUCCEEDED; +} + +IndexNode::Result IndexProjectionNode::doNext() { + DCHECK_EQ(children_.size(), 1); + auto& child = *children_[0]; + Result result = child.next(); + if (result.hasData()) { + result = Result(project(std::move(result).row())); + } + return result; +} + +Row IndexProjectionNode::project(Row&& row) { + Row ret; + ret.reserve(requiredColumns_.size()); + for (auto& col : requiredColumns_) { + ret.emplace_back(std::move(row[colPos_[col]])); + } + return ret; +} + +std::unique_ptr IndexProjectionNode::copy() { + return std::make_unique(*this); +} + +std::string IndexProjectionNode::identify() { + return fmt::format("{}(projectColumn=[{}])", name_, folly::join(",", requiredColumns_)); +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexProjectionNode.h b/src/storage/exec/IndexProjectionNode.h new file mode 100644 index 00000000000..736971620e9 --- /dev/null +++ b/src/storage/exec/IndexProjectionNode.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#pragma once + +#include "folly/Likely.h" +#include "storage/exec/IndexNode.h" +namespace nebula { +namespace storage { +/** + * + * IndexProjectionNode + * + * reference: IndexNode + * + * `IndexProjectionNode` is the class which is used to reformat the row to ensure that the format of + * the returned row meets the requirements of RPC request. 
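 + * + * For illustration only (hypothetical example, not part of the original comment): if the child + * node returns rows laid out as [vid, age, name] and the request requires [name, vid], this node + * re-emits each row as [name, vid], using `colPos_` to look up the source position of every + * required column.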
+ * ┌───────────┐ + * │ IndexNode │ + * └─────┬─────┘ + * │ + * ┌──────────┴──────────┐ + * │ IndexProjectionNode │ + * └─────────────────────┘ + * + * Member: + * `requiredColumns_` : Row format required by parent node + * `colPos_` : each column position in child node return row + */ +class IndexProjectionNode : public IndexNode { + public: + IndexProjectionNode(const IndexProjectionNode& node); + IndexProjectionNode(RuntimeContext* context, const std::vector& requiredColumns); + nebula::cpp2::ErrorCode init(InitContext& ctx) override; + std::unique_ptr copy() override; + std::string identify() override; + + private: + Result doNext() override; + Row project(Row&& row); + std::vector requiredColumns_; + Map colPos_; +}; +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexScanNode.cpp b/src/storage/exec/IndexScanNode.cpp new file mode 100644 index 00000000000..33f344844ac --- /dev/null +++ b/src/storage/exec/IndexScanNode.cpp @@ -0,0 +1,547 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#include "storage/exec/IndexScanNode.h" + +namespace nebula { +namespace storage { +// Define of Path +Path::Path(nebula::meta::cpp2::IndexItem* index, + const meta::SchemaProviderIf* schema, + const std::vector& hints, + int64_t vidLen) + : index_(index), schema_(schema), hints_(hints) { + bool nullFlag = false; + for (auto field : index->get_fields()) { + bool tmp = field.nullable_ref().value_or(false); + nullable_.push_back(tmp); + nullFlag |= tmp; + // TODO: improve performance of compute nullable offset in index_key + auto type = IndexKeyUtils::toValueType(field.get_type().get_type()); + auto tmpStr = IndexKeyUtils::encodeNullValue(type, field.get_type().get_type_length()); + index_nullable_offset_ += tmpStr.size(); + totalKeyLength_ += tmpStr.size(); + } + if (!nullFlag) { + nullable_.clear(); + } else { + totalKeyLength_ += 2; + } + if (index_->get_schema_id().tag_id_ref().has_value()) { + totalKeyLength_ += vidLen; + suffixLength_ = vidLen; + } else { + totalKeyLength_ += vidLen * 2 + sizeof(EdgeRanking); + suffixLength_ = vidLen * 2 + sizeof(EdgeRanking); + } +} + +std::unique_ptr Path::make(nebula::meta::cpp2::IndexItem* index, + const meta::SchemaProviderIf* schema, + const std::vector& hints, + int64_t vidLen) { + std::unique_ptr ret; + if (hints.empty() || hints.back().get_scan_type() == cpp2::ScanType::PREFIX) { + ret.reset(new PrefixPath(index, schema, hints, vidLen)); + + } else { + ret.reset(new RangePath(index, schema, hints, vidLen)); + } + return ret; +} + +QualifiedStrategy::Result Path::qualified(const folly::StringPiece& key) { + return strategySet_(key); +} + +std::string Path::encodeValue(const Value& value, + const ColumnTypeDef& colDef, + size_t index, + std::string& key) { + std::string val; + bool isNull = false; + if (colDef.get_type() == ::nebula::cpp2::PropertyType::GEOGRAPHY) { + CHECK_EQ(value.type(), Value::Type::STRING); + val = value.getStr(); + } else if (value.type() == Value::Type::STRING) { + val = IndexKeyUtils::encodeValue(value, *colDef.get_type_length()); + if (val.back() != '\0') { + strategySet_.insert(QualifiedStrategy::constant()); + } + } else if (value.type() == Value::Type::NULLVALUE) { + auto vtype = IndexKeyUtils::toValueType(colDef.get_type()); + val = IndexKeyUtils::encodeNullValue(vtype, colDef.get_type_length()); + isNull = true; + } else { + val = IndexKeyUtils::encodeValue(value); + } + // If the current colDef can be null, then it is 
necessary to additionally check, when parsing the key, whether the + // corresponding value of the nullable column is actually null (the encodings can collide; for + // example, the encoding of INT_MAX and the encoding of null are the same, both are 8*'\xFF') + if (!nullable_.empty() && nullable_[index] == true) { + if (isNull) { + strategySet_.insert(QualifiedStrategy::checkNull<true>(index, index_nullable_offset_)); + } else { + strategySet_.insert(QualifiedStrategy::checkNull<false>(index, index_nullable_offset_)); + } + } else if (isNull) { + strategySet_.insert(QualifiedStrategy::constant()); + } + key.append(val); + return val; +} + +const std::string& Path::toString() { return serializeString_; } + +// End of Path + +// Define of RangePath +RangePath::RangePath(nebula::meta::cpp2::IndexItem* index, + const meta::SchemaProviderIf* schema, + const std::vector& hints, + int64_t vidLen) + : Path(index, schema, hints, vidLen) { + buildKey(); +} + +void RangePath::resetPart(PartitionID partId) { + std::string p = IndexKeyUtils::indexPrefix(partId); + startKey_ = startKey_.replace(0, p.size(), p); + endKey_ = endKey_.replace(0, p.size(), p); +} + +QualifiedStrategy::Result RangePath::qualified(const Map& rowData) { + for (size_t i = 0; i < hints_.size() - 1; i++) { + auto& hint = hints_[i]; + if (hint.get_begin_value() != rowData.at(hint.get_column_name())) { + return QualifiedStrategy::INCOMPATIBLE; + } + } + auto& hint = hints_.back(); + // TODO(hs.zhang): improve performance. Check inclusiveness during buildKey instead. + if (hint.begin_value_ref().is_set()) { + bool ret = includeStart_ ? hint.get_begin_value() <= rowData.at(hint.get_column_name()) + : hint.get_begin_value() < rowData.at(hint.get_column_name()); + if (!ret) { + return QualifiedStrategy::INCOMPATIBLE; + } + } + if (hint.end_value_ref().is_set()) { + bool ret = includeEnd_ ? hint.get_end_value() >= rowData.at(hint.get_column_name()) + : hint.get_end_value() > rowData.at(hint.get_column_name()); + if (!ret) { + return QualifiedStrategy::INCOMPATIBLE; + } + } + return QualifiedStrategy::COMPATIBLE; +} + +void RangePath::buildKey() { + std::string commonIndexPrefix; + commonIndexPrefix.append(IndexKeyUtils::indexPrefix(0, index_->index_id_ref().value())); + auto fieldIter = index_->get_fields().begin(); + for (size_t i = 0; i < hints_.size() - 1; i++, fieldIter++) { + auto& hint = hints_[i]; + CHECK(fieldIter->get_name() == hint.get_column_name()); + auto type = IndexKeyUtils::toValueType(fieldIter->get_type().get_type()); + CHECK(type != Value::Type::STRING || fieldIter->get_type().type_length_ref().has_value()); + encodeValue(hint.get_begin_value(), fieldIter->get_type(), i, commonIndexPrefix); + serializeString_ += + fmt::format("{}={}, ", hint.get_column_name(), hint.get_begin_value().toString()); + } + auto& hint = hints_.back(); + size_t index = hints_.size() - 1; + // The first n-1 columnHints have been spelled out into the common prefix; the nth columnHint + // then determines the RangePath scan range [a, b). Note that [a, b) always includes the begin + // value and excludes the end value. + // [startKey, endKey) = common prefix + [a, b) + auto [a, b] = encodeRange(hint, fieldIter->get_type(), index, commonIndexPrefix.size()); + // left will be `[a`, `(a`, or `[-INF` + std::string left = + hint.begin_value_ref().is_set() + ? fmt::format( + "{}{}", hint.get_include_begin() ? '[' : '(', hint.get_begin_value().toString()) + : "[-INF"; + // right will be `b]`, `b)`, or `INF]` + std::string right = + hint.end_value_ref().is_set() + ?
fmt::format("{}{}", hint.get_end_value().toString(), hint.get_include_end() ? ']' : ')') + : "INF]"; + serializeString_ += fmt::format("{}={},{}", hint.get_column_name(), left, right); + startKey_ = commonIndexPrefix + a; + endKey_ = commonIndexPrefix + b; + // If `end_value` is not set, `b` will be empty. So `endKey_` should append '\xFF' until + // endKey_.size() > `totalKeyLength_` to indicate positive infinity prefixed with + // `commonIndexPrefix` + if (!hint.end_value_ref().is_set()) { + endKey_.append(totalKeyLength_ - endKey_.size() + 1, '\xFF'); + } +} + +std::tuple RangePath::encodeRange( + const cpp2::IndexColumnHint& hint, + const nebula::meta::cpp2::ColumnTypeDef& colTypeDef, + size_t colIndex, + size_t offset) { + std::string startKey, endKey; + bool needCheckNullable = !nullable_.empty() && nullable_[colIndex]; + if (hint.end_value_ref().is_set()) { + includeEnd_ = hint.get_include_end(); + auto tmp = encodeEndValue(hint.get_end_value(), colTypeDef, endKey, offset); + if (memcmp(tmp.data(), std::string(tmp.size(), '\xFF').data(), tmp.size()) != 0) { + needCheckNullable &= false; + } + } + if (hint.begin_value_ref().is_set()) { + includeStart_ = hint.get_include_begin(); + encodeBeginValue(hint.get_begin_value(), colTypeDef, startKey, offset); + } + if (UNLIKELY(needCheckNullable)) { + strategySet_.insert(QualifiedStrategy::checkNull(colIndex, index_nullable_offset_)); + } + if (UNLIKELY(colTypeDef.get_type() == nebula::cpp2::PropertyType::GEOGRAPHY)) { + strategySet_.insert(QualifiedStrategy::dedupGeoIndex(suffixLength_)); + } + return {startKey, endKey}; +} + +std::string RangePath::encodeBeginValue(const Value& value, + const ColumnTypeDef& colDef, + std::string& key, + size_t offset) { + std::string val; + bool greater = !includeStart_; + CHECK_NE(value.type(), Value::Type::NULLVALUE); + if (colDef.get_type() == ::nebula::cpp2::PropertyType::GEOGRAPHY) { + val = value.getStr(); + } else if (value.type() == Value::Type::STRING) { + bool truncated = false; + val = encodeString(value, *colDef.get_type_length(), truncated); + greater &= !truncated; + if (UNLIKELY(truncated)) { + strategySet_.insert(QualifiedStrategy::compareTruncated(val, offset)); + } + } else if (value.type() == Value::Type::FLOAT) { + bool isNaN = false; + val = encodeFloat(value, isNaN); + greater |= isNaN; + // TODO(hs.zhang): Optimize the logic of judging NaN + strategySet_.insert(QualifiedStrategy::checkNaN(offset)); + } else { + val = IndexKeyUtils::encodeValue(value); + } + if (greater) { + val.append(suffixLength_ + 1, '\xFF'); + } + key += val; + return val; +} + +std::string RangePath::encodeEndValue(const Value& value, + const ColumnTypeDef& colDef, + std::string& key, + size_t offset) { + CHECK_NE(value.type(), Value::Type::NULLVALUE); + std::string val; + bool greater = includeEnd_; + if (colDef.get_type() == ::nebula::cpp2::PropertyType::GEOGRAPHY) { + val = value.getStr(); + } else if (value.type() == Value::Type::STRING) { + bool truncated = false; + val = encodeString(value, *colDef.get_type_length(), truncated); + greater |= truncated; + if (UNLIKELY(truncated)) { + strategySet_.insert(QualifiedStrategy::compareTruncated(val, offset)); + } + } else if (value.type() == Value::Type::FLOAT) { + bool isNaN = false; + val = encodeFloat(value, isNaN); + greater |= isNaN; + if (UNLIKELY(isNaN)) { + strategySet_.insert(QualifiedStrategy::checkNaN(offset)); + } + } else { + val = IndexKeyUtils::encodeValue(value); + } + if (greater) { + val.append(suffixLength_ + 1, '\xFF'); + } + key += val; + 
return val; +} + +inline std::string RangePath::encodeString(const Value& value, size_t len, bool& truncated) { + std::string val = IndexKeyUtils::encodeValue(value); + if (val.size() < len) { + val.append(len - val.size(), '\x00'); + } else { + val = val.substr(0, len); + truncated = true; + } + return val; +} + +std::string RangePath::encodeFloat(const Value& value, bool& isNaN) { + std::string val = IndexKeyUtils::encodeValue(value); + // check NaN + if (UNLIKELY(memcmp(val.data(), "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", val.size()) == 0)) { + isNaN = true; + } + return val; +} + +// End of RangePath + +// Define of PrefixPath +PrefixPath::PrefixPath(nebula::meta::cpp2::IndexItem* index, + const meta::SchemaProviderIf* schema, + const std::vector& hints, + int64_t vidLen) + : Path(index, schema, hints, vidLen) { + buildKey(); +} + +QualifiedStrategy::Result PrefixPath::qualified(const Map& rowData) { + for (auto& hint : hints_) { + if (hint.get_begin_value() != rowData.at(hint.get_column_name())) { + return QualifiedStrategy::INCOMPATIBLE; + } + } + return QualifiedStrategy::COMPATIBLE; +} + +void PrefixPath::resetPart(PartitionID partId) { + std::string p = IndexKeyUtils::indexPrefix(partId); + prefix_ = prefix_.replace(0, p.size(), p); +} + +void PrefixPath::buildKey() { + std::string common; + common.append(IndexKeyUtils::indexPrefix(0, index_->index_id_ref().value())); + auto fieldIter = index_->get_fields().begin(); + for (size_t i = 0; i < hints_.size(); i++, fieldIter++) { + auto& hint = hints_[i]; + CHECK(fieldIter->get_name() == hint.get_column_name()); + auto type = IndexKeyUtils::toValueType(fieldIter->get_type().get_type()); + CHECK(type != Value::Type::STRING || fieldIter->get_type().type_length_ref().has_value()); + encodeValue(hint.get_begin_value(), fieldIter->get_type(), i, common); + serializeString_ += + fmt::format("{}={}, ", hint.get_column_name(), hint.get_begin_value().toString()); + } + for (; fieldIter != index_->get_fields().end(); fieldIter++) { + if (UNLIKELY(fieldIter->get_type().get_type() == nebula::cpp2::PropertyType::GEOGRAPHY)) { + strategySet_.insert(QualifiedStrategy::dedupGeoIndex(suffixLength_)); + break; + } + } + prefix_ = std::move(common); +} + +// End of PrefixPath +// Define of IndexScan + +IndexScanNode::IndexScanNode(const IndexScanNode& node) + : IndexNode(node), + partId_(node.partId_), + indexId_(node.indexId_), + index_(node.index_), + indexNullable_(node.indexNullable_), + columnHints_(node.columnHints_), + kvstore_(node.kvstore_), + requiredColumns_(node.requiredColumns_), + requiredAndHintColumns_(node.requiredAndHintColumns_), + ttlProps_(node.ttlProps_), + needAccessBase_(node.needAccessBase_), + colPosMap_(node.colPosMap_) { + if (node.path_->isRange()) { + path_ = std::make_unique(*dynamic_cast(node.path_.get())); + } else { + path_ = std::make_unique(*dynamic_cast(node.path_.get())); + } +} + +::nebula::cpp2::ErrorCode IndexScanNode::init(InitContext& ctx) { + DCHECK(requiredColumns_.empty()); + ttlProps_ = CommonUtils::ttlProps(getSchema().back().get()); + requiredAndHintColumns_ = ctx.requiredColumns; + auto schema = getSchema().back(); + for (auto& hint : columnHints_) { + requiredAndHintColumns_.insert(hint.get_column_name()); + } + for (auto& col : ctx.requiredColumns) { + requiredColumns_.push_back(col); + } + ctx.returnColumns = requiredColumns_; + for (size_t i = 0; i < ctx.returnColumns.size(); i++) { + ctx.retColMap[ctx.returnColumns[i]] = i; + } + colPosMap_ = ctx.retColMap; + // Analyze whether the scan needs to access 
base data. + // TODO(hs.zhang): The performance is better to judge based on whether the string is truncated + auto tmp = ctx.requiredColumns; + for (auto& field : index_->get_fields()) { + if (field.get_type().get_type() == ::nebula::cpp2::PropertyType::FIXED_STRING) { + continue; + } + if (field.get_type().get_type() == ::nebula::cpp2::PropertyType::GEOGRAPHY) { + continue; + } + tmp.erase(field.get_name()); + } + tmp.erase(kVid); + tmp.erase(kTag); + tmp.erase(kRank); + tmp.erase(kSrc); + tmp.erase(kDst); + tmp.erase(kType); + needAccessBase_ = !tmp.empty(); + path_ = Path::make(index_.get(), getSchema().back().get(), columnHints_, context_->vIdLen()); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; +} + +nebula::cpp2::ErrorCode IndexScanNode::doExecute(PartitionID partId) { + partId_ = partId; + auto ret = resetIter(partId); + return ret; +} + +IndexNode::Result IndexScanNode::doNext() { + for (; iter_ && iter_->valid(); iter_->next()) { + if (!checkTTL()) { + continue; + } + auto q = path_->qualified(iter_->key()); + if (q == QualifiedStrategy::INCOMPATIBLE) { + continue; + } + bool compatible = q == QualifiedStrategy::COMPATIBLE; + if (compatible && !needAccessBase_) { + auto key = iter_->key().toString(); + iter_->next(); + Row row = decodeFromIndex(key); + return Result(std::move(row)); + } + std::pair kv; + auto ret = getBaseData(iter_->key(), kv); + if (ret == nebula::cpp2::ErrorCode::SUCCEEDED) { // do nothing + } else if (ret == nebula::cpp2::ErrorCode::E_KEY_NOT_FOUND) { + if (LIKELY(!fatalOnBaseNotFound_)) { + LOG(WARNING) << "base data not found"; + } else { + LOG(FATAL) << "base data not found"; + } + continue; + } else { + return Result(ret); + } + Map rowData = decodeFromBase(kv.first, kv.second); + if (!compatible) { + q = path_->qualified(rowData); + CHECK(q != QualifiedStrategy::UNCERTAIN); + if (q == QualifiedStrategy::INCOMPATIBLE) { + continue; + } + } + Row row; + for (auto& col : requiredColumns_) { + row.emplace_back(std::move(rowData.at(col))); + } + iter_->next(); + return Result(std::move(row)); + } + return Result(); +} + +bool IndexScanNode::checkTTL() { + if (iter_->val().empty() || ttlProps_.first == false) { + return true; + } + auto v = IndexKeyUtils::parseIndexTTL(iter_->val()); + if (CommonUtils::checkDataExpiredForTTL(getSchema().back().get(), + std::move(v), + ttlProps_.second.second, + ttlProps_.second.first)) { + return false; + } + return true; +} + +nebula::cpp2::ErrorCode IndexScanNode::resetIter(PartitionID partId) { + path_->resetPart(partId); + nebula::cpp2::ErrorCode ret = nebula::cpp2::ErrorCode::SUCCEEDED; + if (path_->isRange()) { + auto rangePath = dynamic_cast(path_.get()); + kvstore_->range(spaceId_, partId, rangePath->getStartKey(), rangePath->getEndKey(), &iter_); + } else { + auto prefixPath = dynamic_cast(path_.get()); + ret = kvstore_->prefix(spaceId_, partId, prefixPath->getPrefixKey(), &iter_); + } + return ret; +} + +void IndexScanNode::decodePropFromIndex(folly::StringPiece key, + const Map& colPosMap, + std::vector& values) { + if (colPosMap.empty()) { + return; + } + size_t offset = sizeof(PartitionID) + sizeof(IndexID); + std::bitset<16> nullableBit; + int8_t nullableColPosit = 15; + if (indexNullable_) { + auto bitOffset = key.size() - context_->vIdLen() - sizeof(uint16_t); + auto v = *reinterpret_cast(key.data() + bitOffset); + nullableBit = v; + } + for (auto& field : index_->get_fields()) { + int len = 0; + auto type = IndexKeyUtils::toValueType(field.type.get_type()); + switch (type) { + case Value::Type::BOOL: + len = 
sizeof(bool); + break; + case Value::Type::INT: + len = sizeof(int64_t); + break; + case Value::Type::FLOAT: + len = sizeof(double); + break; + case Value::Type::STRING: + len = *field.type.get_type_length(); + break; + case Value::Type::TIME: + len = sizeof(int8_t) * 3 + sizeof(int32_t); + break; + case Value::Type::DATE: + len = sizeof(int8_t) * 2 + sizeof(int16_t); + break; + case Value::Type::DATETIME: + len = sizeof(int32_t) + sizeof(int16_t) + sizeof(int8_t) * 5; + break; + case Value::Type::GEOGRAPHY: // colPosMap will never need GEOGRAPHY type + len = 8; + break; + default: + LOG(FATAL) << "Unexpect value type:" << int(field.type.get_type()); + } + if (colPosMap.count(field.get_name())) { + if (indexNullable_ && nullableBit.test(nullableColPosit)) { + values[colPosMap.at(field.get_name())] = Value(NullType::__NULL__); + } else { + values[colPosMap.at(field.get_name())] = + IndexKeyUtils::decodeValue(key.subpiece(offset, len), type); + } + } + offset += len; + nullableColPosit -= 1; + } +} + +std::string IndexScanNode::identify() { + return fmt::format("{}(IndexID={}, Path=({}))", name_, indexId_, path_->toString()); +} + +// End of IndexScan +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexScanNode.h b/src/storage/exec/IndexScanNode.h index 4c6f7b2dc79..fe8838aa4f8 100644 --- a/src/storage/exec/IndexScanNode.h +++ b/src/storage/exec/IndexScanNode.h @@ -1,186 +1,432 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. +/* Copyright (c) 2021 vesoft inc. All rights reserved. * * This source code is licensed under Apache 2.0 License. */ +#pragma once +#include -#ifndef STORAGE_EXEC_INDEXSCANNODE_H_ -#define STORAGE_EXEC_INDEXSCANNODE_H_ +#include +#include #include "common/base/Base.h" -#include "storage/exec/RelNode.h" -#include "storage/exec/StorageIterator.h" - +#include "common/datatypes/DataSet.h" +#include "common/utils/IndexKeyUtils.h" +#include "interface/gen-cpp2/meta_types.h" +#include "interface/gen-cpp2/storage_types.h" +#include "storage/CommonUtils.h" +#include "storage/exec/IndexNode.h" namespace nebula { namespace storage { -template -class IndexScanNode : public RelNode { - public: - using RelNode::doExecute; +/** + * + * IndexScanNode + * + * reference: IndexNode, IndexVertexScanNode, IndexEdgeScanNode + * + * `IndexScanNode` is the base class of the node which need to access disk. It has two derive + * class `IndexVertexScanNode` and `IndexEdgeScanNode` + * + * ┌───────────┐ + * │ IndexNode │ + * └─────┬─────┘ + * │ + * ┌───────┴───────┐ + * │ IndexScanNode │ + * └───────┬───────┘ + * ┌───────────┴────────────┐ + * ┌──────────┴──────────┐ ┌───────────┴─────────┐ + * │ IndexVertexScanNode │ │ IndexEdgeScanNode │ + * └─────────────────────┘ └─────────────────────┘ + * + * `IndexScanNode` will access index data, and then access base data if necessary. 
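 + * + * For illustration only (hypothetical query, not taken from this patch): with an index on + * player(age), `LOOKUP ON player WHERE player.age > 18 YIELD id(vertex)` can be answered from the + * index keys alone, while `... YIELD player.name` also forces this node to fetch the base vertex + * data, because `name` is not stored in the index key.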
+ * + * Member: + * `indexId_` : id of the index this node accesses + * `partId_` : part to access. It will be modified during `doExecute` + * `index_` : index definition + * `indexNullable_` : whether the index contains nullable fields or not + * `columnHints_` : column hints from the request, used to build `path_` + * `path_` : the scan range (prefix key, or [start, end)) built from `columnHints_` + * `iter_` : current kvstore iterator. It will be reset by `doExecute` and iterated + * during `doNext` + * `kvstore_` : server kvstore + * `requiredColumns_` : row format that `doNext` needs to return + * `requiredAndHintColumns_`: columns that `decodeFromBase` needs to decode + * `ttlProps_` : ttl properties of the schema + * `needAccessBase_` : whether the scan needs to read base data + * `fatalOnBaseNotFound_` : for debug + * + * Function: + * `decodePropFromIndex` : decode properties from the index key. It is called by + * `decodeFromIndex` + * `decodeFromIndex` : decode all columns in `requiredColumns_` from the index + * key-value. + * `getBaseData` : get the key-value of the base data + * `decodeFromBase` : get all values required by `requiredAndHintColumns_` + * `checkTTL` : check whether the data is expired or not + * ------------------------------------------------------------- + * + * Path + * + * `Path` is the most important part of `IndexScanNode`. By analyzing `ColumnHint`, it obtains + * the mode (Prefix or Range) and the range (the key of a Prefix scan, or [start, end) of a Range + * scan) of the keys that `IndexScanNode` needs to query in kvstore. + * + * `Path` not only generates the keys to access, but also qualifies whether a key complies with + * the ColumnHint constraints or not. For example, if there is a truncated string index, we cannot + * simply compare bytes to determine whether the current key complies with the ColumnHint + * constraints; the result of `qualified(bytes)` should be `UNCERTAIN`, and `IndexScanNode` will + * access the base data, after which `Path` reconfirms the `ColumnHint` constraints by + * `qualified(RowData)`. In addition to the above example, there are other cases to deal with. + * `Path` and its derived classes pick different strategies dynamically according to `ColumnHint`, + * `IndexItem`, and `Schema`. All strategies are added to `QFList_` (QualifiedFunctionList) during + * `buildKey`, and executed during `qualified`. + * + * `Path` will be reset when `IndexScanNode` executes on a new part. + * + * It should be noted that the range generated by `RangePath` is always a left-included, + * right-excluded interval like [startKey_, endKey_), although `ColumnHint` may express many + * different constraint ranges (e.g., (x, y], (-INF, y), (x, +INF)). Because the length of the + * index key is fixed, the way to obtain **the smallest key greater than 'x'** is to append + * '\xFF' bytes to 'x' until it is longer than the index key.
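 + * + * A worked example (hypothetical values, for illustration only): assume a non-nullable + * single-column int index and the hint `col1 > 5`. `RangePath` builds + * startKey_ = commonIndexPrefix + encode(5) + (suffixLength_ + 1) bytes of '\xFF', which sorts + * after every real key whose col1 equals 5, and, because no upper bound is set, + * endKey_ = commonIndexPrefix padded with '\xFF' beyond totalKeyLength_ to stand for positive + * infinity under that prefix.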
+ * + * + * Member: + * `QFList_` : all Qualified strategy need to executed during qualified + * `nullable_` : if `index_` contain nullable field, `nullable_[i]` is equal to + * `index_->fields[i].nullable`,else `nullable_` is empty + * `index_nullable_offset_` : Participate in the index key encode diagram + * `totalKeyLength_` : Participate in the index key encode diagram + * `suffixLength_` : Participate in the index key encode diagram + * `serializeString_` : a string express path + * + * Index Key Encode: + * ┌──────┬─────────────┬────────────────┬──────────┬─────────────────────────────────────────┐ + * │ type | PartitionID | Indexed Values | nullable | suffix({vid} or {srcId,rank,dstId}) | + * │ 1byte| 3 bytes | n bytes | 0/2 bytes| vid.length or vid.length*2+sizeof(rank) | + * └──────┴─────────────┴────────────────┴──────────┴─────────────────────────────────────────┘ + * │ └───────────────────┬─────────────────────┘ + * index_nullable_offset_ suffixLength_ + * └──────────────────────────────────┬───────────────────────────────────────────────────────┘ + * totalKeyLength_ + * + * Function: + * `make` : construct `PrefixPath` or `RangePath` according to `hints` + * `qualified(StringPiece)` : qulified key by bytes + * `qualified(Map)` : qulified row by value + * `resetPart` : reset current partitionID and reset `iter_` + * `encodeValue` : encode a Value to bytes + * + * + * ------------------------------------------------------------- + * + * + * + */ + +class Path; +class QualifiedStrategySet; +class IndexScanNode : public IndexNode { + FRIEND_TEST(IndexScanTest, Base); + FRIEND_TEST(IndexScanTest, Vertex); + FRIEND_TEST(IndexScanTest, Edge); + // There are too many unittests, so a helper is defined to access private data + friend class IndexScanTestHelper; + + public: + IndexScanNode(const IndexScanNode& node); IndexScanNode(RuntimeContext* context, + const std::string& name, IndexID indexId, - std::vector columnHints, - int64_t limit = -1) - : context_(context), indexId_(indexId), columnHints_(std::move(columnHints)), limit_(limit) { - /** - * columnHints's elements are {scanType = PREFIX|RANGE; beginStr; endStr}, - * {scanType = PREFIX|RANGE; beginStr; - * endStr},... if the scanType is RANGE, means the index scan is range scan. - * if all scanType are PREFIX, means the index scan is prefix scan. - * there should be only one RANGE hnit, and it must be the last one. - */ - for (size_t i = 0; i < columnHints_.size(); i++) { - if (columnHints_[i].get_scan_type() == cpp2::ScanType::RANGE) { - isRangeScan_ = true; - CHECK_EQ(columnHints_.size() - 1, i); - break; - } - } - RelNode::name_ = "IndexScanNode"; - } + const std::vector& columnHints, + ::nebula::kvstore::KVStore* kvstore) + : IndexNode(context, name), indexId_(indexId), columnHints_(columnHints), kvstore_(kvstore) {} + ::nebula::cpp2::ErrorCode init(InitContext& ctx) override; + std::string identify() override; - nebula::cpp2::ErrorCode doExecute(PartitionID partId) override { - auto ret = RelNode::doExecute(partId); - if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - return ret; - } - auto scanRet = scanStr(partId); - if (!scanRet.ok()) { - return nebula::cpp2::ErrorCode::E_INVALID_FIELD_VALUE; - } - scanPair_ = scanRet.value(); - std::unique_ptr iter; - ret = isRangeScan_ ? 
context_->env()->kvstore_->range( - context_->spaceId(), partId, scanPair_.first, scanPair_.second, &iter) - : context_->env()->kvstore_->prefix( - context_->spaceId(), partId, scanPair_.first, &iter); - if (ret == nebula::cpp2::ErrorCode::SUCCEEDED && iter && iter->valid()) { - context_->isEdge() - ? iter_.reset(new EdgeIndexIterator(std::move(iter), context_->vIdLen())) - : iter_.reset(new VertexIndexIterator(std::move(iter), context_->vIdLen())); - } else { - iter_.reset(); - return ret; - } - return nebula::cpp2::ErrorCode::SUCCEEDED; + protected: + nebula::cpp2::ErrorCode doExecute(PartitionID partId) final; + Result doNext() final; + void decodePropFromIndex(folly::StringPiece key, + const Map& colPosMap, + std::vector& values); + virtual Row decodeFromIndex(folly::StringPiece key) = 0; + virtual nebula::cpp2::ErrorCode getBaseData(folly::StringPiece key, + std::pair& kv) = 0; + virtual Map decodeFromBase(const std::string& key, + const std::string& value) = 0; + virtual const std::vector>& getSchema() = 0; + bool checkTTL(); + nebula::cpp2::ErrorCode resetIter(PartitionID partId); + PartitionID partId_; + const IndexID indexId_; + std::shared_ptr index_; + bool indexNullable_ = false; + const std::vector& columnHints_; + std::unique_ptr path_; + std::unique_ptr iter_; + nebula::kvstore::KVStore* kvstore_; + std::vector requiredColumns_; + Set requiredAndHintColumns_; + std::pair> ttlProps_; + bool needAccessBase_{false}; + bool fatalOnBaseNotFound_{false}; + Map colPosMap_; +}; +class QualifiedStrategy { + public: + enum Result { INCOMPATIBLE = 0, UNCERTAIN = 1, COMPATIBLE = 2 }; + /** + * checkNull + * + * There are two overload `checkNull` functions: + * 1. First one which is with template arg `targetIsNull`, checks `columnIndex` at `nullable` + * whether equal to `targetIsNull` or not. + * 2. The other one which is without template, filters key whose `columnIndex` at `nullable` is + * true + * + * Args: + * `columnIndex` : Index of column. **NOTE** , however, that the order in nullable bytes is + * reversed + * `keyOffset` : Reference `Index Key Encode` -> `index_nullable_offset_` + * + * Return: + * For convenience, we define a variable x.When the value at `columnIndex` is null, x is true, + * Otherwise x is false. + * 1.With template.Return COMPATIBLE if `x`==`targetIsNull`,else INCOMPATIBLE + * 2.Without template.Return COMPATIBLE if `x`==false, else INCOMPATIBLE + */ + template + static QualifiedStrategy checkNull(size_t columnIndex, size_t keyOffset) { + QualifiedStrategy q; + q.func_ = [columnIndex, keyOffset](const folly::StringPiece& key) { + std::bitset<16> nullableBit; + auto v = *reinterpret_cast(key.data() + keyOffset); + nullableBit = v; + return nullableBit.test(15 - columnIndex) == targetIsNull ? Result::COMPATIBLE + : Result::INCOMPATIBLE; + }; + return q; } - - IndexIterator* iterator() { return iter_.get(); } - - std::vector moveData() { - auto* sh = context_->isEdge() ? 
context_->edgeSchema_ : context_->tagSchema_; - auto ttlProp = CommonUtils::ttlProps(sh); - data_.clear(); - int64_t count = 0; - while (!!iter_ && iter_->valid()) { - if (context_->isPlanKilled()) { - return {}; - } - if (!iter_->val().empty() && ttlProp.first) { - auto v = IndexKeyUtils::parseIndexTTL(iter_->val()); - if (CommonUtils::checkDataExpiredForTTL( - sh, std::move(v), ttlProp.second.second, ttlProp.second.first)) { - iter_->next(); - continue; - } - } - data_.emplace_back(iter_->key(), ""); - if (limit_ > 0 && ++count >= limit_) { - break; - } - iter_->next(); - } - return std::move(data_); + static QualifiedStrategy checkNull(size_t columnIndex, size_t keyOffset) { + QualifiedStrategy q; + q.func_ = [columnIndex, keyOffset](const folly::StringPiece& key) { + std::bitset<16> nullableBit; + auto v = *reinterpret_cast(key.data() + keyOffset); + nullableBit = v; + return nullableBit.test(15 - columnIndex) ? Result::INCOMPATIBLE : Result::COMPATIBLE; + }; + return q; } - - private: - StatusOr> scanStr(PartitionID partId) { - auto iRet = context_->isEdge() - ? context_->env()->indexMan_->getEdgeIndex(context_->spaceId(), indexId_) - : context_->env()->indexMan_->getTagIndex(context_->spaceId(), indexId_); - if (!iRet.ok()) { - return Status::IndexNotFound(); - } - if (isRangeScan_) { - return getRangeStr(partId, iRet.value()->get_fields()); - } else { - return getPrefixStr(partId, iRet.value()->get_fields()); - } + /** + * checkNaN + * + * Only for double. Check the value at `keyOffset` in indexKey is NaN or not. The logic here needs + * to be coordinated with the encoding logic of double numbers. + * + * Args: + * `keyOffset` : value offset at indexKey + * + * Return: + * Return INCOMPATIBLE if v==Nan else COMPATIBLE; + */ + static QualifiedStrategy checkNaN(size_t keyOffset) { + const char* chr = "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"; // '\xFF' * 8 + QualifiedStrategy q; + q.func_ = [chr, keyOffset](const folly::StringPiece& key) { + int ret = memcmp(chr, key.data() + keyOffset, 8); + return ret == 0 ? Result::INCOMPATIBLE : Result::COMPATIBLE; + }; + return q; } - - StatusOr> getPrefixStr( - PartitionID partId, const std::vector<::nebula::meta::cpp2::ColumnDef>& fields) { - std::string prefix; - prefix.append(IndexKeyUtils::indexPrefix(partId, indexId_)); - for (auto& col : columnHints_) { - auto iter = std::find_if(fields.begin(), fields.end(), [col](const auto& field) { - return col.get_column_name() == field.get_name(); - }); - if (iter == fields.end()) { - VLOG(3) << "Field " << col.get_column_name() << " not found "; - return Status::Error("Field not found"); - } - auto type = IndexKeyUtils::toValueType(iter->type.type); - if (type == Value::Type::STRING && !iter->type.type_length_ref().has_value()) { - return Status::Error("String property index has not set prefix length."); - } - prefix.append(encodeValue(*col.begin_value_ref(), type, iter->type.get_type_length())); - } - return std::make_pair(prefix, ""); + /** + * dedupGeoIndex + * + * Because a `GEOGRAPHY` type data will generate multiple index keys pointing to the same base + * data,the base data pointed to by the indexkey should be de duplicated. 
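 + * (Illustrative note, assuming the geo index behaviour described above: one geography value may + * be covered by several index cells, so a single vertex or edge can produce several index keys; + * deduplicating by the key suffix, i.e. the vid or srcId+rank+dstId, keeps only the first one.)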
+ * + * Args: + * `dedupSuffixLength` : If indexed schema is a tag, `dedupSuffixLength` should be vid.len; + * If the indexed schema is an edge, `dedupSuffixLength` shoule be + * srcId.len+sizeof(rank)+dstId.len + * Return: + * When suffix first appears, the function returns `COMPATIBLE`; otherwise, the function returns + * `INCOMPATIBLE` + */ + static QualifiedStrategy dedupGeoIndex(size_t dedupSuffixLength) { + QualifiedStrategy q; + q.func_ = [suffixSet = Set(), + suffixLength = dedupSuffixLength](const folly::StringPiece& key) mutable -> Result { + std::string suffix = key.subpiece(key.size() - suffixLength, suffixLength).toString(); + auto [iter, result] = suffixSet.insert(std::move(suffix)); + return result ? Result::COMPATIBLE : Result::INCOMPATIBLE; + }; + return q; } - - StatusOr> getRangeStr( - PartitionID partId, const std::vector<::nebula::meta::cpp2::ColumnDef>& fields) { - std::string start, end; - start.append(IndexKeyUtils::indexPrefix(partId, indexId_)); - end.append(IndexKeyUtils::indexPrefix(partId, indexId_)); - for (auto& col : columnHints_) { - auto iter = std::find_if(fields.begin(), fields.end(), [col](const auto& field) { - return col.get_column_name() == field.get_name(); - }); - if (iter == fields.end()) { - VLOG(3) << "Field " << col.get_column_name() << " not found "; - return Status::Error("Field not found"); - } - auto type = IndexKeyUtils::toValueType(iter->get_type().get_type()); - if (type == Value::Type::STRING && !iter->get_type().type_length_ref().has_value()) { - return Status::Error("String property index has not set prefix length."); - } - if (col.get_scan_type() == cpp2::ScanType::PREFIX) { - start.append(encodeValue(*col.begin_value_ref(), type, iter->type.get_type_length())); - end.append(encodeValue(*col.begin_value_ref(), type, iter->type.get_type_length())); + /** + * constant + * + * Always return `result` + */ + template + static QualifiedStrategy constant() { + QualifiedStrategy q; + q.func_ = [](const folly::StringPiece&) { return result; }; + return q; + } + /** + * compareTruncated + * + * For a `String` type index, `val` may be truncated, and it is not enough to determine whether + * the indexkey complies with the constraint of columnhint only through the interval limit of + * [start,end) which is generated by `RangeIndex`. Therefore, it is necessary to make additional + * judgment on the truncated string type index + * For example: + * (ab)c meas that string is "abc" but index val has been truncated to "ab". (ab)c > ab is + * `UNCERTAIN`, and (ab)c > aa is COMPATIBLE. + * + * Args: + * `LEorGE` : It's an assit arg. true means LE and false means GE. + * `val` : Truncated `String` index value,whose length has been define in `IndexItem`. + * `keyStartPos` : The position in indexKey where start compare with `val` + * + * Return: + * Return `COMPATIBLE` if `val` is `LEorGE` than indexKey.Otherwise, return `UNCERTAIN`. + */ + template + static QualifiedStrategy compareTruncated(const std::string& val, size_t keyStartPos) { + QualifiedStrategy q; + q.func_ = [val, keyStartPos](const folly::StringPiece& key) { + int ret = memcmp(val.data(), key.data() + keyStartPos, val.size()); + if constexpr (LEorGE == true) { + CHECK_LE(ret, 0); } else { - start.append(encodeValue(*col.begin_value_ref(), type, iter->type.get_type_length())); - end.append(encodeValue(*col.end_value_ref(), type, iter->type.get_type_length())); + CHECK_GE(ret, 0); } - } - return std::make_pair(start, end); + return ret == 0 ? 
Result::UNCERTAIN : Result::COMPATIBLE; + }; + return q; } + // call + inline Result operator()(const folly::StringPiece& key); - // precondition: if type is STRING, strLen must be valid - std::string encodeValue(const Value& val, Value::Type type, const int16_t* strLen) { - if (val.isNull()) { - return IndexKeyUtils::encodeNullValue(type, strLen); - } - if (type == Value::Type::STRING) { - return IndexKeyUtils::encodeValue(val, *strLen); - } else { - return IndexKeyUtils::encodeValue(val); - } - } + private: + std::function func_; +}; +class QualifiedStrategySet { + public: + inline void insert(QualifiedStrategy&& strategy); + inline QualifiedStrategy::Result operator()(const folly::StringPiece& key); + + private: + std::vector strategyList_; +}; + +class Path { + public: + // enum class Qualified : int16_t { INCOMPATIBLE = 0, UNCERTAIN = 1, COMPATIBLE = 2 }; + // using QualifiedFunction = std::function; + using ColumnTypeDef = ::nebula::meta::cpp2::ColumnTypeDef; + Path(nebula::meta::cpp2::IndexItem* index, + const meta::SchemaProviderIf* schema, + const std::vector& hints, + int64_t vidLen); + virtual ~Path() = default; + + static std::unique_ptr make(::nebula::meta::cpp2::IndexItem* index, + const meta::SchemaProviderIf* schema, + const std::vector& hints, + int64_t vidLen); + QualifiedStrategy::Result qualified(const folly::StringPiece& key); + virtual bool isRange() { return false; } + + virtual QualifiedStrategy::Result qualified(const Map& rowData) = 0; + virtual void resetPart(PartitionID partId) = 0; + const std::string& toString(); + + protected: + std::string encodeValue(const Value& value, + const ColumnTypeDef& colDef, + size_t index, + std::string& key); + QualifiedStrategySet strategySet_; + ::nebula::meta::cpp2::IndexItem* index_; + const meta::SchemaProviderIf* schema_; + const std::vector hints_; + std::vector nullable_; + int64_t index_nullable_offset_{8}; + int64_t totalKeyLength_{8}; + int64_t suffixLength_; + std::string serializeString_; +}; +class PrefixPath : public Path { + public: + PrefixPath(nebula::meta::cpp2::IndexItem* index, + const meta::SchemaProviderIf* schema, + const std::vector& hints, + int64_t vidLen); + // Override + QualifiedStrategy::Result qualified(const Map& rowData) override; + void resetPart(PartitionID partId) override; + + const std::string& getPrefixKey() { return prefix_; } private: - RuntimeContext* context_; - IndexID indexId_; - bool isRangeScan_{false}; - std::unique_ptr iter_; - std::pair scanPair_; - std::vector columnHints_; - int64_t limit_; - std::vector data_; + std::string prefix_; + void buildKey(); }; +class RangePath : public Path { + public: + RangePath(nebula::meta::cpp2::IndexItem* index, + const meta::SchemaProviderIf* schema, + const std::vector& hints, + int64_t vidLen); + QualifiedStrategy::Result qualified(const Map& rowData) override; + void resetPart(PartitionID partId) override; + + inline bool includeStart() { return includeStart_; } + inline bool includeEnd() { return includeEnd_; } + inline const std::string& getStartKey() { return startKey_; } + inline const std::string& getEndKey() { return endKey_; } + bool isRange() override { return true; } + + private: + std::string startKey_, endKey_; + bool includeStart_ = true; + bool includeEnd_ = false; + + void buildKey(); + std::tuple encodeRange( + const cpp2::IndexColumnHint& hint, + const nebula::meta::cpp2::ColumnTypeDef& colTypeDef, + size_t colIndex, + size_t offset); + inline std::string encodeString(const Value& value, size_t len, bool& truncated); + 
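 + // Encodes a double value; sets `isNaN` when the encoded bytes are all '\xFF' (the encoding + // used for NaN).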
inline std::string encodeFloat(const Value& value, bool& isNaN); + std::string encodeBeginValue(const Value& value, + const ColumnTypeDef& colDef, + std::string& key, + size_t offset); + std::string encodeEndValue(const Value& value, + const ColumnTypeDef& colDef, + std::string& key, + size_t offset); +}; +/* define inline functions */ +QualifiedStrategy::Result QualifiedStrategySet::operator()(const folly::StringPiece& key) { + QualifiedStrategy::Result ret = QualifiedStrategy::COMPATIBLE; + for (auto& s : strategyList_) { + ret = std::min(ret, s(key)); + } + return ret; +} +void QualifiedStrategySet::insert(QualifiedStrategy&& strategy) { + strategyList_.emplace_back(std::move(strategy)); +} +inline QualifiedStrategy::Result QualifiedStrategy::operator()(const folly::StringPiece& key) { + return func_(key); +} } // namespace storage -} // namespace nebula -#endif // STORAGE_EXEC_INDEXSCANNODE_H_ +} // namespace nebula diff --git a/src/storage/exec/IndexSelectionNode.cpp b/src/storage/exec/IndexSelectionNode.cpp new file mode 100644 index 00000000000..b69e6bbfcae --- /dev/null +++ b/src/storage/exec/IndexSelectionNode.cpp @@ -0,0 +1,78 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#include "storage/exec/IndexSelectionNode.h" +namespace nebula { +namespace storage { +IndexSelectionNode::IndexSelectionNode(const IndexSelectionNode& node) + : IndexNode(node), expr_(node.expr_), colPos_(node.colPos_) { + ctx_ = std::make_unique(colPos_); +} + +IndexSelectionNode::IndexSelectionNode(RuntimeContext* context, Expression* expr) + : IndexNode(context, "IndexSelectionNode"), expr_(expr) {} +nebula::cpp2::ErrorCode IndexSelectionNode::init(InitContext& ctx) { + DCHECK_EQ(children_.size(), 1); + SelectionExprVisitor vis; + expr_->accept(&vis); + for (auto& col : vis.getRequiredColumns()) { + ctx.requiredColumns.insert(col); + } + auto ret = children_[0]->init(ctx); + if (UNLIKELY(ret != ::nebula::cpp2::ErrorCode::SUCCEEDED)) { + return ret; + } + for (auto& col : vis.getRequiredColumns()) { + colPos_[col] = ctx.retColMap.at(col); + } + ctx_ = std::make_unique(colPos_); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; +} + +IndexNode::Result IndexSelectionNode::doNext() { + DCHECK_EQ(children_.size(), 1); + auto& child = *children_[0]; + do { + auto result = child.next(); + if (!result.hasData()) { + return result; + } + if (filter(result.row())) { + return result; + } + } while (true); + return Result(); +} + +std::unique_ptr IndexSelectionNode::copy() { + return std::make_unique(*this); +} + +std::string IndexSelectionNode::identify() { + return fmt::format("{}(expr=[{}])", name_, expr_->toString()); +} + +Value IndexSelectionNode::ExprContext::getEdgeProp(const std::string& edgeType, + const std::string& prop) const { + UNUSED(edgeType); + DCHECK(row_ != nullptr); + auto iter = colPos_.find(prop); + DCHECK(iter != colPos_.end()); + DCHECK(iter->second < row_->size()); + return (*row_)[iter->second]; +} + +Value IndexSelectionNode::ExprContext::getTagProp(const std::string& tag, + const std::string& prop) const { + UNUSED(tag); + DCHECK(row_ != nullptr); + auto iter = colPos_.find(prop); + DCHECK(iter != colPos_.end()); + DCHECK(iter->second < row_->size()); + return (*row_)[iter->second]; +} + +} // namespace storage + +} // namespace nebula diff --git a/src/storage/exec/IndexSelectionNode.h b/src/storage/exec/IndexSelectionNode.h new file mode 100644 index 00000000000..a134ecf890d --- /dev/null +++ 
b/src/storage/exec/IndexSelectionNode.h @@ -0,0 +1,136 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#pragma once + +#include "common/context/ExpressionContext.h" +#include "common/expression/Expression.h" +#include "folly/container/F14Map.h" +#include "storage/ExprVisitorBase.h" +#include "storage/exec/IndexNode.h" +namespace nebula { +namespace storage { +/** + * + * IndexSelectionNode + * + * reference: IndexNode + * + * `IndexSelectionNode` is the class which is used to filter data by given expression in RPC + * request. + * ┌───────────┐ + * │ IndexNode │ + * └─────┬─────┘ + * │ + * ┌─────────┴──────────┐ + * │ IndexSelectionNode │ + * └────────────────────┘ + * Member: + * `expr_` : expression used to filter + * `colPos_`: column's position in Row which is during eval `expr_` + * `ctx_` : used to eval expression + * Function: + * `filter` : compute `expr_` + * + * + * ------------------------------------------------------------------------------------------------ + * IndexSelectionNode::ExprContext + * + * `ExprContext` is a derive class of ExpressionContext which is needed in eval expression. + * NOTICE: There are many node in the entire storage plan tree where expressions need to be + * evaluated(e.g., Projection,Aggregate,etc.). So `ExprContext` may be not an internal + * class of IndexSelectionNode. + */ +class IndexSelectionNode : public IndexNode { + public: + IndexSelectionNode(const IndexSelectionNode &node); + IndexSelectionNode(RuntimeContext *context, Expression *expr); + nebula::cpp2::ErrorCode init(InitContext &ctx) override; + std::unique_ptr copy() override; + std::string identify() override; + + private: + Result doNext() override; + inline bool filter(const Row &row) { + ctx_->setRow(row); + auto &result = expr_->eval(*ctx_); + return result.type() == Value::Type::BOOL ? 
result.getBool() : false; + } + Expression *expr_; + Map colPos_; + // TODO(hs.zhang): `ExprContext` could be moved out later if we unify the valcano in go/lookup + class ExprContext : public ExpressionContext { + public: + explicit ExprContext(const Map &colPos) : colPos_(colPos) {} + void setRow(const Row &row) { row_ = &row; } + Value getEdgeProp(const std::string &edgeType, const std::string &prop) const override; + Value getTagProp(const std::string &tag, const std::string &prop) const override; + // override + const Value &getVar(const std::string &var) const override { + UNUSED(var); + return fatal(__FILE__, __LINE__); + } + const Value &getVersionedVar(const std::string &var, int64_t version) const override { + UNUSED(var), UNUSED(version); + return fatal(__FILE__, __LINE__); + } + const Value &getVarProp(const std::string &var, const std::string &prop) const override { + UNUSED(var), UNUSED(prop); + return fatal(__FILE__, __LINE__); + } + Value getSrcProp(const std::string &tag, const std::string &prop) const override { + UNUSED(tag), UNUSED(prop); + return fatal(__FILE__, __LINE__); + } + const Value &getDstProp(const std::string &tag, const std::string &prop) const override { + UNUSED(tag), UNUSED(prop); + return fatal(__FILE__, __LINE__); + } + const Value &getInputProp(const std::string &prop) const override { + UNUSED(prop); + return fatal(__FILE__, __LINE__); + } + Value getVertex(const std::string &) const override { return fatal(__FILE__, __LINE__); } + Value getEdge() const override { return fatal(__FILE__, __LINE__); } + Value getColumn(int32_t index) const override { + UNUSED(index); + return fatal(__FILE__, __LINE__); + } + void setVar(const std::string &var, Value val) override { + UNUSED(var), UNUSED(val); + fatal(__FILE__, __LINE__); + } + + private: + const Map &colPos_; + const Row *row_; + inline const Value &fatal(const std::string &file, int line) const { + LOG(FATAL) << "Unexpect at " << file << ":" << line; + static Value placeholder; + return placeholder; + } + }; + std::unique_ptr ctx_; +}; + +class SelectionExprVisitor : public ExprVisitorBase { + public: + void visit(EdgeSrcIdExpression *expr) override { requiredColumns_.insert(expr->prop()); } + void visit(EdgeTypeExpression *expr) override { requiredColumns_.insert(expr->prop()); } + void visit(EdgeRankExpression *expr) override { requiredColumns_.insert(expr->prop()); } + void visit(EdgeDstIdExpression *expr) override { requiredColumns_.insert(expr->prop()); } + void visit(TagPropertyExpression *expr) override { requiredColumns_.insert(expr->prop()); } + void visit(EdgePropertyExpression *expr) override { requiredColumns_.insert(expr->prop()); } + const Set &getRequiredColumns() { return requiredColumns_; } + ::nebula::cpp2::ErrorCode getCode() { return code_; } + + private: + using ExprVisitorBase::visit; + Set requiredColumns_; + ::nebula::cpp2::ErrorCode code_; +}; + +} // namespace storage + +} // namespace nebula diff --git a/src/storage/exec/IndexVertexNode.h b/src/storage/exec/IndexVertexNode.h deleted file mode 100644 index 7b2cae38e4b..00000000000 --- a/src/storage/exec/IndexVertexNode.h +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. 
- */ -#ifndef STORAGE_EXEC_INDEXVERTEXNODE_H_ -#define STORAGE_EXEC_INDEXVERTEXNODE_H_ - -#include "common/base/Base.h" -#include "storage/exec/IndexScanNode.h" -#include "storage/exec/RelNode.h" - -namespace nebula { -namespace storage { - -template -class IndexVertexNode final : public RelNode { - public: - using RelNode::doExecute; - - IndexVertexNode(RuntimeContext* context, - IndexScanNode* indexScanNode, - const std::vector>& schemas, - const std::string& schemaName, - int64_t limit = -1) - : context_(context), - indexScanNode_(indexScanNode), - schemas_(schemas), - schemaName_(schemaName), - limit_(limit) { - RelNode::name_ = "IndexVertexNode"; - } - - nebula::cpp2::ErrorCode doExecute(PartitionID partId) override { - auto ret = RelNode::doExecute(partId); - if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - return ret; - } - - auto ttlProp = CommonUtils::ttlProps(context_->tagSchema_); - - data_.clear(); - std::vector vids; - auto* iter = static_cast(indexScanNode_->iterator()); - - while (iter && iter->valid()) { - if (context_->isPlanKilled()) { - return nebula::cpp2::ErrorCode::E_PLAN_IS_KILLED; - } - if (!iter->val().empty() && ttlProp.first) { - auto v = IndexKeyUtils::parseIndexTTL(iter->val()); - if (CommonUtils::checkDataExpiredForTTL( - context_->tagSchema_, std::move(v), ttlProp.second.second, ttlProp.second.first)) { - iter->next(); - continue; - } - } - vids.emplace_back(iter->vId()); - iter->next(); - } - int64_t count = 0; - for (const auto& vId : vids) { - VLOG(1) << "partId " << partId << ", vId " << vId << ", tagId " << context_->tagId_; - auto key = NebulaKeyUtils::vertexKey(context_->vIdLen(), partId, vId, context_->tagId_); - std::string val; - ret = context_->env()->kvstore_->get(context_->spaceId(), partId, key, &val); - if (ret == nebula::cpp2::ErrorCode::SUCCEEDED) { - data_.emplace_back(std::move(key), std::move(val)); - } else if (ret == nebula::cpp2::ErrorCode::E_KEY_NOT_FOUND) { - continue; - } else { - return ret; - } - if (limit_ > 0 && ++count >= limit_) { - break; - } - } - return nebula::cpp2::ErrorCode::SUCCEEDED; - } - - std::vector moveData() { return std::move(data_); } - - const std::vector>& getSchemas() { - return schemas_; - } - - const std::string& getSchemaName() { return schemaName_; } - - private: - RuntimeContext* context_; - IndexScanNode* indexScanNode_; - const std::vector>& schemas_; - const std::string& schemaName_; - int64_t limit_; - std::vector data_; -}; - -} // namespace storage -} // namespace nebula -#endif // STORAGE_EXEC_INDEXVERTEXNODE_H_ diff --git a/src/storage/exec/IndexVertexScanNode.cpp b/src/storage/exec/IndexVertexScanNode.cpp new file mode 100644 index 00000000000..499a4d59d8a --- /dev/null +++ b/src/storage/exec/IndexVertexScanNode.cpp @@ -0,0 +1,117 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ +#include "storage/exec/IndexVertexScanNode.h" + +#include "codec/RowReaderWrapper.h" +#include "common/utils/NebulaKeyUtils.h" +#include "storage/exec/QueryUtils.h" +namespace nebula { +namespace storage { + +IndexVertexScanNode::IndexVertexScanNode(const IndexVertexScanNode& node) + : IndexScanNode(node), tag_(node.tag_) {} + +IndexVertexScanNode::IndexVertexScanNode(RuntimeContext* context, + IndexID indexId, + const std::vector& clolumnHint, + ::nebula::kvstore::KVStore* kvstore) + : IndexScanNode(context, "IndexVertexScanNode", indexId, clolumnHint, kvstore) { + getIndex = std::function([this](std::shared_ptr& index) { + auto env = this->context_->env(); + auto indexMgr = env->indexMan_; + auto indexVal = indexMgr->getTagIndex(this->spaceId_, this->indexId_); + if (!indexVal.ok()) { + return ::nebula::cpp2::ErrorCode::E_INDEX_NOT_FOUND; + } + index = indexVal.value(); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }); + getTag = std::function([this](TagSchemas& tag) { + auto env = this->context_->env(); + auto schemaMgr = env->schemaMan_; + auto allSchema = schemaMgr->getAllVerTagSchema(this->spaceId_); + auto tagId = this->index_->get_schema_id().get_tag_id(); + if (!allSchema.ok() || !allSchema.value().count(tagId)) { + return ::nebula::cpp2::ErrorCode::E_TAG_NOT_FOUND; + } + tag = allSchema.value().at(tagId); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }); +} + +::nebula::cpp2::ErrorCode IndexVertexScanNode::init(InitContext& ctx) { + if (auto ret = getIndex(this->index_); UNLIKELY(ret != ::nebula::cpp2::ErrorCode::SUCCEEDED)) { + return ret; + } + if (auto ret = getTag(tag_); UNLIKELY(ret != ::nebula::cpp2::ErrorCode::SUCCEEDED)) { + return ret; + } + return IndexScanNode::init(ctx); +} + +nebula::cpp2::ErrorCode IndexVertexScanNode::getBaseData(folly::StringPiece key, + std::pair& kv) { + kv.first = NebulaKeyUtils::vertexKey(context_->vIdLen(), + partId_, + key.subpiece(key.size() - context_->vIdLen()).toString(), + context_->tagId_); + return kvstore_->get(context_->spaceId(), partId_, kv.first, &kv.second); +} + +Row IndexVertexScanNode::decodeFromIndex(folly::StringPiece key) { + std::vector values(requiredColumns_.size()); + if (colPosMap_.count(kVid)) { + auto vId = IndexKeyUtils::getIndexVertexID(context_->vIdLen(), key); + if (context_->isIntId()) { + values[colPosMap_[kVid]] = Value(*reinterpret_cast(vId.data())); + } else { + values[colPosMap_[kVid]] = Value(vId.subpiece(0, vId.find_first_of('\0')).toString()); + } + } + if (colPosMap_.count(kTag)) { + values[colPosMap_[kTag]] = Value(context_->tagId_); + } + key.subtract(context_->vIdLen()); + decodePropFromIndex(key, colPosMap_, values); + return Row(std::move(values)); +} + +Map IndexVertexScanNode::decodeFromBase(const std::string& key, + const std::string& value) { + Map values; + auto reader = RowReaderWrapper::getRowReader(tag_, folly::StringPiece(value)); + for (auto& col : requiredAndHintColumns_) { + switch (QueryUtils::toReturnColType(col)) { + case QueryUtils::ReturnColType::kVid: { + auto vId = NebulaKeyUtils::getVertexId(context_->vIdLen(), key); + if (context_->isIntId()) { + values[col] = Value(*reinterpret_cast(vId.data())); + } else { + values[col] = Value(vId.subpiece(0, vId.find_first_of('\0')).toString()); + } + } break; + case QueryUtils::ReturnColType::kTag: { + values[col] = Value(context_->tagId_); + } break; + case QueryUtils::ReturnColType::kOther: { + auto retVal = QueryUtils::readValue(reader.get(), col, tag_.back()->field(col)); + if (!retVal.ok()) { + LOG(FATAL) << "Bad value 
for field" << col; + } + values[col] = std::move(retVal.value()); + } break; + default: + LOG(FATAL) << "Unexpect column name:" << col; + } + } + return values; +} + +std::unique_ptr IndexVertexScanNode::copy() { + return std::make_unique(*this); +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/IndexVertexScanNode.h b/src/storage/exec/IndexVertexScanNode.h new file mode 100644 index 00000000000..fea56a19adb --- /dev/null +++ b/src/storage/exec/IndexVertexScanNode.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#pragma once + +#include + +#include +#include + +#include "common/base/Base.h" +#include "storage/exec/IndexScanNode.h" +#include "storage/exec/StorageIterator.h" + +namespace nebula { +namespace storage { + +/** + * IndexVertexScanNode + * + * reference: IndexScanNode + */ +class IndexVertexScanNode final : public IndexScanNode { + public: + IndexVertexScanNode(const IndexVertexScanNode& node); + IndexVertexScanNode(RuntimeContext* context, + IndexID indexId, + const std::vector& clolumnHint, + ::nebula::kvstore::KVStore* kvstore); + ::nebula::cpp2::ErrorCode init(InitContext& ctx) override; + std::unique_ptr copy() override; + + private: + nebula::cpp2::ErrorCode getBaseData(folly::StringPiece key, + std::pair& kv) override; + Row decodeFromIndex(folly::StringPiece key) override; + Map decodeFromBase(const std::string& key, const std::string& value) override; + + using TagSchemas = std::vector>; + const TagSchemas& getSchema() override { return tag_; } + TagSchemas tag_; + using IndexItem = ::nebula::meta::cpp2::IndexItem; + // Convenient for testing + std::function<::nebula::cpp2::ErrorCode(std::shared_ptr&)> getIndex; + std::function<::nebula::cpp2::ErrorCode(TagSchemas&)> getTag; + + FRIEND_TEST(IndexScanTest, VertexIndexOnlyScan); + FRIEND_TEST(IndexScanTest, VertexBase); + FRIEND_TEST(IndexScanTest, Prefix1); + FRIEND_TEST(IndexScanTest, Prefix2); + FRIEND_TEST(IndexScanTest, Base); + FRIEND_TEST(IndexScanTest, Vertex); + friend class IndexScanTestHelper; +}; +} // namespace storage +} // namespace nebula diff --git a/src/storage/exec/StorageIterator.h b/src/storage/exec/StorageIterator.h index 49821069f00..c4fc30d456b 100644 --- a/src/storage/exec/StorageIterator.h +++ b/src/storage/exec/StorageIterator.h @@ -6,11 +6,11 @@ #ifndef STORAGE_EXEC_STORAGEITERATOR_H_ #define STORAGE_EXEC_STORAGEITERATOR_H_ +#include "codec/RowReaderWrapper.h" #include "common/base/Base.h" #include "kvstore/KVIterator.h" #include "storage/CommonUtils.h" #include "storage/StorageFlags.h" - namespace nebula { namespace storage { diff --git a/src/storage/index/LookupBaseProcessor-inl.h b/src/storage/index/LookupBaseProcessor-inl.h deleted file mode 100644 index 2de86f3b1af..00000000000 --- a/src/storage/index/LookupBaseProcessor-inl.h +++ /dev/null @@ -1,472 +0,0 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. 
- */ - -#pragma once - -#include "LookupBaseProcessor.h" -#include "folly/container/Enumerate.h" -namespace nebula { -namespace storage { - -template -nebula::cpp2::ErrorCode LookupBaseProcessor::requestCheck( - const cpp2::LookupIndexRequest& req) { - spaceId_ = req.get_space_id(); - auto retCode = this->getSpaceVidLen(spaceId_); - if (retCode != nebula::cpp2::ErrorCode::SUCCEEDED) { - return retCode; - } - this->planContext_ = std::make_unique( - this->env_, spaceId_, this->spaceVidLen_, this->isIntId_, req.common_ref()); - const auto& indices = req.get_indices(); - const auto& schemaId = indices.get_schema_id(); - this->planContext_->isEdge_ = schemaId.getType() == nebula::cpp2::SchemaID::Type::edge_type; - this->context_ = std::make_unique(this->planContext_.get()); - if (context_->isEdge()) { - context_->edgeType_ = schemaId.get_edge_type(); - auto edgeName = this->env_->schemaMan_->toEdgeName(spaceId_, context_->edgeType_); - if (!edgeName.ok()) { - return nebula::cpp2::ErrorCode::E_EDGE_NOT_FOUND; - } - context_->edgeName_ = std::move(edgeName.value()); - auto allEdges = this->env_->schemaMan_->getAllVerEdgeSchema(spaceId_); - if (!allEdges.ok()) { - return nebula::cpp2::ErrorCode::E_EDGE_NOT_FOUND; - } - if (!allEdges.value().count(context_->edgeType_)) { - return nebula::cpp2::ErrorCode::E_EDGE_NOT_FOUND; - } - schemas_ = std::move(allEdges).value()[context_->edgeType_]; - context_->edgeSchema_ = schemas_.back().get(); - } else { - context_->tagId_ = schemaId.get_tag_id(); - auto tagName = this->env_->schemaMan_->toTagName(spaceId_, context_->tagId_); - if (!tagName.ok()) { - return nebula::cpp2::ErrorCode::E_TAG_NOT_FOUND; - } - context_->tagName_ = std::move(tagName.value()); - auto allTags = this->env_->schemaMan_->getAllVerTagSchema(spaceId_); - if (!allTags.ok()) { - return nebula::cpp2::ErrorCode::E_TAG_NOT_FOUND; - } - if (!allTags.value().count(context_->tagId_)) { - return nebula::cpp2::ErrorCode::E_TAG_NOT_FOUND; - } - schemas_ = std::move(allTags).value()[context_->tagId_]; - context_->tagSchema_ = schemas_.back().get(); - } - - if (indices.get_contexts().empty() || !req.return_columns_ref().has_value() || - (*req.return_columns_ref()).empty()) { - return nebula::cpp2::ErrorCode::E_INVALID_OPERATION; - } - indexContexts_ = indices.get_contexts(); - - // setup yield columns. 
- if (req.return_columns_ref().has_value()) { - yieldCols_ = *req.return_columns_ref(); - } - - for (auto&& it : folly::enumerate(yieldCols_)) { - resultDataSet_.colNames.emplace_back(*it); - if (QueryUtils::toReturnColType(*it) != QueryUtils::ReturnColType::kOther) { - deDupColPos_.emplace_back(it.index); - } - } - - // limit - if (req.limit_ref().has_value()) { - if (*req.limit_ref() < 0) { - LOG(ERROR) << "Incorrect parameter : LIMIT = " << *req.limit_ref(); - return nebula::cpp2::ErrorCode::E_INVALID_PARM; - } - limit_ = *req.limit_ref(); - } - - return nebula::cpp2::ErrorCode::SUCCEEDED; -} - -template -bool LookupBaseProcessor::isOutsideIndex(Expression* filter, - const meta::cpp2::IndexItem* index) { - static const std::set propsInEdgeKey{kSrc, kType, kRank, kDst}; - auto fields = index->get_fields(); - switch (filter->kind()) { - case Expression::Kind::kLogicalOr: - case Expression::Kind::kLogicalAnd: { - auto* lExpr = static_cast(filter); - for (auto& expr : lExpr->operands()) { - auto ret = isOutsideIndex(expr, index); - if (ret) { - return ret; - } - } - break; - } - case Expression::Kind::kRelLE: - case Expression::Kind::kRelIn: - case Expression::Kind::kRelGE: - case Expression::Kind::kRelEQ: - case Expression::Kind::kRelLT: - case Expression::Kind::kRelGT: - case Expression::Kind::kRelNE: - case Expression::Kind::kRelNotIn: { - auto* rExpr = static_cast(filter); - auto ret = isOutsideIndex(rExpr->left(), index); - if (ret) { - return ret; - } - ret = isOutsideIndex(rExpr->right(), index); - if (ret) { - return ret; - } - break; - } - case Expression::Kind::kEdgeSrc: - case Expression::Kind::kEdgeType: - case Expression::Kind::kEdgeRank: - case Expression::Kind::kEdgeDst: { - auto* sExpr = static_cast(filter); - auto propName = sExpr->prop(); - return propsInEdgeKey.find(propName) == propsInEdgeKey.end(); - } - case Expression::Kind::kTagProperty: - case Expression::Kind::kEdgeProperty: { - auto* sExpr = static_cast(filter); - auto propName = sExpr->prop(); - auto it = std::find_if(fields.begin(), fields.end(), [&propName](const auto& f) { - return f.get_name() == propName; - }); - return it == fields.end(); - } - default: { - return false; - } - } - return false; -} - -/** - * lookup plan should be : - * +--------+---------+ - * | Plan | - * +--------+---------+ - * | - * +--------+---------+ - * | DeDupNode | - * +--------+---------+ - * | - * +----------+-----------+ - * + IndexOutputNode... + - * +----------+-----------+ - **/ - -template -StatusOr> LookupBaseProcessor::buildPlan( - IndexFilterItem* filterItem, nebula::DataSet* result) { - StoragePlan plan; - // TODO(sky) : Limit is not supported yet for de-dup node. - // Related to paging scan, the de-dup execution plan needs to be refactored - auto deDup = std::make_unique>(result, deDupColPos_); - int32_t filterId = 0; - std::unique_ptr> out; - auto pool = &planContext_->objPool_; - - for (const auto& ctx : indexContexts_) { - const auto& indexId = ctx.get_index_id(); - auto needFilter = ctx.filter_ref().is_set() && !(*ctx.filter_ref()).empty(); - - // Check whether a data node is required. - // If a non-indexed column appears in the WHERE clause or YIELD clause, - // That means need to query the corresponding data. - bool needData = false; - auto index = context_->isEdge() ? 
this->env_->indexMan_->getEdgeIndex(spaceId_, indexId) - : this->env_->indexMan_->getTagIndex(spaceId_, indexId); - if (!index.ok()) { - return Status::IndexNotFound(); - } - - // check nullable column - bool hasNullableCol = false; - - auto* indexItem = index.value().get(); - auto fields = indexItem->get_fields(); - - for (const auto& col : fields) { - if (!hasNullableCol && col.nullable_ref().value_or(false)) { - hasNullableCol = true; - break; - } - } - - for (const auto& yieldCol : yieldCols_) { - static const std::set propsInKey{kVid, kTag, kSrc, kType, kRank, kDst}; - if (propsInKey.count(yieldCol)) { - continue; - } - auto it = std::find_if(fields.begin(), fields.end(), [&yieldCol](const auto& columnDef) { - return yieldCol == columnDef.get_name(); - }); - if (it == fields.end() || - it->get_type().get_type() == - nebula::cpp2::PropertyType::GEOGRAPHY) { // geography index just stores - // S2CellId, so must read the - // original geo data. - needData = true; - break; - } - } - auto colHints = ctx.get_column_hints(); - - // Check WHERE clause contains columns that ware not indexed - if (ctx.filter_ref().is_set() && !(*ctx.filter_ref()).empty()) { - auto filter = Expression::decode(pool, *ctx.filter_ref()); - auto isFieldsOutsideIndex = isOutsideIndex(filter, indexItem); - if (isFieldsOutsideIndex) { - needData = needFilter = true; - } - } - - if (!needData && !needFilter) { - out = buildPlanBasic(result, ctx, plan, hasNullableCol, fields); - } else if (needData && !needFilter) { - out = buildPlanWithData(result, ctx, plan); - } else if (!needData && needFilter) { - auto expr = Expression::decode(pool, ctx.get_filter()); - auto exprCtx = std::make_unique( - context_->vIdLen(), context_->isIntId(), hasNullableCol, fields); - filterItem->emplace(filterId, std::make_pair(std::move(exprCtx), expr)); - out = buildPlanWithFilter( - result, ctx, plan, (*filterItem)[filterId].first.get(), (*filterItem)[filterId].second); - filterId++; - } else { - auto expr = Expression::decode(pool, ctx.get_filter()); - // Need to get columns in data, expr ctx need to be aware of schema - const auto& schemaName = context_->isEdge() ? context_->edgeName_ : context_->tagName_; - if (schemas_.empty()) { - return Status::Error("Schema not found"); - } - auto exprCtx = std::make_unique(context_->vIdLen(), - context_->isIntId(), - schemaName, - schemas_.back().get(), - context_->isEdge()); - filterItem->emplace(filterId, std::make_pair(std::move(exprCtx), expr)); - out = buildPlanWithDataAndFilter( - result, ctx, plan, (*filterItem)[filterId].first.get(), (*filterItem)[filterId].second); - filterId++; - } - if (out == nullptr) { - return Status::Error("Index scan plan error"); - } - deDup->addDependency(out.get()); - plan.addNode(std::move(out)); - } - plan.addNode(std::move(deDup)); - return plan; -} - -/** - * - * +----------+-----------+ - * + IndexOutputNode + - * +----------+-----------+ - * | - * +----------+-----------+ - * + IndexScanNode + - * +----------+-----------+ - * - * If this is a simple index scan, Just having IndexScanNode is enough. 
for - *example : tag (c1, c2, c3) index on tag (c1, c2, c3) hint : lookup index where - *c1 == 1 and c2 == 1 and c3 == 1 yield c1,c2,c3 - **/ -template -std::unique_ptr> LookupBaseProcessor::buildPlanBasic( - nebula::DataSet* result, - const cpp2::IndexQueryContext& ctx, - StoragePlan& plan, - bool hasNullableCol, - const std::vector& fields) { - auto indexId = ctx.get_index_id(); - auto colHints = ctx.get_column_hints(); - auto indexScan = std::make_unique>( - context_.get(), indexId, std::move(colHints), limit_); - - auto output = std::make_unique>( - result, context_.get(), indexScan.get(), hasNullableCol, fields); - output->addDependency(indexScan.get()); - plan.addNode(std::move(indexScan)); - return output; -} - -/** - * - * +----------+-----------+ - * + IndexOutputNode + - * +----------+-----------+ - * | - * +----------------+-----------------+ - * + IndexEdgeNode or IndexVertexNode + - * +----------------+-----------------+ - * | - * +----------+-----------+ - * + IndexScanNode + - * +----------+-----------+ - * - * If a non-indexed column appears in the YIELD clause, and no expression - *filtering is required . for example : tag (c1, c2, c3) index on tag (c1, c2) - * hint : lookup index where c1 == 1 and c2 == 1 yield c3 - **/ -template -std::unique_ptr> LookupBaseProcessor::buildPlanWithData( - nebula::DataSet* result, const cpp2::IndexQueryContext& ctx, StoragePlan& plan) { - auto indexId = ctx.get_index_id(); - auto colHints = ctx.get_column_hints(); - - auto indexScan = - std::make_unique>(context_.get(), indexId, std::move(colHints)); - if (context_->isEdge()) { - auto edge = std::make_unique>( - context_.get(), indexScan.get(), schemas_, context_->edgeName_, limit_); - edge->addDependency(indexScan.get()); - auto output = std::make_unique>(result, context_.get(), edge.get()); - output->addDependency(edge.get()); - plan.addNode(std::move(indexScan)); - plan.addNode(std::move(edge)); - return output; - } else { - auto vertex = std::make_unique>( - context_.get(), indexScan.get(), schemas_, context_->tagName_, limit_); - vertex->addDependency(indexScan.get()); - auto output = std::make_unique>(result, context_.get(), vertex.get()); - output->addDependency(vertex.get()); - plan.addNode(std::move(indexScan)); - plan.addNode(std::move(vertex)); - return output; - } -} - -/** - * - * +----------+-----------+ - * + IndexOutputNode + - * +----------+-----------+ - * | - * +----------+-----------+ - * + IndexFilterNode + - * +----------+-----------+ - * | - * +----------+-----------+ - * + IndexScanNode + - * +----------+-----------+ - * - * If have not non-indexed column appears in the YIELD clause, and expression - *filtering is required . 
for example : tag (c1, c2, c3) index on tag (c1, c2) - * hint : lookup index where c1 > 1 and c2 > 1 - **/ -template -std::unique_ptr> LookupBaseProcessor::buildPlanWithFilter( - nebula::DataSet* result, - const cpp2::IndexQueryContext& ctx, - StoragePlan& plan, - StorageExpressionContext* exprCtx, - Expression* exp) { - auto indexId = ctx.get_index_id(); - auto colHints = ctx.get_column_hints(); - - auto indexScan = - std::make_unique>(context_.get(), indexId, std::move(colHints)); - - auto filter = std::make_unique>( - context_.get(), indexScan.get(), exprCtx, exp, context_->isEdge(), limit_); - filter->addDependency(indexScan.get()); - auto output = - std::make_unique>(result, context_.get(), filter.get(), true); - output->addDependency(filter.get()); - plan.addNode(std::move(indexScan)); - plan.addNode(std::move(filter)); - return output; -} - -/** - * - * +----------+-----------+ - * + IndexOutputNode + - * +----------+-----------+ - * | - * +----------+-----------+ - * + IndexFilterNode + - * +----------+-----------+ - * | - * +----------------+-----------------+ - * + IndexEdgeNode or IndexVertexNode + - * +----------------+-----------------+ - * | - * +----------+-----------+ - * + IndexScanNode + - * +----------+-----------+ - * - * If a non-indexed column appears in the WHERE clause or YIELD clause, - * and expression filtering is required . - * for example : - * tag (c1, c2, c3) - * index on tag (c1, c2) - * hint : lookup index where c1 == 1 and c2 == 1 and c3 > 1 yield c3 - * lookup index where c1 == 1 and c2 == 1 and c3 > 1 - * lookup index where c1 == 1 and c3 == 1 - **/ -template -std::unique_ptr> -LookupBaseProcessor::buildPlanWithDataAndFilter(nebula::DataSet* result, - const cpp2::IndexQueryContext& ctx, - StoragePlan& plan, - StorageExpressionContext* exprCtx, - Expression* exp) { - auto indexId = ctx.get_index_id(); - auto colHints = ctx.get_column_hints(); - - auto indexScan = - std::make_unique>(context_.get(), indexId, std::move(colHints)); - if (context_->isEdge()) { - auto edge = std::make_unique>( - context_.get(), indexScan.get(), schemas_, context_->edgeName_); - edge->addDependency(indexScan.get()); - auto filter = std::make_unique>( - context_.get(), edge.get(), exprCtx, exp, limit_); - filter->addDependency(edge.get()); - - auto output = std::make_unique>(result, context_.get(), filter.get()); - output->addDependency(filter.get()); - plan.addNode(std::move(indexScan)); - plan.addNode(std::move(edge)); - plan.addNode(std::move(filter)); - return output; - } else { - auto vertex = std::make_unique>( - context_.get(), indexScan.get(), schemas_, context_->tagName_); - vertex->addDependency(indexScan.get()); - auto filter = std::make_unique>( - context_.get(), vertex.get(), exprCtx, exp, limit_); - filter->addDependency(vertex.get()); - - auto output = std::make_unique>(result, context_.get(), filter.get()); - output->addDependency(filter.get()); - plan.addNode(std::move(indexScan)); - plan.addNode(std::move(vertex)); - plan.addNode(std::move(filter)); - return output; - } -} -template -void LookupBaseProcessor::profilePlan(StoragePlan& plan) { - auto& nodes = plan.getNodes(); - std::lock_guard lck(BaseProcessor::profileMut_); - for (auto& node : nodes) { - BaseProcessor::profileDetail(node->name_, node->duration_.elapsedInUSec()); - } -} - -} // namespace storage -} // namespace nebula diff --git a/src/storage/index/LookupBaseProcessor.h b/src/storage/index/LookupBaseProcessor.h deleted file mode 100644 index dce3165b73b..00000000000 --- 
a/src/storage/index/LookupBaseProcessor.h +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#ifndef STORAGE_QUERY_LOOKUPBASEPROCESSOR_H_ -#define STORAGE_QUERY_LOOKUPBASEPROCESSOR_H_ - -#include "common/base/Base.h" -#include "storage/BaseProcessor.h" -#include "storage/exec/AggregateNode.h" -#include "storage/exec/DeDupNode.h" -#include "storage/exec/IndexEdgeNode.h" -#include "storage/exec/IndexFilterNode.h" -#include "storage/exec/IndexOutputNode.h" -#include "storage/exec/IndexScanNode.h" -#include "storage/exec/IndexVertexNode.h" -#include "storage/exec/StoragePlan.h" - -namespace nebula { -namespace storage { -using IndexFilterItem = - std::unordered_map, Expression*>>; - -template -class LookupBaseProcessor : public BaseProcessor { - public: - virtual ~LookupBaseProcessor() = default; - - virtual void process(const REQ& req) = 0; - - protected: - LookupBaseProcessor(StorageEnv* env, - const ProcessorCounters* counters, - folly::Executor* executor = nullptr) - : BaseProcessor(env, counters), executor_(executor) {} - - virtual void onProcessFinished() = 0; - - nebula::cpp2::ErrorCode requestCheck(const cpp2::LookupIndexRequest& req); - - bool isOutsideIndex(Expression* filter, const meta::cpp2::IndexItem* index); - - StatusOr> buildPlan(IndexFilterItem* filterItem, nebula::DataSet* result); - - std::unique_ptr> buildPlanBasic( - nebula::DataSet* result, - const cpp2::IndexQueryContext& ctx, - StoragePlan& plan, - bool hasNullableCol, - const std::vector& fields); - - std::unique_ptr> buildPlanWithData(nebula::DataSet* result, - const cpp2::IndexQueryContext& ctx, - StoragePlan& plan); - - std::unique_ptr> buildPlanWithFilter(nebula::DataSet* result, - const cpp2::IndexQueryContext& ctx, - StoragePlan& plan, - StorageExpressionContext* exprCtx, - Expression* exp); - - std::unique_ptr> buildPlanWithDataAndFilter( - nebula::DataSet* result, - const cpp2::IndexQueryContext& ctx, - StoragePlan& plan, - StorageExpressionContext* exprCtx, - Expression* exp); - - void profilePlan(StoragePlan& plan); - - protected: - GraphSpaceID spaceId_; - std::unique_ptr planContext_; - std::unique_ptr context_; - folly::Executor* executor_{nullptr}; - nebula::DataSet resultDataSet_; - std::vector partResults_; - std::vector indexContexts_{}; - std::vector yieldCols_{}; - std::vector filterItems_; - // Save schemas when column is out of index, need to read from data - std::vector> schemas_; - std::vector deDupColPos_; - int64_t limit_ = -1; -}; - -} // namespace storage -} // namespace nebula - -#include "storage/index/LookupBaseProcessor-inl.h" -#endif // STORAGE_QUERY_LOOKUPBASEPROCESSOR_H_ diff --git a/src/storage/index/LookupProcessor.cpp b/src/storage/index/LookupProcessor.cpp index 81e0ddc4ece..272bfead534 100644 --- a/src/storage/index/LookupProcessor.cpp +++ b/src/storage/index/LookupProcessor.cpp @@ -1,17 +1,28 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. +/* Copyright (c) 2021 vesoft inc. All rights reserved. * * This source code is licensed under Apache 2.0 License. 
*/ - #include "storage/index/LookupProcessor.h" -#include "storage/exec/DeDupNode.h" +#include +#include +#include "folly/Likely.h" +#include "interface/gen-cpp2/common_types.tcc" +#include "interface/gen-cpp2/meta_types.tcc" +#include "interface/gen-cpp2/storage_types.tcc" +#include "storage/exec/IndexDedupNode.h" +#include "storage/exec/IndexEdgeScanNode.h" +#include "storage/exec/IndexLimitNode.h" +#include "storage/exec/IndexNode.h" +#include "storage/exec/IndexProjectionNode.h" +#include "storage/exec/IndexSelectionNode.h" +#include "storage/exec/IndexVertexScanNode.h" namespace nebula { namespace storage { - ProcessorCounters kLookupCounters; - +// print Plan for debug +inline void printPlan(IndexNode* node, int tab = 0); void LookupProcessor::process(const cpp2::LookupIndexRequest& req) { if (executor_ != nullptr) { executor_->add([req, this]() { this->doProcess(req); }); @@ -21,125 +32,257 @@ void LookupProcessor::process(const cpp2::LookupIndexRequest& req) { } void LookupProcessor::doProcess(const cpp2::LookupIndexRequest& req) { - auto retCode = requestCheck(req); - if (limit_ == 0) { - onProcessFinished(); - onFinished(); - return; - } if (req.common_ref().has_value() && req.get_common()->profile_detail_ref().value_or(false)) { profileDetailFlag_ = true; } - if (retCode != nebula::cpp2::ErrorCode::SUCCEEDED) { + auto code = prepare(req); + if (UNLIKELY(code != ::nebula::cpp2::ErrorCode::SUCCEEDED)) { for (auto& p : req.get_parts()) { - pushResultCode(retCode, p); + pushResultCode(code, p); } onFinished(); return; } + auto plan = buildPlan(req); - // todo(doodle): specify by each query + if (UNLIKELY(profileDetailFlag_)) { + plan->enableProfileDetail(); + } + InitContext ctx; + code = plan->init(ctx); + if (UNLIKELY(code != ::nebula::cpp2::ErrorCode::SUCCEEDED)) { + for (auto& p : req.get_parts()) { + pushResultCode(code, p); + } + onFinished(); + return; + } if (!FLAGS_query_concurrently) { - runInSingleThread(req); + runInSingleThread(req.get_parts(), std::move(plan)); } else { - runInMultipleThread(req); + runInMultipleThread(req.get_parts(), std::move(plan)); } } +::nebula::cpp2::ErrorCode LookupProcessor::prepare(const cpp2::LookupIndexRequest& req) { + auto retCode = this->getSpaceVidLen(req.get_space_id()); + if (UNLIKELY(retCode != ::nebula::cpp2::ErrorCode::SUCCEEDED)) { + return retCode; + } + planContext_ = std::make_unique( + this->env_, req.get_space_id(), this->spaceVidLen_, this->isIntId_, req.common_ref()); + planContext_->isEdge_ = + req.get_indices().get_schema_id().getType() == nebula::cpp2::SchemaID::Type::edge_type; + context_ = std::make_unique(this->planContext_.get()); + std::string schemaName; + if (planContext_->isEdge_) { + auto edgeType = req.get_indices().get_schema_id().get_edge_type(); + auto schemaNameValue = env_->schemaMan_->toEdgeName(req.get_space_id(), edgeType); + if (!schemaNameValue.ok()) { + return ::nebula::cpp2::ErrorCode::E_EDGE_NOT_FOUND; + } + schemaName = schemaNameValue.value(); + context_->edgeType_ = edgeType; + } else { + auto tagId = req.get_indices().get_schema_id().get_tag_id(); + auto schemaNameValue = env_->schemaMan_->toTagName(req.get_space_id(), tagId); + if (!schemaNameValue.ok()) { + return ::nebula::cpp2::ErrorCode::E_TAG_NOT_FOUND; + } + schemaName = schemaNameValue.value(); + context_->tagId_ = tagId; + } + std::vector colNames; + for (auto& col : *req.get_return_columns()) { + colNames.emplace_back(schemaName + "." 
+ col); + } + resultDataSet_ = ::nebula::DataSet(colNames); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; +} -void LookupProcessor::runInSingleThread(const cpp2::LookupIndexRequest& req) { - filterItems_.emplace_back(IndexFilterItem()); - auto plan = buildPlan(&filterItems_.front(), &resultDataSet_); - if (!plan.ok()) { - for (auto& p : req.get_parts()) { - pushResultCode(nebula::cpp2::ErrorCode::E_INDEX_NOT_FOUND, p); +std::unique_ptr LookupProcessor::buildPlan(const cpp2::LookupIndexRequest& req) { + std::vector> nodes; + for (auto& ctx : req.get_indices().get_contexts()) { + auto node = buildOneContext(ctx); + nodes.emplace_back(std::move(node)); + } + for (size_t i = 0; i < nodes.size(); i++) { + auto projection = + std::make_unique(context_.get(), *req.get_return_columns()); + projection->addChild(std::move(nodes[i])); + nodes[i] = std::move(projection); + } + if (nodes.size() > 1) { + std::vector dedupColumn; + if (context_->isEdge()) { + dedupColumn = std::vector{kSrc, kRank, kDst}; + } else { + dedupColumn = std::vector{kVid}; } - onFinished(); - return; + auto dedup = std::make_unique(context_.get(), dedupColumn); + for (auto& node : nodes) { + dedup->addChild(std::move(node)); + } + nodes.clear(); + nodes[0] = std::move(dedup); } + if (req.limit_ref().has_value()) { + auto limit = *req.get_limit(); + auto node = std::make_unique(context_.get(), limit); + node->addChild(std::move(nodes[0])); + nodes[0] = std::move(node); + } + return std::move(nodes[0]); +} - std::unordered_set failedParts; - for (const auto& partId : req.get_parts()) { - auto ret = plan.value().go(partId); - if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - if (failedParts.find(partId) == failedParts.end()) { - failedParts.emplace(partId); - handleErrorCode(ret, spaceId_, partId); +std::unique_ptr LookupProcessor::buildOneContext(const cpp2::IndexQueryContext& ctx) { + std::unique_ptr node; + DLOG(INFO) << ctx.get_column_hints().size(); + DLOG(INFO) << &ctx.get_column_hints(); + DLOG(INFO) << ::apache::thrift::SimpleJSONSerializer::serialize(ctx); + if (context_->isEdge()) { + node = std::make_unique( + context_.get(), ctx.get_index_id(), ctx.get_column_hints(), context_->env()->kvstore_); + } else { + node = std::make_unique( + context_.get(), ctx.get_index_id(), ctx.get_column_hints(), context_->env()->kvstore_); + } + if (ctx.filter_ref().is_set() && !ctx.get_filter().empty()) { + auto expr = Expression::decode(context_->objPool(), *ctx.filter_ref()); + auto filterNode = std::make_unique(context_.get(), expr); + filterNode->addChild(std::move(node)); + node = std::move(filterNode); + } + return node; +} + +void LookupProcessor::runInSingleThread(const std::vector& parts, + std::unique_ptr plan) { + // printPlan(plan.get()); + std::vector> datasetList; + std::vector<::nebula::cpp2::ErrorCode> codeList; + for (auto part : parts) { + DLOG(INFO) << "execute part:" << part; + plan->execute(part); + ::nebula::cpp2::ErrorCode code = ::nebula::cpp2::ErrorCode::SUCCEEDED; + decltype(datasetList)::value_type dataset; + do { + auto result = plan->next(); + if (!result.success()) { + code = result.code(); + break; + } + if (result.hasData()) { + dataset.emplace_back(std::move(result).row()); + } else { + break; } + } while (true); + datasetList.emplace_back(std::move(dataset)); + codeList.emplace_back(code); + } + for (size_t i = 0; i < datasetList.size(); i++) { + if (codeList[i] == ::nebula::cpp2::ErrorCode::SUCCEEDED) { + while (!datasetList[i].empty()) { + 
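+        // Rows from each successfully scanned partition are appended to the final result set in part order.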
resultDataSet_.emplace_back(std::move(datasetList[i].front())); + datasetList[i].pop_front(); + } + } else { + DLOG(INFO) << int(codeList[i]); + handleErrorCode(codeList[i], context_->spaceId(), parts[i]); } } if (UNLIKELY(profileDetailFlag_)) { - profilePlan(plan.value()); + profilePlan(plan.get()); } onProcessFinished(); onFinished(); } -void LookupProcessor::runInMultipleThread(const cpp2::LookupIndexRequest& req) { - // As for lookup, once requestCheck is done, the info in RunTimeContext won't - // be changed anymore. So we only use one RunTimeContext, could make it per - // partition later if necessary. - for (size_t i = 0; i < req.get_parts().size(); i++) { - nebula::DataSet result = resultDataSet_; - partResults_.emplace_back(std::move(result)); - filterItems_.emplace_back(IndexFilterItem()); - } - size_t i = 0; - std::vector>> futures; - for (const auto& partId : req.get_parts()) { - futures.emplace_back(runInExecutor(&filterItems_[i], &partResults_[i], partId)); - i++; +void LookupProcessor::runInMultipleThread(const std::vector& parts, + std::unique_ptr plan) { + std::vector> planCopy = reproducePlan(plan.get(), parts.size()); + using ReturnType = std::tuple>; + std::vector> futures; + for (size_t i = 0; i < parts.size(); i++) { + futures.emplace_back(folly::via( + executor_, [this, plan = std::move(planCopy[i]), part = parts[i]]() -> ReturnType { + ::nebula::cpp2::ErrorCode code = ::nebula::cpp2::ErrorCode::SUCCEEDED; + std::deque dataset; + plan->execute(part); + do { + auto result = plan->next(); + if (!result.success()) { + code = result.code(); + break; + } + if (result.hasData()) { + dataset.emplace_back(std::move(result).row()); + } else { + break; + } + } while (true); + if (UNLIKELY(profileDetailFlag_)) { + profilePlan(plan.get()); + } + return {part, code, dataset}; + })); } - - folly::collectAll(futures).via(executor_).thenTry([this](auto&& t) mutable { + folly::collectAll(futures).via(executor_).thenTry([this](auto&& t) { CHECK(!t.hasException()); const auto& tries = t.value(); for (size_t j = 0; j < tries.size(); j++) { CHECK(!tries[j].hasException()); - const auto& [code, partId] = tries[j].value(); - if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - handleErrorCode(code, spaceId_, partId); + auto& [partId, code, dataset] = tries[j].value(); + if (code == ::nebula::cpp2::ErrorCode::SUCCEEDED) { + for (auto& row : dataset) { + resultDataSet_.emplace_back(std::move(row)); + } } else { - resultDataSet_.append(std::move(partResults_[j])); + handleErrorCode(code, context_->spaceId(), partId); } } - // when run each part concurrently, we need to dedup again. 
- if (!deDupColPos_.empty()) { - DeDupNode::dedup(resultDataSet_.rows, deDupColPos_); - } + DLOG(INFO) << "finish"; this->onProcessFinished(); this->onFinished(); }); } - -folly::Future> LookupProcessor::runInExecutor( - IndexFilterItem* filterItem, nebula::DataSet* result, PartitionID partId) { - return folly::via(executor_, [this, filterItem, result, partId]() { - auto plan = buildPlan(filterItem, result); - if (!plan.ok()) { - return std::make_pair(nebula::cpp2::ErrorCode::E_INDEX_NOT_FOUND, partId); +std::vector> LookupProcessor::reproducePlan(IndexNode* root, + size_t count) { + std::vector> ret(count); + for (size_t i = 0; i < count; i++) { + ret[i] = root->copy(); + DLOG(INFO) << ret[i].get(); + } + for (auto& child : root->children()) { + auto childPerPlan = reproducePlan(child.get(), count); + for (size_t i = 0; i < count; i++) { + ret[i]->addChild(std::move(childPerPlan[i])); } - auto ret = plan.value().go(partId); - if (UNLIKELY(this->profileDetailFlag_)) { - profilePlan(plan.value()); + } + return ret; +} +void LookupProcessor::profilePlan(IndexNode* root) { + std::queue q; + q.push(root); + while (!q.empty()) { + auto node = q.front(); + q.pop(); + auto id = node->identify(); + auto iter = profileDetail_.find(id); + if (iter == profileDetail_.end()) { + profileDetail_[id] = node->duration().elapsedInUSec(); + } else { + iter->second += node->duration().elapsedInUSec(); } - return std::make_pair(ret, partId); - }); + for (auto& child : node->children()) { + q.push(child.get()); + } + } } - -void LookupProcessor::onProcessFinished() { - if (context_->isEdge()) { - std::transform(resultDataSet_.colNames.begin(), - resultDataSet_.colNames.end(), - resultDataSet_.colNames.begin(), - [this](const auto& col) { return context_->edgeName_ + "." + col; }); - } else { - std::transform(resultDataSet_.colNames.begin(), - resultDataSet_.colNames.end(), - resultDataSet_.colNames.begin(), - [this](const auto& col) { return context_->tagName_ + "." + col; }); +inline void printPlan(IndexNode* node, int tab) { + for (auto& child : node->children()) { + printPlan(child.get(), tab + 1); } - resp_.set_data(std::move(resultDataSet_)); } - } // namespace storage } // namespace nebula diff --git a/src/storage/index/LookupProcessor.h b/src/storage/index/LookupProcessor.h index cd0b6fad795..1751f8f1c27 100644 --- a/src/storage/index/LookupProcessor.h +++ b/src/storage/index/LookupProcessor.h @@ -1,47 +1,45 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. +/* Copyright (c) 2021 vesoft inc. All rights reserved. * * This source code is licensed under Apache 2.0 License. 
*/ - -#ifndef STORAGE_QUERY_LOOKUP_H_ -#define STORAGE_QUERY_LOOKUP_H_ - +#pragma once #include "common/base/Base.h" -#include "storage/index/LookupBaseProcessor.h" - +#include "common/base/ErrorOr.h" +#include "interface/gen-cpp2/storage_types.h" +#include "storage/BaseProcessor.h" +#include "storage/exec/IndexNode.h" namespace nebula { namespace storage { - extern ProcessorCounters kLookupCounters; -class LookupProcessor - : public LookupBaseProcessor { +class LookupProcessor : public BaseProcessor { public: static LookupProcessor* instance(StorageEnv* env, const ProcessorCounters* counters = &kLookupCounters, folly::Executor* executor = nullptr) { return new LookupProcessor(env, counters, executor); } - - void process(const cpp2::LookupIndexRequest& req) override; - - protected: - LookupProcessor(StorageEnv* env, const ProcessorCounters* counters, folly::Executor* executor) - : LookupBaseProcessor( - env, counters, executor) {} - - void onProcessFinished() override; + void process(const cpp2::LookupIndexRequest& req); private: - void runInSingleThread(const cpp2::LookupIndexRequest& req); - void runInMultipleThread(const cpp2::LookupIndexRequest& req); - - folly::Future> runInExecutor( - IndexFilterItem* filterItem, nebula::DataSet* result, PartitionID partId); - + LookupProcessor(StorageEnv* env, const ProcessorCounters* counters, folly::Executor* executor) + : BaseProcessor(env, counters), executor_(executor) {} void doProcess(const cpp2::LookupIndexRequest& req); + void onProcessFinished() { + BaseProcessor::resp_.set_data(std::move(resultDataSet_)); + } + void profilePlan(IndexNode* plan); + void runInSingleThread(const std::vector& parts, std::unique_ptr plan); + void runInMultipleThread(const std::vector& parts, std::unique_ptr plan); + ::nebula::cpp2::ErrorCode prepare(const cpp2::LookupIndexRequest& req); + std::unique_ptr buildPlan(const cpp2::LookupIndexRequest& req); + std::unique_ptr buildOneContext(const cpp2::IndexQueryContext& ctx); + std::vector> reproducePlan(IndexNode* root, size_t count); + folly::Executor* executor_{nullptr}; + std::unique_ptr planContext_; + std::unique_ptr context_; + nebula::DataSet resultDataSet_; + std::vector partResults_; }; - } // namespace storage } // namespace nebula -#endif // STORAGE_QUERY_LOOKUP_H_ diff --git a/src/storage/test/CMakeLists.txt b/src/storage/test/CMakeLists.txt index d976ab2b209..c664696de53 100644 --- a/src/storage/test/CMakeLists.txt +++ b/src/storage/test/CMakeLists.txt @@ -708,6 +708,20 @@ nebula_add_test( gtest ) +nebula_add_test( + NAME + index_test + SOURCES + IndexTest.cpp + OBJECTS + ${storage_test_deps} + LIBRARIES + ${ROCKSDB_LIBRARIES} + ${THRIFT_LIBRARIES} + ${PROXYGEN_LIBRARIES} + gtest +) + nebula_add_executable( NAME chain_update_edge_test diff --git a/src/storage/test/IndexScanTest.cpp b/src/storage/test/IndexScanTest.cpp index 3e17741930f..59d5df1ef6f 100644 --- a/src/storage/test/IndexScanTest.cpp +++ b/src/storage/test/IndexScanTest.cpp @@ -76,7 +76,8 @@ static std::string genEdgeIndexKey(meta::SchemaManager* schemaMan, VertexID dst) { auto reader = RowReaderWrapper::getEdgePropReader(schemaMan, prop, spaceId, type); auto values = collectIndexValues(reader.get(), index->get_fields()); - auto indexKey = NebulaKeyUtils::edgeIndexKey(partId, index->get_index_id(), src, 0, dst, values); + auto indexKey = + NebulaKeyUtils::edgeIndexKeys(partId, index->get_index_id(), src, 0, dst, values)[0]; return indexKey; } diff --git a/src/storage/test/IndexTest.cpp b/src/storage/test/IndexTest.cpp new file mode 100644 
index 00000000000..6c4e34ad122 --- /dev/null +++ b/src/storage/test/IndexTest.cpp @@ -0,0 +1,1937 @@ +/* Copyright (c) 2018 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include + +#include +#include + +#include "codec/RowReaderWrapper.h" +#include "codec/RowWriterV2.h" +#include "common/base/ObjectPool.h" +#include "common/expression/ConstantExpression.h" +#include "common/expression/PropertyExpression.h" +#include "common/expression/RelationalExpression.h" +#include "common/utils/NebulaKeyUtils.h" +#include "kvstore/KVEngine.h" +#include "kvstore/KVIterator.h" +#include "storage/exec/IndexDedupNode.h" +#include "storage/exec/IndexEdgeScanNode.h" +#include "storage/exec/IndexLimitNode.h" +#include "storage/exec/IndexNode.h" +#include "storage/exec/IndexProjectionNode.h" +#include "storage/exec/IndexSelectionNode.h" +#include "storage/exec/IndexVertexScanNode.h" +#include "storage/test/IndexTestUtil.h" +namespace nebula { +namespace storage { +namespace { +int schemaVer = 2; +using std::string_literals::operator""s; +} // namespace +/** + * IndexScanTest + * + * Test: + * 1. Vertex/Edge + * 2. back to table or not + * 3. different value type + * a. int/float/bool/fix_string/time/date/datetime + * b. compound index + * 4. range/prefix + * a. prefix(equal) + * b. range with begin is include/exclude/-INF + * c. range with end id include/exclude/+INF + * 5. nullable + * 6. multiPart + * Case: + * ┌────────────┬───────────┬───────────────┬─────────────────────────┬─────────┐ + * │ section1 │ name │ case │ description │ NOTICE │ + * ├────────────┼───────────┼───────────────┼─────────────────────────┼─────────┤ + * | Base | Base | | | | + * | ├───────────┼───────────────┼─────────────────────────┼─────────┤ + * | | Vertex | IndexOnly | | | + * | | Edge | BackToTable | | | + * | ├───────────┼───────────────┼─────────────────────────┼─────────┤ + * | | MultiPart | | | | + * ├────────────┼───────────┼───────────────┼─────────────────────────┼─────────┤ + * | Value Type | Int | Truncate | Test different interval | | + * | | Float | NoTruncate | with each type of Value | | + * | | Bool | INCLUDE_BEGIN | | | + * | | String | INCLUDE_END | | | + * | | Time | EXCLUDE_BEGIN | | | + * | | Date | EXCLUDE_END | | | + * | | DateTime | POSITIVE_INF | | | + * | | Compound | NEGATIVE_INF | | | + * | | Nullable | | | | + * | | Geography | | | | + * └────────────┴───────────┴───────────────┴─────────────────────────┴─────────┘ + * + * ┌─┬┐ + * │ ││ + * ├─┼┤ + * └─┴┘ + */ +class IndexScanTestHelper { + public: + void setIndex(IndexVertexScanNode* node, std::shared_ptr<::nebula::meta::cpp2::IndexItem> index) { + node->getIndex = [index](std::shared_ptr<::nebula::meta::cpp2::IndexItem>& ret) { + ret = index; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }; + } + void setIndex(IndexEdgeScanNode* node, std::shared_ptr<::nebula::meta::cpp2::IndexItem> index) { + node->getIndex = [index](std::shared_ptr<::nebula::meta::cpp2::IndexItem>& ret) { + ret = index; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }; + } + void setTag(IndexVertexScanNode* node, + std::shared_ptr<::nebula::meta::NebulaSchemaProvider> schema) { + node->getTag = [schema](IndexVertexScanNode::TagSchemas& tag) { + tag = std::vector>{schema}; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }; + } + void setEdge(IndexEdgeScanNode* node, + std::shared_ptr<::nebula::meta::NebulaSchemaProvider> schema) { + node->getEdge = [schema](IndexEdgeScanNode::EdgeSchemas& edge) { + edge = 
std::vector>{schema}; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }; + } + void setFatal(IndexScanNode* node, bool value) { node->fatalOnBaseNotFound_ = value; } +}; +class IndexScanTest : public ::testing::Test { + protected: + using Schema = ::nebula::meta::NebulaSchemaProvider; + using IndexItem = ::nebula::meta::cpp2::IndexItem; + using ColumnHint = ::nebula::storage::cpp2::IndexColumnHint; + static ColumnHint makeColumnHint(const std::string& name, const Value& value) { + ColumnHint hint; + hint.set_column_name(name); + hint.set_begin_value(value); + hint.set_scan_type(cpp2::ScanType::PREFIX); + return hint; + } + template + static ColumnHint makeColumnHint(const std::string& name, const Value& begin, const Value& end) { + ColumnHint hint; + hint.set_column_name(name); + hint.set_scan_type(cpp2::ScanType::RANGE); + hint.set_begin_value(begin); + hint.set_end_value(end); + hint.set_include_begin(includeBegin); + hint.set_include_end(includeEnd); + return hint; + } + template + static ColumnHint makeBeginColumnHint(const std::string& name, const Value& begin) { + ColumnHint hint; + hint.set_column_name(name); + hint.set_scan_type(cpp2::ScanType::RANGE); + hint.set_begin_value(begin); + hint.set_include_begin(include); + return hint; + } + template + static ColumnHint makeEndColumnHint(const std::string& name, const Value& end) { + ColumnHint hint; + hint.set_column_name(name); + hint.set_scan_type(cpp2::ScanType::RANGE); + hint.set_end_value(end); + hint.set_include_end(include); + return hint; + } + static std::vector> encodeTag( + const std::vector& rows, + TagID tagId, + std::shared_ptr schema, + std::vector> indices) { + std::vector> ret(indices.size() + 1); + for (size_t i = 0; i < rows.size(); i++) { + auto key = NebulaKeyUtils::vertexKey(8, 0, std::to_string(i), tagId); + RowWriterV2 writer(schema.get()); + for (size_t j = 0; j < rows[i].size(); j++) { + writer.setValue(j, rows[i][j]); + } + writer.finish(); + auto value = writer.moveEncodedStr(); + CHECK(ret[0].insert({key, value}).second); + RowReaderWrapper reader(schema.get(), folly::StringPiece(value), schemaVer); + for (size_t j = 0; j < indices.size(); j++) { + auto& index = indices[j]; + auto indexValue = IndexKeyUtils::collectIndexValues(&reader, index->get_fields()).value(); + auto indexKey = IndexKeyUtils::vertexIndexKeys( + 8, 0, index->get_index_id(), std::to_string(i), std::move(indexValue))[0]; + CHECK(ret[j + 1].insert({indexKey, ""}).second); + } + } + return ret; + } + static std::vector> encodeEdge( + const std::vector& rows, + EdgeType edgeType, + std::shared_ptr schema, + std::vector> indices) { + std::vector> ret(indices.size() + 1); + for (size_t i = 0; i < rows.size(); i++) { + auto key = NebulaKeyUtils::edgeKey(8, 0, std::to_string(i), edgeType, i, std::to_string(i)); + RowWriterV2 writer(schema.get()); + for (size_t j = 0; j < rows[i].size(); j++) { + writer.setValue(j, rows[i][j]); + } + writer.finish(); + auto value = writer.moveEncodedStr(); + CHECK(ret[0].insert({key, value}).second); + RowReaderWrapper reader(schema.get(), folly::StringPiece(value), schemaVer); + for (size_t j = 0; j < indices.size(); j++) { + auto& index = indices[j]; + auto indexValue = IndexKeyUtils::collectIndexValues(&reader, index->get_fields()).value(); + auto indexKey = IndexKeyUtils::edgeIndexKeys(8, + 0, + index->get_index_id(), + std::to_string(i), + i, + std::to_string(i), + std::move(indexValue))[0]; + CHECK(ret[j + 1].insert({indexKey, ""}).second); + } + } + return ret; + } + static PlanContext* getPlanContext() 
{ + static std::unique_ptr ctx = std::make_unique(nullptr, 0, 8, false); + return ctx.get(); + } + static std::unique_ptr makeContext(TagID tagId, EdgeType edgeType) { + auto ctx = std::make_unique(getPlanContext()); + ctx->tagId_ = tagId; + ctx->edgeType_ = edgeType; + return ctx; + } +}; +TEST_F(IndexScanTest, Base) { + auto rows = R"( + int | int + 1 | 2 + 1 | 3 + )"_row; + auto schema = R"( + a | int ||false + b | int ||false + )"_schema; + auto indices = R"( + TAG(t,1) + (i1,2):a + (i2,3):b + )"_index(schema); + auto kv = encodeTag(rows, 1, schema, indices); + auto kvstore = std::make_unique(); + for (auto& iter : kv) { + for (auto& item : iter) { + kvstore->put(item.first, item.second); + } + } + { // Case 1 + std::vector columnHints{ + makeColumnHint("a", Value(1)) // a=1 + }; + IndexID indexId = 2; + auto context = makeContext(1, 0); + auto scanNode = + std::make_unique(context.get(), indexId, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), indices[0]); + helper.setTag(scanNode.get(), schema); + InitContext initCtx; + initCtx.requiredColumns = {kVid, "a"}; + scanNode->init(initCtx); + scanNode->execute(0); + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + auto expect = R"( + string | int + 0 | 1 + 1 | 1 + )"_row; + std::vector colOrder = {kVid, "a"}; + ASSERT_EQ(result.size(), expect.size()); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i].size(), expect[i].size()); + for (size_t j = 0; j < expect[i].size(); j++) { + ASSERT_EQ(expect[i][j], result[i][initCtx.retColMap[colOrder[j]]]); + } + } + } // End of Case 1 + { // Case 2 + std::vector columnHints{ + makeColumnHint("b", Value(3)) // b=3 + }; + IndexID indexId = 3; + auto context = makeContext(1, 0); + auto scanNode = + std::make_unique(context.get(), indexId, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), indices[1]); + helper.setTag(scanNode.get(), schema); + InitContext initCtx; + initCtx.requiredColumns = {kVid, "b"}; + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + auto expect = R"( + string | int + 1 | 3 + )"_row; + std::vector colOrder = {kVid, "b"}; + ASSERT_EQ(result.size(), expect.size()); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i].size(), expect[i].size()); + for (size_t j = 0; j < expect[i].size(); j++) { + ASSERT_EQ(expect[i][j], result[i][initCtx.retColMap[colOrder[j]]]); + } + } + } // End of Case 2 +} +TEST_F(IndexScanTest, Vertex) { + auto rows = R"( + int | int + 1 | 2 + 1 | 3 + )"_row; + auto schema = R"( + a | int | | false + b | int | | false + )"_schema; + auto indices = R"( + TAG(t,1) + (i1,2):a + )"_index(schema); + auto kv = encodeTag(rows, 1, schema, indices); + auto kvstore = std::make_unique(); + std::vector columnHints{ + makeColumnHint("a", Value(1)) // a=1 + }; + IndexID indexId = 0; + auto context = makeContext(1, 0); + { // Case 1: IndexOnly + // Only put index key-values into kvstore + for (auto& item : kv[1]) { + kvstore->put(item.first, item.second); + } + auto scanNode = + std::make_unique(context.get(), indexId, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), indices[0]); + 
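+    // The test helper injects the index definition and tag schema directly, so no meta client is needed here.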
helper.setTag(scanNode.get(), schema); + InitContext initCtx; + initCtx.requiredColumns = {kVid, "a"}; + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + auto expect = R"( + string | int + 0 | 1 + 1 | 1 + )"_row; + std::vector colOrder = {kVid, "a"}; + ASSERT_EQ(result.size(), expect.size()); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i].size(), expect[i].size()); + for (size_t j = 0; j < expect[i].size(); j++) { + ASSERT_EQ(expect[i][j], result[i][initCtx.retColMap[colOrder[j]]]); + } + } + } // End of Case 1 + { // Case 2: Access base data + // Put base data key-values into kvstore + for (auto& item : kv[0]) { + kvstore->put(item.first, item.second); + } + auto scanNode = + std::make_unique(context.get(), indexId, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), indices[0]); + helper.setTag(scanNode.get(), schema); + InitContext initCtx; + initCtx.requiredColumns = {kVid, "b"}; + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + auto expect = R"( + string | int + 0 | 2 + 1 | 3 + )"_row; + std::vector colOrder = {kVid, "b"}; + ASSERT_EQ(result.size(), expect.size()); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i].size(), expect[i].size()); + VLOG(1) << result[i]; + for (size_t j = 0; j < expect[i].size(); j++) { + ASSERT_EQ(expect[i][j], result[i][initCtx.retColMap[colOrder[j]]]); + } + } + } // End of Case 2 +} +TEST_F(IndexScanTest, Edge) { + auto rows = R"( + int | int | int + 5 | 2 | 1 + 10 | 3 | 2 + 20 | 3 | 3 + )"_row; + auto schema = R"( + a | int | | false + b | int | | false + c | int | | false + )"_schema; + auto indices = R"( + EDGE(e,1) + (i1,2):b,c + )"_index(schema); + auto kv = encodeEdge(rows, 1, schema, indices); + auto kvstore = std::make_unique(); + std::vector columnHints{ + makeColumnHint("b", Value(3)), // b=3 + }; + IndexID indexId = 0; + auto context = makeContext(0, 1); + { // Case 1: IndexOnly + for (auto& item : kv[1]) { + kvstore->put(item.first, item.second); + } + auto scanNode = + std::make_unique(context.get(), indexId, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), indices[0]); + helper.setEdge(scanNode.get(), schema); + InitContext initCtx; + initCtx.requiredColumns = {kSrc, kRank, kDst, "c"}; + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + auto expect = R"( + string | int | string | int + 1 | 1 | 1 | 2 + 2 | 2 | 2 | 3 + )"_row; + std::vector colOrder = {kSrc, kRank, kDst, "c"}; + ASSERT_EQ(result.size(), expect.size()); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i].size(), expect[i].size()); + for (size_t j = 0; j < expect[i].size(); j++) { + EXPECT_EQ(expect[i][j], result[i][initCtx.retColMap[colOrder[j]]]); + } + } + } // End of Case 1 + { // Case 2: Access base data + for (auto& item : kv[0]) { + kvstore->put(item.first, item.second); + } + auto scanNode = + std::make_unique(context.get(), indexId, columnHints, kvstore.get()); + IndexScanTestHelper 
helper; + helper.setIndex(scanNode.get(), indices[0]); + helper.setEdge(scanNode.get(), schema); + InitContext initCtx; + initCtx.requiredColumns = {kSrc, kRank, kDst, "a"}; + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + auto expect = R"( + string | int | string | int + 1 | 1 | 1 | 10 + 2 | 2 | 2 | 20 + )"_row; + std::vector colOrder = {kSrc, kRank, kDst, "a"}; + ASSERT_EQ(result.size(), expect.size()); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i].size(), expect[i].size()); + for (size_t j = 0; j < expect[i].size(); j++) { + EXPECT_EQ(expect[i][j], result[i][initCtx.retColMap[colOrder[j]]]); + } + } + } +} +TEST_F(IndexScanTest, Int) { + auto rows = R"( + int | int | int + 1 | -1 | -10 + 2 | 1 | -9223372036854775808 + 3 | 0 | -1 + 4 | 9223372036854775807 | 0 + 5 | -9223372036854775808 | 9223372036854775807 + 6 | | 0 + )"_row; + auto schema = R"( + a | int | | false + b | int | | true + c | int | | false + )"_schema; + auto indices = R"( + TAG(t,1) + (i1,2):a + (i2,3):b + (i3,4):c + )"_index(schema); + auto kv = encodeTag(rows, 1, schema, indices); + auto kvstore = std::make_unique(); + for (auto& iter : kv) { + for (auto& item : iter) { + kvstore->put(item.first, item.second); + } + } + auto check = [&](std::shared_ptr index, + const std::vector& columnHints, + const std::vector& expect, + const std::string& case_) { + auto context = makeContext(1, 0); + auto scanNode = + std::make_unique(context.get(), 0, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), index); + helper.setTag(scanNode.get(), schema); + InitContext initCtx; + initCtx.requiredColumns = {kVid}; + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + EXPECT_EQ(result, expect) << "Fail at case " << case_; + }; + auto expect = [](auto... 
vidList) { + std::vector ret; + std::vector value; + (value.push_back(std::to_string(vidList)), ...); + for (auto& v : value) { + Row row; + row.emplace_back(v); + ret.emplace_back(std::move(row)); + } + return ret; + }; + const int64_t MAX = 0x7fffffffffffffff; + const int64_t MIN = -MAX - 1; + /* Case 1: Prefix */ + { + std::vector columnHints = {makeColumnHint("a", 1)}; // a=1; + check(indices[0], columnHints, expect(0), "case1.1"); // + columnHints = {makeColumnHint("b", MAX)}; // b=MAX + check(indices[1], columnHints, expect(3), "case1.2"); // + columnHints = {makeColumnHint("b", MIN)}; // b=MIN + check(indices[1], columnHints, expect(4), "case1.3"); // + columnHints = {makeColumnHint("c", 0)}; // c=0 + check(indices[2], columnHints, expect(3, 5), "case1.4"); // + } // End of Case 1 + /* Case 2: [x, INF) */ + { + std::vector columnHints = {makeBeginColumnHint("a", -1)}; // Case2.1: a >= -1 + check(indices[0], columnHints, expect(0, 1, 2, 3, 4, 5), "case2.1"); // + columnHints = {makeBeginColumnHint("a", 4)}; // Case2.2: a>=4 + check(indices[0], columnHints, expect(3, 4, 5), "case2.2"); // + columnHints = {makeBeginColumnHint("a", 7)}; // Case2.3: a>=7 + check(indices[0], columnHints, {}, "case2.3"); // + columnHints = {makeBeginColumnHint("b", MIN)}; // Case2.4: b>=INT_MIN + check(indices[1], columnHints, expect(4, 0, 2, 1, 3), "case2.4"); // + columnHints = {makeBeginColumnHint("b", MAX)}; // Case2.5: b>=INT_MAX + check(indices[1], columnHints, expect(3), "case2.5"); // + columnHints = {makeBeginColumnHint("b", 0)}; // Case2.6: b>=0 + check(indices[1], columnHints, expect(2, 1, 3), "case2.6"); // + columnHints = {makeBeginColumnHint("c", MIN)}; // Case2.7: c>=INT_MIN + check(indices[2], columnHints, expect(1, 0, 2, 3, 5, 4), "case2.7"); // + columnHints = {makeBeginColumnHint("c", MAX)}; // Case2.8: c>=INT_MAX + check(indices[2], columnHints, expect(4), "case2.8"); // + columnHints = {makeBeginColumnHint("c", 0)}; // Case2.9: c>=0 + check(indices[2], columnHints, expect(3, 5, 4), "case2.9"); // + } // End of Case 2 + /* Case 3: [x, y) */ + { + std::vector columnHints; // + columnHints = {makeColumnHint("a", -1, 10)}; // Case3.1: -1<=a<10 + check(indices[0], columnHints, expect(0, 1, 2, 3, 4, 5), "case3.1"); // + columnHints = {makeColumnHint("a", -100, 4)}; // Case3.2: -100<=a<4 + check(indices[0], columnHints, expect(0, 1, 2), "case3.2"); // + columnHints = {makeColumnHint("a", 4, 100)}; // Case3.3: 4<=a<100 + check(indices[0], columnHints, expect(3, 4, 5), "case3.3"); // + columnHints = {makeColumnHint("a", 2, 5)}; // Case3.4: 2<=a<5 + check(indices[0], columnHints, expect(1, 2, 3), "case3.4"); // + columnHints = {makeColumnHint("a", -100, 0)}; // Case3.5: -100<=a<0 + check(indices[0], columnHints, {}, "case3.5"); // + columnHints = {makeColumnHint("a", 10, 100)}; // Case3.6: 10<=a<100 + check(indices[0], columnHints, {}, "case3.6"); // + columnHints = {makeColumnHint("b", MIN, MAX)}; // Case3.7: MIN<=b("c", MIN, MAX)}; // Case3.8: MIN<=c columnHints; // + columnHints = {makeBeginColumnHint("a", 3)}; // Case 4.1: a>3 + check(indices[0], columnHints, expect(3, 4, 5), "case4.1"); // + columnHints = {makeBeginColumnHint("b", MIN)}; // Case 4.2: b>MIN + check(indices[1], columnHints, expect(0, 2, 1, 3), "case4.2"); // + columnHints = {makeBeginColumnHint("b", MAX)}; // Case4.3: b>MAX + check(indices[1], columnHints, {}, "case4.3"); // + columnHints = {makeBeginColumnHint("c", MIN)}; // Case4.4: c>MIN + check(indices[2], columnHints, expect(0, 2, 3, 5, 4), "case4.4"); // + columnHints = 
{makeBeginColumnHint("c", MAX - 1)}; // Case4.5: c>MAX-1 + check(indices[2], columnHints, expect(4), "case4.4"); // + } // End of Case 4 + /* Case 5: (x, y] */ + { + std::vector columnHints; // + columnHints = {makeColumnHint("a", 1, 6)}; // Case5.1: 1("a", 0, 3)}; // Case5.2: 0("b", MIN, MIN)}; // Case5.3: MIN("b", MAX, MAX)}; // Case5.4: MAX("b", 0, MAX)}; // Case5.5: 0("c", -1, MAX)}; // Case5.6: -1 columnHints; // + columnHints = {makeEndColumnHint("a", 4)}; // Case6.1: a<=4 + check(indices[0], columnHints, expect(0, 1, 2, 3), "case6.1"); // + columnHints = {makeEndColumnHint("a", 1)}; // Case6.2: a<=1 + check(indices[0], columnHints, expect(0), "case6.2"); // + columnHints = {makeEndColumnHint("b", MIN)}; // Case6.3: b<=MIN + check(indices[1], columnHints, expect(4), "case6.3"); // + columnHints = {makeEndColumnHint("b", MAX)}; // Case6.4: b<=MAX + check(indices[1], columnHints, expect(4, 0, 2, 1, 3), "casae6.4"); // + columnHints = {makeEndColumnHint("c", MIN)}; // Case6.5: c<=MIN + check(indices[2], columnHints, expect(1), "case6.5"); // + columnHints = {makeEndColumnHint("c", MAX)}; // Case6.6: c<=MAX + check(indices[2], columnHints, expect(1, 0, 2, 3, 5, 4), "case6.6"); // + } // End of Case 6 + /* Case 7: (-INF, y) */ + { + std::vector columnHints; // + columnHints = {makeEndColumnHint("a", 4)}; // Case7.1: a<4 + check(indices[0], columnHints, expect(0, 1, 2), "case7.1"); // + columnHints = {makeEndColumnHint("a", 1)}; // Case7.2: a<1 + check(indices[0], columnHints, {}, "case7.2"); // + columnHints = {makeEndColumnHint("b", MIN)}; // Case7.3: b("b", MAX)}; // Case7.4: b("c", MIN)}; // Case7.5: c("c", MAX)}; // Case7.6: c::infinity(); + auto rows = R"( +float | float | float | int +-100.0 | 0.0 | | 0 +-20.0 | -0.0 | | 1 +-5.0 | | 1.7976931348623157e+308 | 2 +-0.0 | <-INF> | 1.7976931348623157e+308 | 3 +0.0 | | | 4 +1.234e10 | <-NaN> | | 5 +5.0 | 4.9406564584124654e-324 | <-INF> | 6 +20.0 | -4.9406564584124654e-324 | <-INF> | 7 +100.0 | 2.2250738585072009e-308 | | 8 +1.2345e10 | -2.2250738585072009e-308 | | 9 +-7e-10 | 2.2250738585072014e-308 | <-NaN> | 10 +7e10 | -2.2250738585072014e-308 | <-NaN> | 11 +-7e10 | 1.7976931348623157e+308 | -0.0 | 12 +7e-10 | -1.7976931348623157e+308 | 0.0 | 13 + )"_row; + auto schema = R"( + a | double | | false + b | double | | false + c | double | | true + )"_schema; + auto indices = R"( + EDGE(e,1) + (i1,2):a + (i2,3):b + (i3,4):c + )"_index(schema); + auto kv = encodeEdge(rows, 1, schema, indices); + auto kvstore = std::make_unique(); + for (auto& iter : kv) { + for (auto& item : iter) { + kvstore->put(item.first, item.second); + } + } + auto check = [&](std::shared_ptr index, + const std::vector& columnHints, + const std::vector& expect, + const std::string& case_) { + auto context = makeContext(0, 1); + auto scanNode = + std::make_unique(context.get(), 0, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), index); + helper.setEdge(scanNode.get(), schema); + InitContext initCtx; + initCtx.requiredColumns = {kSrc}; + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + EXPECT_EQ(result, expect) << "Fail at case " << case_; + }; + auto expect = [](auto... 
vidList) { + std::vector ret; + std::vector value; + (value.push_back(std::to_string(vidList)), ...); + for (auto& v : value) { + Row row; + row.emplace_back(v); + ret.emplace_back(std::move(row)); + } + return ret; + }; + /* Case 1: prefix */ { + auto hint = [](const char* name, double value) { + return std::vector{makeColumnHint(name, value)}; + }; + check(indices[0], hint("a", 1000.0), {}, "case1.1"); // Case1.1: a=1000.0 + check(indices[0], hint("a", 0.0), expect(3, 4), "case1.2"); // Case1.2: a=0.0 + check(indices[1], hint("b", MAX_NV), expect(12), "case1.3"); // Case1.3: b=MAX_NV + check(indices[1], hint("b", MIN_NV), expect(10), "case1.4"); // Case1.4: b=MIN_NV + check(indices[1], hint("b", MAX_SV), expect(8), "case1.5"); // Case1.5: b=MAX_SV + check(indices[1], hint("b", MIN_SV), expect(6), "case1.6"); // Case1.6: b=MIN_SV + check(indices[1], hint("b", -MAX_NV), expect(13), "case1.7"); // Case1.7: b=-MAX_NV + check(indices[1], hint("b", -MIN_NV), expect(11), "case1.8"); // Case1.8: b=-MIN_NV + check(indices[1], hint("b", -MAX_SV), expect(9), "case1.9"); // Case1.9: b=-MAX_SV + check(indices[1], hint("b", -MIN_SV), expect(7), "case1.10"); // Case1.10 b=-MIN_SV + check(indices[1], hint("b", 0.0), expect(0, 1), "case1.11"); // Case1.11: b=0.0 + check(indices[1], hint("b", -0.0), expect(0, 1), "case1.12"); // Case1.12: b=-0.0 + check(indices[1], hint("b", INF), expect(2), "case1.13"); // Case1.13: b= + check(indices[1], hint("b", -INF), expect(3), "case1.14"); // Case1.14: b=<-INF> + check(indices[2], hint("c", INF), expect(4, 5), "case1.15"); // Case1.15: c= + } // End of Case 1 + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 + auto aOrder = expect(12, 0, 1, 2, 10, 3, 4, 13, 6, 7, 8, 5, 9, 11); + auto bOrder = expect(3, 13, 11, 9, 7, 0, 1, 6, 8, 10, 12, 2); + auto cOrder = expect(6, 7, 12, 13, 2, 3, 4, 5); + /* Case 2: [x, INF) */ { + auto hint = [](const char* name, double value) { + return std::vector{makeBeginColumnHint(name, value)}; + }; + auto slice = [](decltype(aOrder) all, size_t start) { + return decltype(all){all.begin() + start, all.end()}; + }; + check(indices[0], hint("a", -100.0), slice(aOrder, 1), "case2.1"); // Case 2.1: a>=-100 + check(indices[0], hint("a", 0.0), slice(aOrder, 5), "case2.2"); // Case 2.2: a>=0.0 + // Case 2.3~2.14: a>={each of $val} + std::vector val{ + -INF, -MAX_NV, -MIN_NV, -MAX_SV, -MIN_SV, -0.0, 0.0, MIN_SV, MAX_SV, MIN_NV, MAX_NV, INF}; + for (size_t i = 0; i < val.size(); i++) { + std::string case_ = fmt::format("case2.{}", i + 3); + auto offset = i; + if (val[i] == 0 && val[i - 1] == 0) { + offset--; + } + check(indices[1], hint("b", val[i]), slice(bOrder, offset), case_); + } + check(indices[2], hint("c", -INF), slice(cOrder, 0), "case2.15"); + check(indices[2], hint("c", 0.0), slice(cOrder, 2), "case2.16"); + check(indices[2], hint("c", MAX_NV), slice(cOrder, 4), "case2.17"); + check(indices[2], hint("c", INF), slice(cOrder, 6), "case2.18"); + } + /* Case 3: [x, y)*/ { + auto hint = [](const char* name, double left, double right) { + return std::vector{makeColumnHint(name, left, right)}; + }; + auto slice = [](decltype(aOrder) all, size_t start, size_t end) { + return decltype(all){all.begin() + start, all.begin() + std::min(end, all.size())}; + }; + check( + indices[0], hint("a", -100.0, -0.0), slice(aOrder, 1, 5), "case3.1"); // Case3.1:-100<=a<0 + check(indices[0], hint("a", 10, 1e9), slice(aOrder, 9, 11), "case3.2"); + check(indices[0], hint("a", 1, 2), {}, "case3.3"); + check(indices[0], hint("a", -INF, INF), aOrder, "case3.4"); + 
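The float cases above pivot on a handful of IEEE-754 boundary values. As a standalone sketch (not part of the patch), the named constants used in the checks, MAX_NV, MIN_NV, MAX_SV, MIN_SV and INF, presumably correspond to the standard limits below, which match the literals in the data rows; the last assert also shows why decrementing the bit pattern of +INF, as the later range cases do with memcpy, lands on the largest finite double.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double INF = std::numeric_limits<double>::infinity();
  const double MAX_NV = std::numeric_limits<double>::max();         // 1.7976931348623157e+308
  const double MIN_NV = std::numeric_limits<double>::min();         // 2.2250738585072014e-308, smallest normal
  const double MIN_SV = std::numeric_limits<double>::denorm_min();  // 4.9406564584124654e-324, smallest subnormal
  const double MAX_SV = std::nextafter(MIN_NV, 0.0);                // 2.2250738585072009e-308, largest subnormal
  assert(0.0 < MIN_SV && MIN_SV < MAX_SV && MAX_SV < MIN_NV && MIN_NV < MAX_NV && MAX_NV < INF);
  // Decrementing the bit pattern of +INF yields the largest finite double,
  // i.e. std::nextafter(INF, 0.0) == MAX_NV.
  assert(std::nextafter(INF, 0.0) == MAX_NV);
  return 0;
}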
check(indices[0], hint("a", -INF, 0), slice(aOrder, 0, 5), "case3.5"); + check(indices[0], hint("a", 0, INF), slice(aOrder, 5, 14), "case3.6"); + // Case 3.7~3.18: b<{each of $val} + std::vector val{ + -INF, -MAX_NV, -MIN_NV, -MAX_SV, -MIN_SV, -0.0, 0.0, MIN_SV, MAX_SV, MIN_NV, MAX_NV, INF}; + for (size_t i = 0; i < val.size(); i++) { + std::string case_ = fmt::format("case3.{}", i + 7); + auto offset = i; + if (val[i] == 0 && val[i - 1] == 0) { + offset--; + } + check(indices[1], hint("b", -INF, val[i]), slice(bOrder, 0, offset), case_); + } + check(indices[2], hint("c", -INF, INF), slice(cOrder, 0, 6), "case3.19"); + } + /* Case 4: (x, INF)*/ { + auto hint = [](const char* name, double value) { + return std::vector{makeBeginColumnHint(name, value)}; + }; + auto slice = [](decltype(aOrder) all, size_t start) { + return decltype(all){all.begin() + start, all.end()}; + }; + check(indices[0], hint("a", 100), slice(aOrder, 11), "case4.1"); + check(indices[1], hint("b", INF), {}, "case4.2"); + int64_t x; + ::memcpy(&x, &INF, 8); + // int64_t x = *reinterpret_cast(&INF); + x--; + double y; + ::memcpy(&y, &x, 8); + // double y = *reinterpret_cast(&x); + check(indices[1], hint("b", y), slice(bOrder, 11), "case4.3"); + check(indices[2], hint("c", INF), {}, "case4.4"); + check(indices[2], hint("c", y), slice(cOrder, 6), "case4.5"); + } /* Case 5: (x, y]*/ + { + auto hint = [](const char* name, double left, double right) { + return std::vector{makeColumnHint(name, left, right)}; + }; + auto slice = [](decltype(aOrder) all, size_t start, size_t end) { + return decltype(all){all.begin() + start, all.begin() + end}; + }; + check( + indices[0], hint("a", -100.0, -0.0), slice(aOrder, 2, 7), "case5.1"); // Case3.1:-100<=a<0 + check(indices[0], hint("a", 10, 1e9), slice(aOrder, 9, 11), "case5.2"); + check(indices[0], hint("a", 1, 2), {}, "case5.3"); + check(indices[0], hint("a", -INF, INF), aOrder, "case5.4"); + check(indices[0], hint("a", -INF, 0), slice(aOrder, 0, 7), "case5.5"); + check(indices[0], hint("a", 0, INF), slice(aOrder, 7, 14), "case5.6"); + // Case 5.7~5.18: b>{each of $val} + std::vector val{ + -INF, -MAX_NV, -MIN_NV, -MAX_SV, -MIN_SV, -0.0, 0.0, MIN_SV, MAX_SV, MIN_NV, MAX_NV, INF}; + for (size_t i = 0; i < val.size(); i++) { + std::string case_ = fmt::format("case5.{}", i + 7); + auto offset = i + 1; + if (val[i] == 0 && val[i + 1] == 0) { + offset++; + } + check(indices[1], hint("b", val[i], INF), slice(bOrder, offset, bOrder.size()), case_); + } + check(indices[2], hint("c", -INF, INF), slice(cOrder, 2, 8), "case5.19"); + } /* Case 6: (-INF, y]*/ + { + auto hint = [](const char* name, double value) { + return std::vector{makeEndColumnHint(name, value)}; + }; + auto slice = [](decltype(aOrder) all, size_t end) { + return decltype(all){all.begin(), all.begin() + end}; + }; + check(indices[0], hint("a", 0), slice(aOrder, 7), "case6.1"); + check(indices[0], hint("a", -0.0), slice(aOrder, 7), "case6.2"); + check(indices[0], hint("a", -100.0), slice(aOrder, 2), "case6.3"); + // Case 6.4~6.15 + std::vector val{ + -INF, -MAX_NV, -MIN_NV, -MAX_SV, -MIN_SV, -0.0, 0.0, MIN_SV, MAX_SV, MIN_NV, MAX_NV, INF}; + for (size_t i = 0; i < val.size(); i++) { + std::string case_ = fmt::format("case6.{}", i + 3); + auto offset = i + 1; + if (val[i] == 0 && val[i + 1] == 0) { + offset++; + } + check(indices[1], hint("b", val[i]), slice(bOrder, offset), case_); + } + check(indices[2], hint("c", INF), cOrder, "case6.16"); + } + /* Case 7: (-INF, y)*/ { + auto hint = [](const char* name, double value) { + return 
std::vector{makeEndColumnHint(name, value)}; + }; + auto slice = [](decltype(aOrder) all, size_t end) { + return decltype(all){all.begin(), all.begin() + end}; + }; + check(indices[0], hint("a", 100), slice(aOrder, 10), "case7.1"); + check(indices[1], hint("b", -INF), {}, "case7.2"); + int64_t x; + ::memcpy(&x, &INF, 8); + // int64_t x = *reinterpret_cast(&INF); + x--; + double y; + ::memcpy(&y, &x, 8); + // double y = *reinterpret_cast(&x); + check(indices[1], hint("b", -y), slice(bOrder, 1), "case7.3"); + check(indices[2], hint("c", -INF), {}, "case7.4"); + check(indices[2], hint("c", -y), slice(cOrder, 2), "case7.5"); + } +} +TEST_F(IndexScanTest, Bool) { + auto rows = R"( + bool | bool + true | true + true | false + false | + false | false + true | + )"_row; + auto schema = R"( + a | bool | | + b | bool | | true + )"_schema; + auto indices = R"( + TAG(t,2) + (i1,2):a + (i2,3):b + )"_index(schema); + auto kv = encodeTag(rows, 2, schema, indices); + auto kvstore = std::make_unique(); + for (auto& iter : kv) { + for (auto& item : iter) { + kvstore->put(item.first, item.second); + } + } + auto check = [&](std::shared_ptr index, + const std::vector& columnHints, + const std::vector& expect, + const std::string& case_) { + auto context = makeContext(1, 0); + auto scanNode = + std::make_unique(context.get(), 0, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), index); + helper.setTag(scanNode.get(), schema); + InitContext initCtx; + initCtx.requiredColumns = {kVid}; + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + EXPECT_EQ(result, expect) << "Fail at case " << case_; + }; + auto expect = [](auto... 
vidList) { + std::vector ret; + std::vector value; + (value.push_back(std::to_string(vidList)), ...); + for (auto& v : value) { + Row row; + row.emplace_back(v); + ret.emplace_back(std::move(row)); + } + return ret; + }; + /* Case 1: Prefix */ { + check(indices[0], {makeColumnHint("a", true)}, expect(0, 1, 4), "case1.1"); + check(indices[0], {makeColumnHint("a", false)}, expect(2, 3), "case1.2"); + check(indices[1], {makeColumnHint("b", true)}, expect(0), "case1.3"); + check(indices[1], {makeColumnHint("b", false)}, expect(1, 3), "case1.4"); + } + /* Case 2: [x,INF) */ { + check(indices[0], {makeBeginColumnHint("a", false)}, expect(2, 3, 0, 1, 4), "case2.1"); + check(indices[0], {makeBeginColumnHint("a", true)}, expect(0, 1, 4), "case2.2"); + check(indices[1], {makeBeginColumnHint("b", true)}, expect(0), "case2.3"); + } +} +TEST_F(IndexScanTest, String1) { + /** + * data and query both without truncate + * That means ScanNode only access Index Key-Values + */ + auto rows = + " string | string | string | int \n" + " 123456789 | abcdefghi | \xFF\xFF\xFF\xFF\xFF\xFF\xFF | 0 \n" + " 123456789 | | | 1 \n" + " 12345678 | | \x01 | 2 \n" + " 123456788 | \xFF\xFF | | 3 \n" + " 12345678: | aacd | \xFF\xFF\xFF\xFF\xFF\xFF\xFE | 4 \n" + " a1234 | accd | \x00\x01 | 5 \n" + " | | | 6 \n" + ""_row; + // 0 1 2 3 4 5 6 7 + std::vector a = {6, 2, 3, 0, 1, 4, 5}; + std::vector b = {6, 4, 0, 5, 3}; + std::vector c = {1, 3, 5, 2, 4, 0}; + + auto schema = R"( + a | string | 10 | false + b | string | 10 | true + c | string | 10 | true + )"_schema; + auto indices = R"( + TAG(t,1) + (ia,2): a(10) + (ib,3): b(10) + (ic,4): c(10) + )"_index(schema); + auto kv = encodeTag(rows, 1, schema, indices); + auto kvstore = std::make_unique(); + for (size_t i = 0; i < kv.size(); i++) { + for (auto& item : kv[i]) { + kvstore->put(item.first, item.second); + } + } + auto check = [&](std::shared_ptr index, + const std::vector& columnHints, + const std::vector& acquiredColumns, + const std::vector& expect, + const std::string& case_) { + auto context = makeContext(1, 0); + auto scanNode = + std::make_unique(context.get(), 0, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), index); + helper.setTag(scanNode.get(), schema); + helper.setFatal(scanNode.get(), true); + InitContext initCtx; + initCtx.requiredColumns.insert(acquiredColumns.begin(), acquiredColumns.end()); + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + std::vector result2(result.size()); + for (size_t j = 0; j < acquiredColumns.size(); j++) { + int p = initCtx.retColMap[acquiredColumns[j]]; + for (size_t i = 0; i < result.size(); i++) { + result2[i].emplace_back(result[i][p]); + } + } + result = result2; + EXPECT_EQ(result, expect) << "Fail at case " << case_; + }; + auto expect = [&rows](const std::vector& vidList, const std::vector& columns) { + std::vector ret; + for (size_t i = 0; i < vidList.size(); i++) { + Row row; + row.emplace_back(Value(std::to_string(vidList[i]))); + for (size_t j = 0; j < columns.size(); j++) { + row.emplace_back(rows[vidList[i]][columns[j]]); + } + ret.emplace_back(std::move(row)); + } + return ret; + }; + /* Case 1: prefix */ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeColumnHint(name, value)}; + }; + check(indices[0], hint("a", "123456789"), {kVid, "a"}, expect({0, 
1}, {0}), "case1.1"); + check(indices[0], hint("a", "12345678"), {kVid, "a"}, expect({2}, {0}), "case1.2"); + check(indices[0], hint("a", ""), {kVid, "a"}, expect({6}, {0}), "case1.3"); + check(indices[1], hint("b", "\xFF\xFF"), {kVid, "b"}, expect({3}, {1}), "case1.4"); + check(indices[1], hint("b", ""), {kVid, "b"}, expect({6}, {1}), "case1.5"); + auto columnHint = hint("c", "\xFF\xFF\xFF\xFF\xFF\xFF\xFE"); + check(indices[2], columnHint, {kVid, "c"}, expect({4}, {2}), "case1.6"); + } + + /* Case 2: [x, INF)*/ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeBeginColumnHint(name, value)}; + }; + auto slice = [](decltype(a) all, int begin) { + return decltype(all){all.begin() + begin, all.end()}; + }; + check(indices[0], hint("a", "12345678"), {kVid, "a"}, expect(slice(a, 1), {0}), "case2.1"); + check(indices[0], hint("a", "123456780"), {kVid, "a"}, expect(slice(a, 2), {0}), "case2.2"); + check(indices[0], hint("a", ""), {kVid, "a"}, expect(a, {0}), "case2.3"); + check(indices[1], hint("b", ""), {kVid, "b"}, expect(b, {1}), "case2.4"); + check(indices[1], hint("b", "abc"), {kVid, "b"}, expect(slice(b, 2), {1}), "case2.5"); + check(indices[1], hint("b", "aac"), {kVid, "b"}, expect(slice(b, 1), {1}), "case2.6"); + check(indices[1], hint("b", "aacd\x01"), {kVid, "b"}, expect(slice(b, 2), {1}), "case2.7"); + check(indices[1], hint("b", "\xFF\xFF"), {kVid, "b"}, expect(slice(b, 4), {1}), "case2.8"); + check(indices[1], hint("b", "\xFF\xFF\x01"), {kVid, "b"}, {}, "case2.9"); + check(indices[2], hint("c", ""), {kVid, "c"}, expect(c, {2}), "case2.10"); + check(indices[2], hint("c", "\x01"), {kVid, "c"}, expect(slice(c, 3), {2}), "case2.11"); + check(indices[2], + hint("c", "\xFF\xFF\xFF\xFF\xFF\xFF\xFF"), + {kVid, "c"}, + expect(slice(c, 5), {2}), + "case2.12"); + } + /* Case 3: (x,y) */ { + auto hint = [](const char* name, const std::string& begin, const std::string& end) { + return std::vector{makeColumnHint(name, begin, end)}; + }; + auto slice = [](decltype(a) all, int begin, int end) { + return decltype(all){all.begin() + begin, all.begin() + end}; + }; + auto columnHint = hint("a", "12345678", "123456789"); + check(indices[0], columnHint, {kVid, "a"}, expect(slice(a, 2, 3), {0}), "case3.1"); + check(indices[0], hint("a", "", "123456"), {kVid, "a"}, {}, "case3.2"); + check(indices[1], hint("b", "", "\xFF"), {kVid, "b"}, expect(slice(b, 1, 4), {1}), "case3.3"); + columnHint = hint("b", "aaccd", "\xFF\xFF"); + check(indices[1], columnHint, {kVid, "b"}, expect(slice(b, 1, 4), {1}), "case3.4"); + columnHint = hint("b", "\xFF", "\xFF\xFF\x01"); + check(indices[1], columnHint, {kVid, "b"}, expect(slice(b, 4, 5), {1}), "case3.5"); + check(indices[2], hint("c", "", "\x01"), {kVid, "c"}, expect(slice(c, 2, 3), {2}), "case3.6"); + columnHint = hint("c", "\x00\x00\x01"s, "\x01\x01"); + check(indices[2], columnHint, {kVid, "c"}, expect(slice(c, 2, 4), {2}), "case3.7"); + columnHint = hint("c", "\x00\x01"s, "\x01\x01"); + check(indices[2], columnHint, {kVid, "c"}, expect(slice(c, 3, 4), {2}), "case3.8"); + } + /* Case 4: (INF,y]*/ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeEndColumnHint(name, value)}; + }; + auto slice = [](decltype(a) all, int end) { + return decltype(all){all.begin(), all.begin() + end}; + }; + check(indices[0], hint("a", "123456789"), {kVid, "a"}, expect(slice(a, 5), {0}), "case4.1"); + check(indices[0], hint("a", ""), {kVid, "a"}, expect(slice(a, 1), {0}), "case4.2"); + check(indices[0], hint("a", 
"\xFF"), {kVid, "a"}, expect(slice(a, 7), {0}), "case4.3"); + check(indices[1], hint("b", "\xFF\xFF"), {kVid, "b"}, expect(slice(b, 5), {1}), "case4.4"); + check(indices[1], hint("b", "\xFF\xFE"), {kVid, "b"}, expect(slice(b, 4), {1}), "case4.5"); + check(indices[2], hint("c", "\x00\x00\x01"s), {kVid, "c"}, expect(slice(c, 2), {2}), "case4.6"); + check(indices[2], hint("c", "\x00\x01"s), {kVid, "c"}, expect(slice(c, 3), {2}), "case4.7"); + check(indices[2], hint("c", "\x01"), {kVid, "c"}, expect(slice(c, 4), {2}), "case4.8"); + auto columnHint = hint("c", "\xFF\xFF\xFF\xFF\xFF\xFF\xFF"); + check(indices[2], columnHint, {kVid, "c"}, expect(c, {2}), "case4.9"); + } +} +TEST_F(IndexScanTest, String2) { + /** + * data with truncate + * query without truncate + * That means ScanNode need to access base data only when require indexed column + */ + auto rows = + " string | string | string | int \n" + " 123456 | ABCDE2 | | 0 \n" + " 1234567 | ABCDE1 | \xFF\xFF\xFF\xFF\xFF | 1 \n" + " 1234567 | ABCDE | \xFF\xFF\xFF\xFF\xFF\x00\x01 | 2 \n" + " 123457 | ABCDF | \xFF\xFF\xFF\xFF\xFF | 3 \n" + ""_row; + auto schema = R"( + c1 | string | | false + c2 | string | | true + c3 | string | | true + )"_schema; + auto indices = R"( + TAG(t,1) + (i1,2):c1(5) + (i2,3):c2(5) + (i3,4):c3(5) + )"_index(schema); + auto kv = encodeTag(rows, 1, schema, indices); + auto kvstore = std::make_unique(); + for (size_t i = 0; i < kv.size(); i++) { + for (auto& item : kv[i]) { + kvstore->put(item.first, item.second); + } + } + auto check = [&](std::shared_ptr index, + const std::vector& columnHints, + const std::vector& acquiredColumns, + const std::vector& expect, + const std::string& case_) { + auto context = makeContext(1, 0); + auto scanNode = + std::make_unique(context.get(), 0, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), index); + helper.setTag(scanNode.get(), schema); + helper.setFatal(scanNode.get(), true); + InitContext initCtx; + initCtx.requiredColumns.insert(acquiredColumns.begin(), acquiredColumns.end()); + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + std::vector result2(result.size()); + for (size_t j = 0; j < acquiredColumns.size(); j++) { + int p = initCtx.retColMap[acquiredColumns[j]]; + for (size_t i = 0; i < result.size(); i++) { + result2[i].emplace_back(result[i][p]); + } + } + result = result2; + EXPECT_EQ(result, expect) << "Fail at case " << case_; + }; + auto expect = [&rows](const std::vector& vidList, const std::vector& columns) { + std::vector ret; + for (size_t i = 0; i < vidList.size(); i++) { + Row row; + row.emplace_back(Value(std::to_string(vidList[i]))); + for (size_t j = 0; j < columns.size(); j++) { + row.emplace_back(rows[vidList[i]][columns[j]]); + } + ret.emplace_back(std::move(row)); + } + return ret; + }; + /* Case 1: Prefix */ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeColumnHint(name, value)}; + }; + check(indices[0], hint("c1", "1234"), {kVid, "c1"}, {}, "case1.1"); + check(indices[0], hint("c1", "12345"), {kVid, "c1"}, {}, "case1.2"); + check(indices[1], hint("c2", "ABCDE"), {kVid, "c2"}, expect({2}, {1}), "case1.3"); + check(indices[2], + hint("c3", "\xFF\xFF\xFF\xFF\xFF"), + {kVid, "c3"}, + expect({1, 3}, {2}), + "case1.4"); + } + /* Case 2: (x, INF)*/ { + auto hint = [](const char* name, const 
std::string& value) { + return std::vector{makeBeginColumnHint(name, value)}; + }; + check(indices[0], hint("c1", "12345"), {kVid, "c1"}, expect({0, 1, 2, 3}, {0}), "case2.1"); + check(indices[1], hint("c2", "ABCDE"), {kVid, "c2"}, expect({0, 1, 3}, {1}), "case2.2"); + check( + indices[2], hint("c3", "\xFF\xFF\xFF\xFF\xFF"), {kVid, "c3"}, expect({2}, {2}), "case2.3"); + } + /* Case 3: [x, y] */ { + auto hint = [](const char* name, const std::string& begin, const std::string& end) { + return std::vector{makeColumnHint(name, begin, end)}; + }; + auto columnHint = hint("c1", "12345", "12346"); + check(indices[0], columnHint, {kVid, "c1"}, expect({0, 1, 2, 3}, {0}), "case3.1"); + columnHint = hint("c1", "12345", "12345"); + check(indices[0], columnHint, {kVid, "c1"}, {}, "case3.2"); + columnHint = hint("c2", "ABCDE", "ABCDF"); + check(indices[1], columnHint, {kVid, "c2"}, expect({0, 1, 2, 3}, {1}), "case3.3"); + columnHint = hint("c2", "ABCDE", "ABCDE"); + check(indices[1], columnHint, {kVid, "c2"}, expect({2}, {1}), "case3.4"); + columnHint = hint("c3", "\xFF\xFF\xFF\xFF\xFF", "\xFF\xFF\xFF\xFF\xFF"); + check(indices[2], columnHint, {kVid, "c3"}, expect({1, 3}, {2}), "case3.5"); + } + /* Case 4: (INF,y)*/ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeEndColumnHint(name, value)}; + }; + auto columnHint = hint("c1", "12345"); + check(indices[0], columnHint, {kVid, "c1"}, {}, "case4.1"); + columnHint = hint("c2", "ABCDE"); + check(indices[1], columnHint, {kVid, "c2"}, {}, "case4.2"); + columnHint = hint("c2", "ABCDF"); + check(indices[1], columnHint, {kVid, "c2"}, expect({0, 1, 2}, {1}), "case4.3"); + columnHint = hint("c3", " \xFF\xFF\xFF\xFF\xFF"); + check(indices[2], columnHint, {kVid, "c3"}, {}, "case4.4"); + } +} +TEST_F(IndexScanTest, String3) { + /** + * data without truncate + * query with truncate + * That means ScanNode only access Index Key-Values + */ + auto rows = + " string | string | string | int \n" + " abcde | 98765 | | 0 \n" + " abcda | 12345 | \xFF\xFF\xFF\xFF\xFF | 1 \n" + " abcda | 98766 | | 2 \n" + " | | | 3 \n" + ""_row; + auto schema = R"( + a | string | | false + b | string | | true + c | string | | true + )"_schema; + auto indices = R"( + TAG(t,0) + (ia,1): a(6) + (ib,2): b(6) + (ic,3): c(6) + )"_index(schema); + auto kv = encodeTag(rows, 1, schema, indices); + auto kvstore = std::make_unique(); + for (size_t i = 0; i < kv.size(); i++) { + for (auto& item : kv[i]) { + kvstore->put(item.first, item.second); + } + } + auto check = [&](std::shared_ptr index, + const std::vector& columnHints, + const std::vector& acquiredColumns, + const std::vector& expect, + const std::string& case_) { + auto context = makeContext(1, 0); + auto scanNode = + std::make_unique(context.get(), 0, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), index); + helper.setTag(scanNode.get(), schema); + helper.setFatal(scanNode.get(), true); + InitContext initCtx; + initCtx.requiredColumns.insert(acquiredColumns.begin(), acquiredColumns.end()); + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + std::vector result2(result.size()); + for (size_t j = 0; j < acquiredColumns.size(); j++) { + int p = initCtx.retColMap[acquiredColumns[j]]; + for (size_t i = 0; i < result.size(); i++) { + result2[i].emplace_back(result[i][p]); + } + } + 
result = result2; + EXPECT_EQ(result, expect) << "Fail at case " << case_; + }; + auto expect = [&rows](const std::vector& vidList, const std::vector& columns) { + std::vector ret; + for (size_t i = 0; i < vidList.size(); i++) { + Row row; + row.emplace_back(Value(std::to_string(vidList[i]))); + for (size_t j = 0; j < columns.size(); j++) { + row.emplace_back(rows[vidList[i]][columns[j]]); + } + ret.emplace_back(std::move(row)); + } + return ret; + }; + /* Case 1: Prefix */ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeColumnHint(name, value)}; + }; + check(indices[0], hint("a", "abcde "), {kVid, "a"}, {}, "case1.1"); + check(indices[2], hint("c", "\xFF\xFF\xFF\xFF\xFF\xFF"), {kVid, "c"}, {}, "case1.2"); + } + /* Case 2: [x, INF)*/ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeBeginColumnHint(name, value)}; + }; + check(indices[0], hint("a", "abcdef"), {kVid, "a"}, {}, "case2.1"); + check(indices[0], hint("a", "abcda "), {kVid, "a"}, expect({0}, {0}), "case2.2"); + check(indices[1], hint("b", "987654 "), {kVid, "b"}, expect({2}, {1}), "case2.3"); + check(indices[2], hint("c", "\xFF\xFF\xFF\xFF\xFF\xFF"), {kVid, "c"}, {}, "case2.4"); + } + /* Case 3: (x, y]*/ { + auto hint = [](const char* name, const std::string& begin, const std::string& end) { + return std::vector{makeColumnHint(name, begin, end)}; + }; + auto columnHint = hint("a", "abcda ", "abcde "); + check(indices[0], columnHint, {kVid, "a"}, expect({0}, {0}), "case3.1"); + columnHint = hint("b", "98765 ", "98766 "); + check(indices[1], columnHint, {kVid, "b"}, expect({2}, {1}), "case3.2"); + columnHint = hint("c", "\xFF\xFF\xFF\xFF\xFE ", "\xFF\xFF\xFF\xFF\xFF "); + check(indices[2], columnHint, {kVid, "c"}, expect({1}, {2}), "case3.3"); + } + /* Case 4: (INF,y)*/ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeEndColumnHint(name, value)}; + }; + check(indices[0], hint("a", "abcde "), {kVid, "a"}, expect({3, 1, 2, 0}, {0}), "case4.1"); + check(indices[1], hint("b", "98764 "), {kVid, "b"}, expect({1}, {1}), "case4.2"); + check(indices[2], + hint("c", "\xFF\xFF\xFF\xFF\xFF "), + {kVid, "c"}, + expect({2, 3, 1}, {2}), + "case4.3"); + } +} +TEST_F(IndexScanTest, String4) { + /** + * data with truncate + * query with truncate + * That means ScanNode always need to access base data. 
+ */ + auto rows = + " string | string | int \n" + " abcde1 | 987654 | 0 \n" + " abcdd | 98765 | 1 \n" + " abcdf | 12345 | 2 \n" + " abcde | \xFF\xFF\xFF\xFF\xFF\xFF | 3 \n" + " abcde12 | | 4 \n" + " abcde123 | \xFF\xFF\xFF\xFF\xFF | 5 \n" + " abcde1234 | \xFF\xFF\xFF\xFF\xFF\xFF\x01| 6 \n" + " abcde1234 | | 7 \n" + ""_row; + auto schema = R"( + a | string | | false + b | string | | true + )"_schema; + auto indices = R"( + TAG(t,0) + (ia,1): a(5) + (ib,2): b(5) + )"_index(schema); + auto kv = encodeTag(rows, 1, schema, indices); + auto kvstore = std::make_unique(); + for (size_t i = 0; i < kv.size(); i++) { + for (auto& item : kv[i]) { + kvstore->put(item.first, item.second); + } + } + auto check = [&](std::shared_ptr index, + const std::vector& columnHints, + const std::vector& acquiredColumns, + const std::vector& expect, + const std::string& case_) { + auto context = makeContext(1, 0); + auto scanNode = + std::make_unique(context.get(), 0, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), index); + helper.setTag(scanNode.get(), schema); + helper.setFatal(scanNode.get(), true); + InitContext initCtx; + initCtx.requiredColumns.insert(acquiredColumns.begin(), acquiredColumns.end()); + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + std::vector result2(result.size()); + for (size_t j = 0; j < acquiredColumns.size(); j++) { + int p = initCtx.retColMap[acquiredColumns[j]]; + for (size_t i = 0; i < result.size(); i++) { + result2[i].emplace_back(result[i][p]); + } + } + result = result2; + EXPECT_EQ(result, expect) << "Fail at case " << case_; + }; + auto expect = [&rows](const std::vector& vidList, const std::vector& columns) { + std::vector ret; + for (size_t i = 0; i < vidList.size(); i++) { + Row row; + row.emplace_back(Value(std::to_string(vidList[i]))); + for (size_t j = 0; j < columns.size(); j++) { + row.emplace_back(rows[vidList[i]][columns[j]]); + } + ret.emplace_back(std::move(row)); + } + return ret; + }; + /* Case 1: Prefix */ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeColumnHint(name, value)}; + }; + check(indices[0], hint("a", "abcde"), {kVid, "a"}, expect({3}, {0}), "case1.1"); + check(indices[0], hint("a", "abcde1234"), {kVid, "a"}, expect({6, 7}, {0}), "case1.2"); + check(indices[0], hint("a", "abcde2"), {kVid, "a"}, {}, "case1.3"); + check(indices[1], hint("b", "\xFF\xFF\xFF\xFF\xFF"), {kVid, "b"}, expect({5}, {1}), "case1.4"); + } + /* Case 2: (x, INF) */ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeBeginColumnHint(name, value)}; + }; + check(indices[0], hint("a", "abcde"), {kVid, "a"}, expect({0, 4, 5, 6, 7, 2}, {0}), "case2.1"); + check(indices[0], hint("a", "abcde12"), {kVid, "a"}, expect({5, 6, 7, 2}, {0}), "case2.2"); + check(indices[0], hint("a", "abcde12345"), {kVid, "a"}, expect({2}, {0}), "case2.3"); + check( + indices[0], hint("a", "abcdd"), {kVid, "a"}, expect({0, 3, 4, 5, 6, 7, 2}, {0}), "case2.4"); + auto columnHint = hint("b", "\xFF\xFF\xFF\xFF\xFF"); + check(indices[1], columnHint, {kVid, "b"}, expect({3, 6}, {1}), "case2.5"); + columnHint = hint("b", "\xFF\xFF\xFF\xFF\xFF\x01"); + check(indices[1], columnHint, {kVid, "b"}, expect({3, 6}, {1}), "case2.6"); + columnHint = hint("b", "\xFF\xFF\xFF\xFF\xFF\xFF"); + check(indices[1], columnHint, 
{kVid, "b"}, expect({6}, {1}), "case2.7"); + } + /* Case 3: [x,y) */ { + auto hint = [](const char* name, const std::string& begin, const std::string& end) { + return std::vector{makeColumnHint(name, begin, end)}; + }; + auto columnHint = hint("a", "abcdd123", "abcde1234"); + check(indices[0], columnHint, {kVid, "a"}, expect({0, 3, 4, 5}, {0}), "case3.1"); + columnHint = hint("a", "abcde1", "abcdf"); + check(indices[0], columnHint, {kVid, "a"}, expect({0, 4, 5, 6, 7}, {0}), "case3.2"); + columnHint = hint("a", "abcde12345", "abcde123456"); + check(indices[0], columnHint, {kVid, "a"}, {}, "case3.3"); + columnHint = hint("a", "abcde1234", "abcde12345"); + check(indices[0], columnHint, {kVid, "a"}, expect({6, 7}, {0}), "case3.4"); + columnHint = hint("b", "\xFF\xFF\xFF\xFF\xFF", "\xFF\xFF\xFF\xFF\xFF\x00\x01"s); + check(indices[1], columnHint, {kVid, "b"}, expect({5}, {1}), "case3.5"); + columnHint = hint("b", "\xFF\xFF\xFF\xFF\xFF\x01", "\xFF\xFF\xFF\xFF\xFF\xFF\xFF"); + check(indices[1], columnHint, {kVid, "b"}, expect({3, 6}, {1}), "case3.6"); + } + /* Case 4: (INF,y] */ { + auto hint = [](const char* name, const std::string& value) { + return std::vector{makeEndColumnHint(name, value)}; + }; + check(indices[0], hint("a", "abcde123"), {kVid, "a"}, expect({1, 0, 3, 4, 5}, {0}), "case4.1"); + check(indices[0], hint("a", "abcde"), {kVid, "a"}, expect({1, 3}, {0}), "case4.2"); + check(indices[0], + hint("a", "abcde1234"), + {kVid, "a"}, + expect({1, 0, 3, 4, 5, 6, 7}, {0}), + "case4.3"); + check(indices[1], + hint("b", "\xFF\xFF\xFF\xFF\xFF"), + {kVid, "b"}, + expect({2, 0, 1, 5}, {1}), + "case4.4"); + check(indices[1], + hint("b", "\xFF\xFF\xFF\xFF\xFF\xFF"), + {kVid, "b"}, + expect({2, 0, 1, 3, 5}, {1}), + "case4.5"); + } +} +TEST_F(IndexScanTest, Nullable) { + std::shared_ptr schema; + auto kvstore = std::make_unique(); + auto check = [&](std::shared_ptr index, + const std::vector& columnHints, + const std::vector& expect, + const std::string& case_) { + auto context = makeContext(1, 0); + auto scanNode = + std::make_unique(context.get(), 0, columnHints, kvstore.get()); + IndexScanTestHelper helper; + helper.setIndex(scanNode.get(), index); + helper.setTag(scanNode.get(), schema); + helper.setFatal(scanNode.get(), true); + InitContext initCtx; + initCtx.requiredColumns = {kVid}; + scanNode->init(initCtx); + scanNode->execute(0); + + std::vector result; + while (true) { + auto res = scanNode->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + EXPECT_EQ(result, expect) << "Fail at case " << case_; + }; + auto hint = [](const std::string& name) { + return std::vector{makeColumnHint(name, Value::kNullValue)}; + }; + auto expect = [](auto... 
vidList) { + std::vector ret; + std::vector value; + (value.push_back(std::to_string(vidList)), ...); + for (auto& v : value) { + Row row; + row.emplace_back(v); + ret.emplace_back(std::move(row)); + } + return ret; + }; + /* Case 1: Int*/ { + auto rows = R"( + int | int + 0 | 0 + 9223372036854775807 | + 9223372036854775807 | + -9223372036854775807 | 9223372036854775807 + )"_row; + schema = R"( + a | int | | false + b | int | | true + )"_schema; + auto indices = R"( + TAG(t,1) + (ia,2):a + (ib,3):b + (iba,4):b,a + )"_index(schema); + auto kv = encodeTag(rows, 1, schema, indices); + kvstore = std::make_unique(); + for (auto& iter : kv) { + for (auto& item : iter) { + kvstore->put(item.first, item.second); + } + } + check(indices[0], hint("a"), {}, "case1.1"); + check(indices[1], hint("b"), expect(1, 2), "case1.2"); + check(indices[2], hint("b"), expect(1, 2), "case1.3"); + } + /* Case 2: Float */ { + auto rows = R"( + float | float + 1.7976931348623157e+308 | + 0 | + | + | <-NaN> + )"_row; + schema = R"( + a | double | | false + b | double | | true + )"_schema; + auto indices = R"( + TAG(t,1) + (ia,2):a + (ib,3):b + (iba,4):b,a + )"_index(schema); + auto kv = encodeTag(rows, 1, schema, indices); + kvstore = std::make_unique(); + for (auto& iter : kv) { + for (auto& item : iter) { + kvstore->put(item.first, item.second); + } + } + check(indices[0], hint("a"), {}, "case2.1"); + check(indices[1], hint("b"), expect(0, 2), "case2.2"); + check(indices[2], hint("b"), expect(0, 2), "case2.3"); + } + /* Case 3: String */ { + auto rows = R"( + string | string + \xFF\xFF\xFF | + 123 | 456 + \xFF\xFF\x01 | \xFF\xFF\xFF + \xFF\xFF\x01 | + )"_row; + schema = R"( + a | string | | false + b | string | | true + )"_schema; + auto indices = R"( + TAG(t,1) + (ia,2):a(3) + (ib,3):b(3) + (iba,4):b(3),a(3) + )"_index(schema); + auto kv = encodeTag(rows, 1, schema, indices); + kvstore = std::make_unique(); + for (auto& iter : kv) { + for (auto& item : iter) { + kvstore->put(item.first, item.second); + } + } + check(indices[0], hint("a"), {}, "case3.1"); + check(indices[1], hint("b"), expect(0, 3), "case3.2"); + check(indices[2], hint("b"), expect(0, 3), "case3.3"); + } +} +TEST_F(IndexScanTest, TTL) { + // TODO(hs.zhang): add unittest +} +TEST_F(IndexScanTest, Time) { + // TODO(hs.zhang): add unittest +} +TEST_F(IndexScanTest, Date) { + // TODO(hs.zhang): add unittest +} +TEST_F(IndexScanTest, DateTime) { + // TODO(hs.zhang): add unittest +} +TEST_F(IndexScanTest, Compound) { + // TODO(hs.zhang): add unittest +} + +class IndexTest : public ::testing::Test { + protected: + static PlanContext* getPlanContext() { + static std::unique_ptr ctx = std::make_unique(nullptr, 0, 8, false); + return ctx.get(); + } + static std::unique_ptr makeContext() { + auto ctx = std::make_unique(getPlanContext()); + ctx->tagId_ = 0; + ctx->edgeType_ = 0; + return ctx; + } + static std::vector collectResult(IndexNode* node) { + std::vector result; + InitContext initCtx; + node->init(initCtx); + while (true) { + auto res = node->next(); + ASSERT(res.success()); + if (!res.hasData()) { + break; + } + result.emplace_back(std::move(res).row()); + } + return result; + } + static std::vector pick(const std::vector& rows, const std::vector& indices) { + std::vector ret; + for (auto i : indices) { + ret.push_back(rows[i]); + } + return ret; + } + ::nebula::ObjectPool pool; +}; + +TEST_F(IndexTest, Selection) { + const auto rows = R"( + int | int + 1 | 2 + | + 8 | 10 + 8 | 10 + )"_row; + size_t currentOffset = 0; + auto ctx = makeContext(); + 
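Each of the node tests below hand-wires a MockIndexNode with executeFunc/nextFunc/initFunc closures over a canned row vector. Purely as an illustration (the helper name makeMockChild and its exact shape are assumptions of this sketch, not an API introduced by the patch), that boilerplate could be factored out like this:

// Hypothetical helper, not part of the patch: builds a MockIndexNode that feeds
// canned rows. `rows` is captured by reference and must outlive the node.
std::unique_ptr<MockIndexNode> makeMockChild(RuntimeContext* ctx,
                                             const std::vector<Row>& rows,
                                             std::vector<std::string> cols) {
  auto child = std::make_unique<MockIndexNode>(ctx);
  child->executeFunc = [](PartitionID) { return ::nebula::cpp2::ErrorCode::SUCCEEDED; };
  // Keep the cursor inside the closure instead of capturing an external offset.
  child->nextFunc = [&rows, offset = size_t{0}]() mutable -> IndexNode::Result {
    return offset < rows.size() ? IndexNode::Result(Row(rows[offset++])) : IndexNode::Result();
  };
  child->initFunc = [cols = std::move(cols)](InitContext& initCtx) {
    initCtx.returnColumns = cols;
    for (size_t i = 0; i < cols.size(); i++) {
      initCtx.retColMap[cols[i]] = i;
    }
    return ::nebula::cpp2::ErrorCode::SUCCEEDED;
  };
  return child;
}

A call such as makeMockChild(ctx.get(), rows, {"a", "b"}) would then stand in for the repeated lambda setup in the Selection, Projection, Limit and Dedup tests.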
auto expr = RelationalExpression::makeGE(&pool, + TagPropertyExpression::make(&pool, "", "a"), + ConstantExpression::make(&pool, Value(5))); + + auto selection = std::make_unique<IndexSelectionNode>(ctx.get(), expr); + auto mockChild = std::make_unique<MockIndexNode>(ctx.get()); + mockChild->executeFunc = [](PartitionID) { return ::nebula::cpp2::ErrorCode::SUCCEEDED; }; + mockChild->nextFunc = [&rows, &currentOffset]() -> IndexNode::Result { + if (currentOffset < rows.size()) { + auto row = rows[currentOffset++]; + return IndexNode::Result(std::move(row)); + } else { + return IndexNode::Result(); + } + }; + mockChild->initFunc = [](InitContext& initCtx) -> ::nebula::cpp2::ErrorCode { + initCtx.returnColumns = {"a", "b"}; + initCtx.retColMap = {{"a", 0}, {"b", 1}}; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }; + selection->addChild(std::move(mockChild)); + ASSERT_EQ(collectResult(selection.get()), pick(rows, {2, 3})); +} +TEST_F(IndexTest, Projection) { + const auto rows = R"( + int | int | int + 1 | 2 | 3 + 4 | 5 | 6 + 7 | 8 | 9 + )"_row; + size_t currentOffset = 0; + auto ctx = makeContext(); + auto projection = + std::make_unique<IndexProjectionNode>(ctx.get(), std::vector<std::string>{"c", "a", "b"}); + auto mockChild = std::make_unique<MockIndexNode>(ctx.get()); + mockChild->executeFunc = [](PartitionID) { return ::nebula::cpp2::ErrorCode::SUCCEEDED; }; + mockChild->nextFunc = [&rows, &currentOffset]() -> IndexNode::Result { + if (currentOffset < rows.size()) { + auto row = rows[currentOffset++]; + return IndexNode::Result(std::move(row)); + } else { + return IndexNode::Result(); + } + }; + mockChild->initFunc = [](InitContext& initCtx) -> ::nebula::cpp2::ErrorCode { + initCtx.returnColumns = {"a", "b", "c"}; + initCtx.retColMap = {{"a", 0}, {"b", 1}, {"c", 2}}; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }; + projection->addChild(std::move(mockChild)); + auto expect = R"( + int | int | int + 3 | 1 | 2 + 6 | 4 | 5 + 9 | 7 | 8 + )"_row; + ASSERT_EQ(collectResult(projection.get()), expect); +} +TEST_F(IndexTest, Limit) { + auto genRows = [](int start, int end) { + std::vector<Row> ret; + for (int i = start; i < end; i++) { + Row row; + row.emplace_back(Value(i)); + row.emplace_back(Value(i * i)); + row.emplace_back(Value(i * i * i)); + ret.emplace_back(std::move(row)); + } + return ret; + }; + auto rows = genRows(0, 1000); + size_t currentOffset = 0; + auto ctx = makeContext(); + auto limit = std::make_unique<IndexLimitNode>(ctx.get(), 10); + auto mockChild = std::make_unique<MockIndexNode>(ctx.get()); + mockChild->executeFunc = [](PartitionID) { return ::nebula::cpp2::ErrorCode::SUCCEEDED; }; + mockChild->nextFunc = [&rows, &currentOffset]() -> IndexNode::Result { + if (currentOffset < rows.size()) { + auto row = rows[currentOffset++]; + return IndexNode::Result(std::move(row)); + } else { + return IndexNode::Result(); + } + }; + mockChild->initFunc = [](InitContext&) -> ::nebula::cpp2::ErrorCode { + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }; + limit->addChild(std::move(mockChild)); + ASSERT_EQ(collectResult(limit.get()), genRows(0, 10)); +} +TEST_F(IndexTest, Dedup) { + auto rows1 = R"( + int | int + 1 | 2 + 1 | 3 + 2 | 2 + )"_row; + auto rows2 = R"( + int | int + 1 | 4 + 2 | 3 + 1 | 5 + 3 | 6 + )"_row; + size_t offset1 = 0, offset2 = 0; + auto ctx = makeContext(); + auto dedup = std::make_unique<IndexDedupNode>(ctx.get(), std::vector<std::string>{"a"}); + auto child1 = std::make_unique<MockIndexNode>(ctx.get()); + child1->executeFunc = [](PartitionID) { return ::nebula::cpp2::ErrorCode::SUCCEEDED; }; + child1->nextFunc = [&rows1, &offset1]() -> IndexNode::Result { + if (offset1 < rows1.size()) { + auto row = rows1[offset1++]; + return 
IndexNode::Result(std::move(row)); + } else { + return IndexNode::Result(); + } + }; + child1->initFunc = [](InitContext& initCtx) -> ::nebula::cpp2::ErrorCode { + initCtx.returnColumns = {"a", "b"}; + initCtx.retColMap = {{"a", 0}, {"b", 1}}; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }; + auto child2 = std::make_unique(ctx.get()); + child2->executeFunc = [](PartitionID) { return ::nebula::cpp2::ErrorCode::SUCCEEDED; }; + child2->nextFunc = [&rows2, &offset2]() -> IndexNode::Result { + if (offset2 < rows2.size()) { + auto row = rows2[offset2++]; + return IndexNode::Result(std::move(row)); + } else { + return IndexNode::Result(); + } + }; + child2->initFunc = [](InitContext& initCtx) -> ::nebula::cpp2::ErrorCode { + initCtx.returnColumns = {"a", "b"}; + initCtx.retColMap = {{"a", 0}, {"b", 1}}; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }; + dedup->addChild(std::move(child1)); + dedup->addChild(std::move(child2)); + auto expect = R"( + int | int + 1 | 2 + 2 | 2 + 3 | 6 + )"_row; + ASSERT_EQ(collectResult(dedup.get()), expect); +} +} // namespace storage +} // namespace nebula +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + folly::init(&argc, &argv, true); + google::SetStderrLogging(google::INFO); + return RUN_ALL_TESTS(); +} diff --git a/src/storage/test/IndexTestUtil.h b/src/storage/test/IndexTestUtil.h new file mode 100644 index 00000000000..73e457e6167 --- /dev/null +++ b/src/storage/test/IndexTestUtil.h @@ -0,0 +1,611 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#pragma once +#include +#include +#include +#include +#include +#include + +#include "common/datatypes/DataSet.h" +#include "common/meta/NebulaSchemaProvider.h" +#include "folly/Conv.h" +#include "folly/String.h" +#include "kvstore/KVIterator.h" +#include "kvstore/KVStore.h" +#include "storage/exec/IndexNode.h" +namespace nebula { +namespace storage { +using ::nebula::kvstore::KVIterator; +class MockKVIterator : public KVIterator { + using KVMap = std::map; + + public: + MockKVIterator(const KVMap& kv, KVMap::iterator&& iter) : kv_(kv), iter_(std::move(iter)) {} + bool valid() const { return iter_ != kv_.end() && validFunc_(iter_); } + void next() { iter_++; } + void prev() { iter_--; } + folly::StringPiece key() const { return folly::StringPiece(iter_->first); } + folly::StringPiece val() const { return folly::StringPiece(iter_->second); } + void setValidFunc(const std::function validFunc) { + validFunc_ = validFunc; + } + + private: + const KVMap& kv_; + KVMap::iterator iter_; + std::function validFunc_; +}; +class MockKVStore : public ::nebula::kvstore::KVStore { + private: + GraphSpaceID spaceId_{0}; + std::map kv_; + + public: + MockKVStore() {} + // Return bit-OR of StoreCapability values; + uint32_t capability() const override { + CHECK(false); + return 0; + }; + void stop() override {} + ErrorOr partLeader(GraphSpaceID spaceId, + PartitionID partID) override { + UNUSED(spaceId), UNUSED(partID); + CHECK(false); + return nebula::cpp2::ErrorCode::SUCCEEDED; + } + // Read a single key + nebula::cpp2::ErrorCode get(GraphSpaceID spaceId, + PartitionID partId, + const std::string& key, + std::string* value, + bool canReadFromFollower = false) override { + UNUSED(canReadFromFollower); + UNUSED(partId); + CHECK_EQ(spaceId, spaceId_); + auto iter = kv_.lower_bound(key); + if (iter != kv_.end() && iter->first == key) { + *value = iter->second; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } else { + 
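// lower_bound() did not land on an exact match, so the mock reports the key as missing.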
return ::nebula::cpp2::ErrorCode::E_KEY_NOT_FOUND; + } + } + + // Read multiple keys, if error occurs a cpp2::ErrorCode is returned, + // If key[i] does not exist, the i-th value in return value would be + // Status::KeyNotFound + std::pair> multiGet( + GraphSpaceID spaceId, + PartitionID partId, + const std::vector& keys, + std::vector* values, + bool canReadFromFollower = false) override { + UNUSED(canReadFromFollower); + UNUSED(spaceId); + UNUSED(partId); + std::vector status; + nebula::cpp2::ErrorCode ret = nebula::cpp2::ErrorCode::SUCCEEDED; + for (auto& key : keys) { + auto iter = kv_.lower_bound(key); + if (iter != kv_.end() && iter->first == key) { + values->push_back(iter->second); + status.push_back(Status::OK()); + } else { + values->push_back(""); + status.push_back(Status::KeyNotFound()); + ret = nebula::cpp2::ErrorCode::E_PARTIAL_RESULT; + } + } + return {ret, std::move(status)}; + } + + // Get all results in range [start, end) + nebula::cpp2::ErrorCode range(GraphSpaceID spaceId, + PartitionID partId, + const std::string& start, + const std::string& end, + std::unique_ptr* iter, + bool canReadFromFollower = false) override { + UNUSED(spaceId); + UNUSED(partId); + UNUSED(canReadFromFollower); + CHECK_EQ(spaceId, spaceId_); + std::unique_ptr mockIter; + mockIter = std::make_unique(kv_, kv_.lower_bound(start)); + mockIter->setValidFunc([end](const decltype(kv_)::iterator& it) { return it->first < end; }); + (*iter) = std::move(mockIter); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + // virtual nebula::cpp2::ErrorCode prefix(GraphSpaceID spaceId, + // PartitionID partId, + // std::string&& prefix, + // std::unique_ptr* iter, + // bool canReadFromFollower = false) = delete override; + // virtual nebula::cpp2::ErrorCode rangeWithPrefix(GraphSpaceID spaceId, + // PartitionID partId, + // std::string&& start, + // std::string&& prefix, + // std::unique_ptr* iter, + // bool canReadFromFollower = false) = delete; + // virtual nebula::cpp2::ErrorCode range(GraphSpaceID spaceId, + // PartitionID partId, + // std::string&& start, + // std::string&& end, + // std::unique_ptr* iter, + // bool canReadFromFollower = false) = delete; + nebula::cpp2::ErrorCode prefix(GraphSpaceID spaceId, + PartitionID partId, + const std::string& prefix, + std::unique_ptr* iter, + bool canReadFromFollower = false) override { + UNUSED(canReadFromFollower); + UNUSED(spaceId); + UNUSED(partId); + CHECK_EQ(spaceId, spaceId_); + auto mockIter = std::make_unique(kv_, kv_.lower_bound(prefix)); + mockIter->setValidFunc([prefix](const decltype(kv_)::iterator& it) { + if (it->first.size() < prefix.size()) { + return false; + } + for (size_t i = 0; i < prefix.size(); i++) { + if (prefix[i] != it->first[i]) { + return false; + } + } + return true; + }); + (*iter) = std::move(mockIter); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + + // Get all results with prefix starting from start + nebula::cpp2::ErrorCode rangeWithPrefix(GraphSpaceID spaceId, + PartitionID partId, + const std::string& start, + const std::string& prefix, + std::unique_ptr* iter, + bool canReadFromFollower = false) override { + UNUSED(canReadFromFollower); + UNUSED(spaceId); + UNUSED(partId); + CHECK_EQ(spaceId, spaceId_); + auto mockIter = std::make_unique(kv_, kv_.lower_bound(start)); + mockIter->setValidFunc([prefix](const decltype(kv_)::iterator& it) { + if (it->first.size() < prefix.size()) { + return false; + } + for (size_t i = 0; i < prefix.size(); i++) { + if (prefix[i] != it->first[i]) { + return false; + } + } + return true; + 
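// (The validity check above is a plain starts-with test; it is equivalent to
//  it->first.compare(0, prefix.size(), prefix) == 0.)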
}); + (*iter) = std::move(mockIter); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + + nebula::cpp2::ErrorCode sync(GraphSpaceID spaceId, PartitionID partId) override { + UNUSED(spaceId); + UNUSED(partId); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + + void asyncMultiPut(GraphSpaceID spaceId, + PartitionID partId, + std::vector<::nebula::kvstore::KV>&& keyValues, + ::nebula::kvstore::KVCallback cb) override { + UNUSED(spaceId); + UNUSED(partId); + UNUSED(cb); + for (size_t i = 0; i < keyValues.size(); i++) { + kv_.emplace(std::move(keyValues[i])); + } + } + + // Asynchronous version of remove methods + void asyncRemove(GraphSpaceID spaceId, + PartitionID partId, + const std::string& key, + ::nebula::kvstore::KVCallback cb) override { + UNUSED(spaceId); + UNUSED(partId); + UNUSED(cb); + kv_.erase(key); + } + + void asyncMultiRemove(GraphSpaceID spaceId, + PartitionID partId, + std::vector&& keys, + ::nebula::kvstore::KVCallback cb) override { + UNUSED(spaceId); + UNUSED(partId); + UNUSED(cb); + for (size_t i = 0; i < keys.size(); i++) { + kv_.erase(keys[i]); + } + } + + void asyncRemoveRange(GraphSpaceID spaceId, + PartitionID partId, + const std::string& start, + const std::string& end, + ::nebula::kvstore::KVCallback cb) override { + UNUSED(spaceId); + UNUSED(partId); + UNUSED(cb); + for (auto iter = kv_.lower_bound(start); iter != kv_.end();) { + if (iter->first < end) { + iter = kv_.erase(iter); + } else { + iter++; + } + } + } + + void asyncAtomicOp(GraphSpaceID spaceId, + PartitionID partId, + raftex::AtomicOp op, + ::nebula::kvstore::KVCallback cb) override { + UNUSED(spaceId); + UNUSED(partId); + UNUSED(cb); + UNUSED(op); + LOG(FATAL) << "Unexpect"; + } + void asyncAppendBatch(GraphSpaceID spaceId, + PartitionID partId, + std::string&& batch, + ::nebula::kvstore::KVCallback cb) override { + UNUSED(spaceId); + UNUSED(partId); + UNUSED(cb); + LOG(FATAL) << "Unexpect " << batch; + } + nebula::cpp2::ErrorCode ingest(GraphSpaceID spaceId) override { + UNUSED(spaceId); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + + int32_t allLeader( + std::unordered_map>& leaderIds) override { + UNUSED(leaderIds); + + LOG(FATAL) << "Unexpect"; + return 0; + } + + ErrorOr> part( + GraphSpaceID spaceId, PartitionID partId) override { + UNUSED(spaceId); + UNUSED(partId); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + nebula::cpp2::ErrorCode compact(GraphSpaceID spaceId) override { + UNUSED(spaceId); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + nebula::cpp2::ErrorCode flush(GraphSpaceID spaceId) override { + UNUSED(spaceId); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + + ErrorOr> createCheckpoint( + GraphSpaceID spaceId, const std::string& name) override { + UNUSED(spaceId); + UNUSED(name); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + }; + nebula::cpp2::ErrorCode dropCheckpoint(GraphSpaceID spaceId, const std::string& name) override { + UNUSED(spaceId); + UNUSED(name); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + + nebula::cpp2::ErrorCode setWriteBlocking(GraphSpaceID spaceId, bool sign) override { + UNUSED(spaceId); + UNUSED(sign); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + + ErrorOr> backupTable( + GraphSpaceID spaceId, + const std::string& name, + const std::string& tablePrefix, + std::function filter) override { + 
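// backupTable is not needed by the index tests; like the other unused admin hooks in this mock, it fails loudly if ever reached.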
UNUSED(spaceId); + UNUSED(name); + UNUSED(tablePrefix); + UNUSED(filter); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + // for meta BR + nebula::cpp2::ErrorCode restoreFromFiles(GraphSpaceID spaceId, + const std::vector& files) override { + UNUSED(spaceId); + UNUSED(files); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + nebula::cpp2::ErrorCode multiPutWithoutReplicator( + GraphSpaceID spaceId, std::vector<::nebula::kvstore::KV> keyValues) override { + UNUSED(spaceId); + UNUSED(keyValues); + LOG(FATAL) << "Unexpect"; + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + std::vector getDataRoot() const override { + LOG(FATAL) << "Unexpect"; + return {}; + } + + ErrorOr getProperty(GraphSpaceID spaceId, + const std::string& property) override { + UNUSED(spaceId); + UNUSED(property); + return ::nebula::cpp2::ErrorCode::SUCCEEDED; + } + void put(const std::string& key, const std::string& value) { kv_[key] = value; } + + private: + using ::nebula::kvstore::KVStore::prefix; + using ::nebula::kvstore::KVStore::range; + using ::nebula::kvstore::KVStore::rangeWithPrefix; +}; +class MockIndexNode : public IndexNode { + public: + explicit MockIndexNode(RuntimeContext* context) : IndexNode(context, "MockIndexNode") {} + ::nebula::cpp2::ErrorCode init(InitContext& initCtx) override { return initFunc(initCtx); } + std::unique_ptr copy() override { LOG(FATAL) << "Unexpect"; } + std::function nextFunc; + std::function<::nebula::cpp2::ErrorCode(PartitionID)> executeFunc; + std::function<::nebula::cpp2::ErrorCode(InitContext& initCtx)> initFunc; + std::string identify() override { return "MockIndexNode"; } + + private: + Result doNext() override { return nextFunc(); } + ::nebula::cpp2::ErrorCode doExecute(PartitionID partId) override { return executeFunc(partId); }; +}; + +class RowParser { + public: + explicit RowParser(const std::string& str) { + ss = std::stringstream(folly::trimWhitespace(folly::StringPiece(str)).toString()); + parseHeader(); + parseRow(); + } + void parseHeader() { + std::string line; + std::getline(ss, line); + std::vector types; + folly::split("|", line, types); + for (size_t i = 0; i < types.size(); i++) { + types[i] = folly::trimWhitespace(folly::StringPiece(types[i])).toString(); + } + typeList_ = std::move(types); + } + void parseRow() { + std::string line; + while (std::getline(ss, line)) { + std::vector values; + folly::split("|", line, values); + for (size_t i = 0; i < values.size(); i++) { + values[i] = folly::trimWhitespace(folly::StringPiece(values[i])).toString(); + } + Row row; + for (size_t i = 0; i < values.size(); i++) { + if (values[i] == "") { + row.emplace_back(Value::null()); + } else if (values[i] == "") { + row.emplace_back(std::numeric_limits::infinity()); + } else if (values[i] == "<-INF>") { + row.emplace_back(-std::numeric_limits::infinity()); + } else if (values[i] == "") { + row.emplace_back(std::numeric_limits::quiet_NaN()); + } else if (values[i] == "<-NaN>") { + row.emplace_back(-std::numeric_limits::quiet_NaN()); + } else { + row.emplace_back(transformMap[typeList_[i]](values[i])); + } + } + rowList_.emplace_back(std::move(row)); + } + } + const std::vector& getResult() { return rowList_; } + + private: + std::stringstream ss; + std::vector typeList_; + std::vector rowList_; + std::map> transformMap{ + {"int", [](const std::string& str) { return Value(std::stol(str)); }}, + {"string", [](const std::string& str) { return Value(str); }}, + {"float", [](const std::string& str) { return 
Value(folly::to(str)); }}, + {"bool", [](const std::string& str) { return Value(str == "true" ? true : false); }}}; +}; + +/** + * define a schema + * + * format: + * name | type | length | nullable + * example: + * std::string str=R"( + * a | int | | + * b | string | | true + * c | double | 10 | + * )"_schema + */ +class SchemaParser { + public: + explicit SchemaParser(const std::string& str) { + schema = std::make_shared<::nebula::meta::NebulaSchemaProvider>(0); + ss = std::stringstream(folly::trimWhitespace(folly::StringPiece(str)).toString()); + parse(); + } + void parse() { + std::string line; + while (std::getline(ss, line)) { + std::vector values; + folly::split("|", line, values); + std::string name = folly::trimWhitespace(folly::StringPiece(values[0])).toString(); + auto type = typeMap[folly::trimWhitespace(folly::StringPiece(values[1])).toString()]; + int length = 0; + { + std::string lenStr = folly::trimWhitespace(folly::StringPiece(values[2])).toString(); + if (lenStr != "") { + length = std::stoi(lenStr); + } + } + bool nullable = false; + { + std::string nullableStr = folly::trimWhitespace(folly::StringPiece(values[3])).toString(); + if (nullableStr == "true") { + nullable = true; + } + } + schema->addField(name, type, length, nullable); + } + } + std::shared_ptr<::nebula::meta::NebulaSchemaProvider> getResult() { return schema; } + + private: + std::stringstream ss; + std::shared_ptr<::nebula::meta::NebulaSchemaProvider> schema; + std::map typeMap{ + {"int", ::nebula::cpp2::PropertyType::INT64}, + {"double", ::nebula::cpp2::PropertyType::DOUBLE}, + {"string", ::nebula::cpp2::PropertyType::STRING}, + {"bool", ::nebula::cpp2::PropertyType::BOOL}}; +}; + +/** + * define index of a schema + * + * format: + * (Tag|Edge)(name,id) + * example + * std::string str=R"( + * Tag(name,id) + * (i1,1): a,b(10),c + * (i2,2): b(5),c + * )"_index(schema) + */ +class IndexParser { + public: + using IndexItem = ::nebula::meta::cpp2::IndexItem; + using SchemaProvider = ::nebula::meta::NebulaSchemaProvider; + explicit IndexParser(const std::string& str) { + ss = std::stringstream(folly::trimWhitespace(folly::StringPiece(str)).toString()); + parseSchema(); + } + void parseSchema() { + static std::regex pattern(R"((TAG|EDGE)\((.+),(\d+)\))"); + std::smatch match; + std::string line; + std::getline(ss, line); + CHECK(std::regex_match(line, match, pattern)); + std::string name = match.str(2); + int32_t id = std::stoi(match.str(3)); + schemaName_ = name; + if (match.str(1) == "TAG") { + schemaId_.set_tag_id(id); + } else { + schemaId_.set_edge_type(id); + } + } + std::vector> operator()(std::shared_ptr schema) { + schema_ = schema; + std::vector> ret; + std::string line; + while (std::getline(ss, line)) { + auto index = parse(folly::trimWhitespace(folly::StringPiece(line)).toString()); + ret.push_back(index); + } + return ret; + } + std::shared_ptr parse(const std::string& line) { + auto ret = std::make_shared(); + ret->set_schema_id(schemaId_); + ret->set_schema_name(schemaName_); + static std::regex pattern(R"(\((.+),(\d+)\):(.+))"); + std::smatch match; + CHECK(std::regex_match(line, match, pattern)); + ret->set_index_name(folly::trimWhitespace(folly::StringPiece(match.str(1)).toString())); + ret->set_index_id(std::stoi(match.str(2))); + std::string columnStr = match.str(3); + std::vector columns; + folly::split(",", columnStr, columns); + for (size_t i = 0; i < columns.size(); i++) { + columns[i] = folly::trimWhitespace(folly::StringPiece(columns[i])).toString(); + } + 
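// Each column spec is either "name" or "name(length)"; a length forces a FIXED_STRING
// column of that size in the ColumnDef built below, otherwise the type is taken from the schema field.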
std::vector<::nebula::meta::cpp2::ColumnDef> fields; + for (auto& column : columns) { + std::string name; + int length; + std::smatch m; + std::regex p(R"((.+)\((\d+)\))"); + if (std::regex_match(column, m, p)) { + name = m.str(1); + length = std::stoi(m.str(2)); + } else { + name = column; + length = 0; + } + ::nebula::meta::cpp2::ColumnDef col; + auto field = schema_->field(name); + col.set_name(name); + ::nebula::meta::cpp2::ColumnTypeDef type; + if (length > 0) { + type.set_type_length(length); + type.set_type(::nebula::cpp2::PropertyType::FIXED_STRING); + } else { + type.set_type(field->type()); + } + col.set_type(type); + col.set_nullable(field->nullable()); + fields.emplace_back(std::move(col)); + } + ret->set_fields(fields); + return ret; + } + + private: + std::stringstream ss; + std::string schemaName_; + ::nebula::cpp2::SchemaID schemaId_; + std::shared_ptr schema_; + std::shared_ptr<::nebula::meta::cpp2::IndexItem> index_; +}; + +// Definition of UDL +std::vector operator""_row(const char* str, std::size_t len) { + auto ret = RowParser(std::string(str, len)).getResult(); + return ret; +} +std::shared_ptr<::nebula::meta::NebulaSchemaProvider> operator"" _schema(const char* str, + std::size_t) { + return SchemaParser(std::string(str)).getResult(); +} + +IndexParser operator"" _index(const char* str, std::size_t) { + return IndexParser(std::string(str)); +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/test/LookupIndexTest.cpp b/src/storage/test/LookupIndexTest.cpp index 2740e72d9bd..2c883faddff 100644 --- a/src/storage/test/LookupIndexTest.cpp +++ b/src/storage/test/LookupIndexTest.cpp @@ -9,6 +9,10 @@ #include "codec/RowWriterV2.h" #include "codec/test/RowWriterV1.h" #include "common/base/Base.h" +#include "common/expression/ConstantExpression.h" +#include "common/expression/LogicalExpression.h" +#include "common/expression/PropertyExpression.h" +#include "common/expression/RelationalExpression.h" #include "common/fs/TempDir.h" #include "common/utils/IndexKeyUtils.h" #include "interface/gen-cpp2/common_types.h" diff --git a/src/tools/db-dump/CMakeLists.txt b/src/tools/db-dump/CMakeLists.txt index b04c86360cc..62f272621a0 100644 --- a/src/tools/db-dump/CMakeLists.txt +++ b/src/tools/db-dump/CMakeLists.txt @@ -46,6 +46,7 @@ set(tools_test_deps $ $ $ + $ $ ) diff --git a/src/tools/meta-dump/CMakeLists.txt b/src/tools/meta-dump/CMakeLists.txt index 8ba15751e2b..c5fa9d565d8 100644 --- a/src/tools/meta-dump/CMakeLists.txt +++ b/src/tools/meta-dump/CMakeLists.txt @@ -51,6 +51,7 @@ nebula_add_executable( $ $ $ + $ $ LIBRARIES ${ROCKSDB_LIBRARIES} diff --git a/tests/tck/features/lookup/LookUp.feature b/tests/tck/features/lookup/LookUp.feature index 6afca5f523f..ce6700e6697 100644 --- a/tests/tck/features/lookup/LookUp.feature +++ b/tests/tck/features/lookup/LookUp.feature @@ -840,7 +840,6 @@ Feature: LookUpTest_Vid_String "104":("yyy", 28), "105":("zzz", 21), "106":("kkk", 21), - "121":("Useless", 60), "121":("Useless", 20); INSERT VERTEX team(name)