From 256d9b24debf692ae806c96e836c6a4295e187ad Mon Sep 17 00:00:00 2001
From: liuyujia17 <liuyujia17@jd.com>
Date: Thu, 13 Oct 2022 20:30:19 +0800
Subject: [PATCH 1/7] Add SAMPLING clause for probability-based sampling

---
 src/common/algorithm/sampler.h                | 185 ++++++++++++++++++
 src/graph/context/ast/CypherAstContext.h      |  11 +-
 src/graph/executor/CMakeLists.txt             |   1 +
 src/graph/executor/Executor.cpp               |   4 +
 src/graph/executor/query/SamplingExecutor.cpp | 131 +++++++++++++
 src/graph/executor/query/SamplingExecutor.h   |  30 +++
 src/graph/planner/CMakeLists.txt              |   1 +
 .../planner/match/SamplingClausePlanner.cpp   |  34 ++++
 .../planner/match/SamplingClausePlanner.h     |  24 +++
 src/graph/planner/plan/PlanNode.cpp           |   2 +
 src/graph/planner/plan/PlanNode.h             |   1 +
 src/graph/planner/plan/Query.cpp              |  23 +++
 src/graph/planner/plan/Query.h                |  52 +++++
 src/graph/service/PermissionCheck.cpp         |   1 +
 src/graph/validator/CMakeLists.txt            |   1 +
 src/graph/validator/MatchValidator.cpp        |  51 +++++
 src/graph/validator/MatchValidator.h          |   4 +
 src/graph/validator/SamplingValidator.cpp     |  85 ++++++++
 src/graph/validator/SamplingValidator.h       |  32 +++
 src/graph/validator/Validator.cpp             |   3 +
 src/parser/MatchSentence.cpp                  |  12 ++
 src/parser/MatchSentence.h                    |  22 +++
 src/parser/Sentence.h                         |   1 +
 src/parser/TraverseSentences.cpp              |  27 +++
 src/parser/TraverseSentences.h                |  77 ++++++++
 src/parser/parser.yy                          |  65 +++++-
 src/parser/scanner.lex                        |   3 +
 src/parser/test/ParserTest.cpp                |  13 ++
 src/parser/test/ScannerTest.cpp               |   6 +
 29 files changed, 892 insertions(+), 10 deletions(-)
 create mode 100644 src/common/algorithm/sampler.h
 create mode 100644 src/graph/executor/query/SamplingExecutor.cpp
 create mode 100644 src/graph/executor/query/SamplingExecutor.h
 create mode 100644 src/graph/planner/match/SamplingClausePlanner.cpp
 create mode 100644 src/graph/planner/match/SamplingClausePlanner.h
 create mode 100644 src/graph/validator/SamplingValidator.cpp
 create mode 100644 src/graph/validator/SamplingValidator.h

diff --git a/src/common/algorithm/sampler.h b/src/common/algorithm/sampler.h
new file mode 100644
index 00000000000..2ec6eacac1f
--- /dev/null
+++ b/src/common/algorithm/sampler.h
@@ -0,0 +1,185 @@
+/* Copyright (c) 2020 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License.
+ */
+
+#ifndef COMMON_ALGORITHM_SAMPLER_H_
+#define COMMON_ALGORITHM_SAMPLER_H_
+
+#include <cfloat>
+#include <ctime>
+#include <random>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace nebula {
+namespace algorithm {
+
+namespace {
+template <typename T = float>
+T UniformRandom() {
+  static_assert(std::is_floating_point<T>::value,
+                "Only support float point type");
+#if defined(__clang__)
+  static std::default_random_engine e(std::time(nullptr));
+  static std::uniform_real_distribution<T> u(0., 1.);
+#elif defined(__GNUC__) || defined(__GNUG__)
+  static thread_local std::default_random_engine e(std::time(nullptr));
+  static thread_local std::uniform_real_distribution<T> u(0., 1.);
+#endif
+  return u(e);
+}
+}  // namespace
+
+template <typename T>
+void Normalization(std::vector<T>& distribution) {
+  static_assert(std::is_floating_point<T>::value,
+                "Only support float point type");
+  T norm_sum = 0.0f;
+  for (auto& dist : distribution) {
+    norm_sum += dist;
+  }
+  if (norm_sum <= FLT_EPSILON and !distribution.empty()) {
+    for (size_t i = 0; i < distribution.size(); ++i) {
+      distribution[i] = 1.0f / static_cast<T>(distribution.size());
+    }
+    return;
+  }
+  for (size_t i = 0; i < distribution.size(); ++i) {
+    distribution[i] /= norm_sum;
+  }
+}
+
+// https://en.wikipedia.org/wiki/Alias_method
+template <typename T = float>
+class AliasSampler {
+ public:
+  static_assert(std::is_floating_point<T>::value,
+                "Only support float point type");
+  using AliasType = uint32_t;
+  bool Init(std::vector<T>& distribution);
+  inline bool Init(const std::vector<T>& distribution);
+  AliasType Sample() const;
+  inline size_t Size() const;
+
+ private:
+  std::vector<T> prob_;
+  std::vector<AliasType> alias_;
+};
+
+template <typename T>
+bool AliasSampler<T>::Init(std::vector<T>& distribution) {
+  // normalization sum of distribution to 1
+  Normalization(distribution);
+
+  prob_.resize(distribution.size());
+  alias_.resize(distribution.size());
+  std::vector<AliasType> smaller, larger;
+  smaller.reserve(distribution.size());
+  larger.reserve(distribution.size());
+
+  for (size_t i = 0; i < distribution.size(); ++i) {
+    prob_[i] = distribution[i] * distribution.size();
+    if (prob_[i] < 1.0) {
+      smaller.push_back(i);
+    } else {
+      larger.push_back(i);
+    }
+  }
+  // Construct the probability and alias tables
+  AliasType small, large;
+  while (!smaller.empty() && !larger.empty()) {
+    small = smaller.back();
+    smaller.pop_back();
+    large = larger.back();
+    larger.pop_back();
+    alias_[small] = large;
+    prob_[large] = prob_[large] + prob_[small] - 1.0;
+    if (prob_[large] < 1.0) {
+      smaller.push_back(large);
+    } else {
+      larger.push_back(large);
+    }
+  }
+  while (!smaller.empty()) {
+    small = smaller.back();
+    smaller.pop_back();
+    prob_[small] = 1.0;
+  }
+  while (!larger.empty()) {
+    large = larger.back();
+    larger.pop_back();
+    prob_[large] = 1.0;
+  }
+  return true;
+}
+
+template <typename T>
+bool AliasSampler<T>::Init(const std::vector<T>& distribution) {
+  std::vector<T> dist = distribution;
+  return Init(dist);
+}
+
+template <typename T>
+typename AliasSampler<T>::AliasType AliasSampler<T>::Sample() const {
+  AliasType roll = floor(prob_.size() * UniformRandom());
+  bool coin = UniformRandom() < prob_[roll];
+  return coin ? roll : alias_[roll];
+}
+
+template <typename T>
+size_t AliasSampler<T>::Size() const {
+  return prob_.size();
+}
+
+/**
+ * binary sample in accumulation weights
+ */
+template <typename T = float>
+size_t BinarySampleAcc(const std::vector<T>& accumulate_weights) {
+  if (accumulate_weights.empty()) {
+    return 0;
+  }
+  T rnd = UniformRandom() * accumulate_weights.back();
+  size_t low = 0, high = accumulate_weights.size() - 1, mid = 0;
+  while (low <= high) {
+    mid = ((high - low) >> 1) + low;
+    if (rnd < accumulate_weights[mid]) {
+      if (mid == 0) {
+        return mid;
+      }
+      high = mid - 1;
+      if (rnd >= accumulate_weights[high]) {
+        // rnd in [mid-1, mid)
+        return mid;
+      }
+    } else {
+      low = mid + 1;
+      if (low < accumulate_weights.size() && rnd < accumulate_weights[low]) {
+        // rnd in [mid, mid+1)
+        return low;
+      }
+    }
+  }
+  return mid;
+}
+
+/**
+ * binary sample in weights
+ */
+template <typename T = float>
+size_t BinarySample(const std::vector<T>& weights) {
+  std::vector<T> accumulate_weights(weights.size(), 0.0f);
+  T cur_weight = 0.0f;
+  for (size_t i = 0; i < weights.size(); ++i) {
+    cur_weight += weights[i];
+    accumulate_weights[i] = cur_weight;
+  }
+  Normalization(accumulate_weights);
+  return BinarySampleAcc(accumulate_weights);
+}
+
+}  // namespace algorithm
+}  // namespace nebula
+#endif  // COMMON_ALGORITHM_SAMPLER_H_
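
A minimal usage sketch for the sampler header above, assuming only the include path and namespace introduced by this patch (the weights are made-up values). Both samplers draw with replacement; AliasSampler pays O(n) once to build its table and then answers each draw in O(1), while BinarySample answers each draw with an O(log n) search over the accumulated weights.

#include <cstdio>
#include <vector>

#include "common/algorithm/Sampler.h"  // added by this patch (lower-case sampler.h until the rename later in this series)

int main() {
  // Sample indices {0, 1, 2} with relative weights 1:2:7.
  const std::vector<float> weights{1.0f, 2.0f, 7.0f};

  // Alias method: the const overload copies and normalizes the weights internally.
  nebula::algorithm::AliasSampler<float> alias;
  alias.Init(weights);
  std::printf("alias draw: %u\n", alias.Sample());

  // Binary search over the accumulated (prefix-sum) weights.
  std::printf("binary draw: %zu\n", nebula::algorithm::BinarySample(weights));
  return 0;
}
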
diff --git a/src/graph/context/ast/CypherAstContext.h b/src/graph/context/ast/CypherAstContext.h
index 3a73cee3fd0..8f6a8c3819e 100644
--- a/src/graph/context/ast/CypherAstContext.h
+++ b/src/graph/context/ast/CypherAstContext.h
@@ -12,7 +12,7 @@
 #include "common/expression/PathBuildExpression.h"
 #include "graph/context/ast/AstContext.h"
 #include "parser/MatchSentence.h"
-
+#include "graph/planner/plan/Query.h"
 namespace nebula {
 namespace graph {
 enum class CypherClauseKind : uint8_t {
@@ -22,6 +22,7 @@ enum class CypherClauseKind : uint8_t {
   kWhere,
   kReturn,
   kOrderBy,
+  kSampling,
   kPagination,
   kYield,
   kShortestPath,
@@ -113,6 +114,12 @@ struct OrderByClauseContext final : CypherClauseContextBase {
   std::vector<std::pair<size_t, OrderFactor::OrderType>> indexedOrderFactors;
 };
 
+struct SamplingClauseContext final : CypherClauseContextBase {
+  SamplingClauseContext() : CypherClauseContextBase(CypherClauseKind::kSampling) {}
+
+  std::vector<SamplingParams> indexedSamplingFactors;
+};
+
 struct PaginationContext final : CypherClauseContextBase {
   PaginationContext() : CypherClauseContextBase(CypherClauseKind::kPagination) {}
 
@@ -148,6 +155,7 @@ struct YieldClauseContext final : CypherClauseContextBase {
 struct ReturnClauseContext final : CypherClauseContextBase {
   ReturnClauseContext() : CypherClauseContextBase(CypherClauseKind::kReturn) {}
 
+  std::unique_ptr<SamplingClauseContext> sampling;
   std::unique_ptr<OrderByClauseContext> order;
   std::unique_ptr<PaginationContext> pagination;
   std::unique_ptr<YieldClauseContext> yield;
@@ -156,6 +164,7 @@ struct ReturnClauseContext final : CypherClauseContextBase {
 struct WithClauseContext final : CypherClauseContextBase {
   WithClauseContext() : CypherClauseContextBase(CypherClauseKind::kWith) {}
 
+  std::unique_ptr<SamplingClauseContext> sampling;
   std::unique_ptr<OrderByClauseContext> order;
   std::unique_ptr<PaginationContext> pagination;
   std::unique_ptr<WhereClauseContext> where;
diff --git a/src/graph/executor/CMakeLists.txt b/src/graph/executor/CMakeLists.txt
index d33f3b6afa1..63727bc5341 100644
--- a/src/graph/executor/CMakeLists.txt
+++ b/src/graph/executor/CMakeLists.txt
@@ -25,6 +25,7 @@ nebula_add_library(
     query/UnwindExecutor.cpp
     query/SortExecutor.cpp
     query/TopNExecutor.cpp
+    query/SamplingExecutor.cpp
     query/IndexScanExecutor.cpp
     query/SetExecutor.cpp
     query/UnionExecutor.cpp
diff --git a/src/graph/executor/Executor.cpp b/src/graph/executor/Executor.cpp
index ebf89a4b5d0..97067e18ca6 100644
--- a/src/graph/executor/Executor.cpp
+++ b/src/graph/executor/Executor.cpp
@@ -87,6 +87,7 @@
 #include "graph/executor/query/ScanVerticesExecutor.h"
 #include "graph/executor/query/SortExecutor.h"
 #include "graph/executor/query/TopNExecutor.h"
+#include "graph/executor/query/SamplingExecutor.h"
 #include "graph/executor/query/TraverseExecutor.h"
 #include "graph/executor/query/UnionAllVersionVarExecutor.h"
 #include "graph/executor/query/UnionExecutor.h"
@@ -177,6 +178,9 @@ Executor *Executor::makeExecutor(QueryContext *qctx, const PlanNode *node) {
     case PlanNode::Kind::kTopN: {
       return pool->makeAndAdd<TopNExecutor>(node, qctx);
     }
+    case PlanNode::Kind::kSampling: {
+      return pool->makeAndAdd<SamplingExecutor>(node, qctx);
+    }
     case PlanNode::Kind::kFilter: {
       return pool->makeAndAdd<FilterExecutor>(node, qctx);
     }
diff --git a/src/graph/executor/query/SamplingExecutor.cpp b/src/graph/executor/query/SamplingExecutor.cpp
new file mode 100644
index 00000000000..ebcb1935ad9
--- /dev/null
+++ b/src/graph/executor/query/SamplingExecutor.cpp
@@ -0,0 +1,131 @@
+// Copyright (c) 2020 vesoft inc. All rights reserved.
+//
+// This source code is licensed under Apache 2.0 License.
+
+#include "graph/executor/query/SamplingExecutor.h"
+
+#include "common/algorithm/Sampler.h"
+#include "graph/planner/plan/Query.h"
+
+namespace nebula {
+namespace graph {
+
+using WeightType = float;
+
+folly::Future<Status> SamplingExecutor::execute() {
+  SCOPED_TIMER(&execTime_);
+  auto *sampling = asNode<Sampling>(node());
+  Result result = ectx_->getResult(sampling->inputVar());
+  auto *iter = result.iterRef();
+  if (UNLIKELY(iter == nullptr)) {
+    return Status::Error(
+        "Internal error: nullptr iterator in sampling executor");
+  }
+  if (UNLIKELY(!result.iter()->isSequentialIter())) {
+    std::stringstream ss;
+    ss << "Internal error: Sampling executor does not supported "
+       << iter->kind();
+    return Status::Error(ss.str());
+  }
+  auto &factors = sampling->factors();
+  auto size = iter->size();
+  if (size <= 0) {
+    iter->clear();
+    return finish(ResultBuilder()
+                      .value(result.valuePtr())
+                      .iter(std::move(result).iter())
+                      .build());
+  }
+  auto colNames = result.value().getDataSet().colNames;
+  DataSet dataset(std::move(colNames));
+  for (auto factor : factors) {
+    if (factor.count <= 0) {
+      iter->clear();
+      return finish(ResultBuilder()
+                        .value(result.valuePtr())
+                        .iter(std::move(result).iter())
+                        .build());
+    }
+    if (factor.samplingType == SamplingFactor::SamplingType::BINARY) {
+      executeBinarySample<SequentialIter>(iter, factor.colIdx, factor.count,
+                                          dataset);
+    } else {
+      executeAliasSample<SequentialIter>(iter, factor.colIdx, factor.count,
+                                         dataset);
+    }
+  }
+  return finish(ResultBuilder()
+                    .value(Value(std::move(dataset)))
+                    .iter(Iterator::Kind::kSequential)
+                    .build());
+}
+
+template <typename U>
+void SamplingExecutor::executeBinarySample(Iterator *iter, size_t index,
+                                           size_t count, DataSet &list) {
+  auto uIter = static_cast<U *>(iter);
+  std::vector<WeightType> accumulate_weights;
+  auto it = uIter->begin();
+  WeightType v;
+  while (it != uIter->end()) {
+    v = 1.0;
+    if ((*it)[index].type() == Value::Type::NULLVALUE) {
+      LOG(WARNING) << "Sampling type is nullvalue";
+    } else if ((*it)[index].type() == Value::Type::FLOAT) {
+      v = (float)((*it)[index].getFloat());
+    } else if ((*it)[index].type() == Value::Type::INT) {
+      v = (float)((*it)[index].getInt());
+    } else {
+      LOG(WARNING) << "Sampling type is wrong, must be int or float.";
+    }
+    if (!accumulate_weights.empty()) {
+      v += accumulate_weights.back();
+    }
+    accumulate_weights.emplace_back(std::move(v));
+    ++it;
+  }
+  nebula::algorithm::Normalization<WeightType>(accumulate_weights);
+  auto beg = uIter->begin();
+  for (size_t i = 0; i < count; ++i) {
+    auto idx =
+        nebula::algorithm::BinarySampleAcc<WeightType>(accumulate_weights);
+    list.emplace_back(*(beg + idx));
+  }
+  uIter->clear();
+}
+
+template <typename U>
+void SamplingExecutor::executeAliasSample(Iterator *iter, size_t index,
+                                          size_t count, DataSet &list) {
+  auto uIter = static_cast<U *>(iter);
+  std::vector<WeightType> weights;
+  auto it = uIter->begin();
+  WeightType v;
+  while (it != uIter->end()) {
+    v = 1.0;
+    if ((*it)[index].type() == Value::Type::NULLVALUE) {
+      LOG(WARNING) << "Sampling type is nullvalue";
+
+    } else if ((*it)[index].type() == Value::Type::FLOAT) {
+      v = (float)((*it)[index].getFloat());
+    } else if ((*it)[index].type() == Value::Type::INT) {
+      v = (float)((*it)[index].getInt());
+    } else {
+      LOG(WARNING) << "Sampling type is wrong, must be int or float.";
+    }
+    LOG(ERROR) << "lyj debug v:" << v;
+    weights.emplace_back(std::move(v));
+    ++it;
+  }
+  nebula::algorithm::AliasSampler<WeightType> sampler_;
+  sampler_.Init(weights);
+  auto beg = uIter->begin();
+  for (size_t i = 0; i < count; ++i) {
+    auto idx = sampler_.Sample();
+    list.emplace_back(*(beg + idx));
+  }
+  uIter->clear();
+}
+
+}  // namespace graph
+}  // namespace nebula
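
A note on the weight convention above: both code paths sample with replacement, and a row whose sampling column is neither INT nor FLOAT keeps the default weight 1.0, so an all-NULL column degenerates to uniform sampling. A simplified sketch of that extraction step, separate from the executor itself (the Value header path is assumed):

#include "common/datatypes/Value.h"  // assumed location of nebula::Value

// Mirrors the weight extraction in executeBinarySample/executeAliasSample:
// numeric cells become weights, everything else falls back to 1.0.
static float rowWeight(const nebula::Value &cell) {
  if (cell.type() == nebula::Value::Type::FLOAT) {
    return static_cast<float>(cell.getFloat());
  }
  if (cell.type() == nebula::Value::Type::INT) {
    return static_cast<float>(cell.getInt());
  }
  return 1.0f;  // NULL or non-numeric: uniform weight
}
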
diff --git a/src/graph/executor/query/SamplingExecutor.h b/src/graph/executor/query/SamplingExecutor.h
new file mode 100644
index 00000000000..8e42ca10d61
--- /dev/null
+++ b/src/graph/executor/query/SamplingExecutor.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2020 vesoft inc. All rights reserved.
+//
+// This source code is licensed under Apache 2.0 License.
+
+#ifndef GRAPH_EXECUTOR_QUERY_SAMPLINGEXECUTOR_H_
+#define GRAPH_EXECUTOR_QUERY_SAMPLINGEXECUTOR_H_
+
+#include "graph/executor/Executor.h"
+namespace nebula {
+namespace graph {
+
+class SamplingExecutor final : public Executor {
+ public:
+  SamplingExecutor(const PlanNode *node, QueryContext *qctx)
+      : Executor("SamplingExecutor", node, qctx) {}
+
+  folly::Future<Status> execute() override;
+
+ private:
+  template <typename U>
+  void executeBinarySample(Iterator *iter, size_t index, size_t count,
+                           DataSet &list);
+  template <typename U>
+  void executeAliasSample(Iterator *iter, size_t index, size_t count,
+                          DataSet &list);
+};
+
+}  // namespace graph
+}  // namespace nebula
+#endif  // GRAPH_EXECUTOR_QUERY_SAMPLINGEXECUTOR_H_
diff --git a/src/graph/planner/CMakeLists.txt b/src/graph/planner/CMakeLists.txt
index e8b0340130d..cb8135805ed 100644
--- a/src/graph/planner/CMakeLists.txt
+++ b/src/graph/planner/CMakeLists.txt
@@ -14,6 +14,7 @@ nebula_add_library(
     match/UnwindClausePlanner.cpp
     match/ReturnClausePlanner.cpp
     match/OrderByClausePlanner.cpp
+    match/SamplingClausePlanner.cpp
     match/YieldClausePlanner.cpp
     match/PaginationPlanner.cpp
     match/WhereClausePlanner.cpp
diff --git a/src/graph/planner/match/SamplingClausePlanner.cpp b/src/graph/planner/match/SamplingClausePlanner.cpp
new file mode 100644
index 00000000000..679e8068fdc
--- /dev/null
+++ b/src/graph/planner/match/SamplingClausePlanner.cpp
@@ -0,0 +1,34 @@
+/* Copyright (c) 2020 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License.
+ */
+
+#include "graph/planner/match/SamplingClausePlanner.h"
+
+#include "graph/planner/plan/Query.h"
+
+namespace nebula {
+namespace graph {
+StatusOr<SubPlan> SamplingClausePlanner::transform(
+    CypherClauseContextBase* clauseCtx) {
+  if (clauseCtx->kind != CypherClauseKind::kSampling) {
+    return Status::Error("Not a valid context for SamplingClausePlanner.");
+  }
+  auto* samplingCtx = static_cast<SamplingClauseContext*>(clauseCtx);
+
+  SubPlan samplingPlan;
+  NG_RETURN_IF_ERROR(buildSampling(samplingCtx, samplingPlan));
+  return samplingPlan;
+}
+
+Status SamplingClausePlanner::buildSampling(SamplingClauseContext* octx,
+                                            SubPlan& subplan) {
+  auto* currentRoot = subplan.root;
+  auto* sampling =
+      Sampling::make(octx->qctx, currentRoot, octx->indexedSamplingFactors);
+  subplan.root = sampling;
+  subplan.tail = sampling;
+  return Status::OK();
+}
+}  // namespace graph
+}  // namespace nebula
diff --git a/src/graph/planner/match/SamplingClausePlanner.h b/src/graph/planner/match/SamplingClausePlanner.h
new file mode 100644
index 00000000000..fb4eeb9d16d
--- /dev/null
+++ b/src/graph/planner/match/SamplingClausePlanner.h
@@ -0,0 +1,24 @@
+/* Copyright (c) 2020 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License.
+ */
+
+#ifndef GRAPH_PLANNER_MATCH_SAMPLINGCLAUSEPLANNER_H_
+#define GRAPH_PLANNER_MATCH_SAMPLINGCLAUSEPLANNER_H_
+
+#include "graph/planner/match/CypherClausePlanner.h"
+
+namespace nebula {
+namespace graph {
+// The SamplingClausePlanner generates a plan for the sampling clause.
+class SamplingClausePlanner final : public CypherClausePlanner {
+ public:
+  SamplingClausePlanner() = default;
+
+  StatusOr<SubPlan> transform(CypherClauseContextBase* clauseCtx) override;
+
+  Status buildSampling(SamplingClauseContext* octx, SubPlan& subplan);
+};
+}  // namespace graph
+}  // namespace nebula
+#endif  // GRAPH_PLANNER_MATCH_SAMPLINGCLAUSEPLANNER_H_
diff --git a/src/graph/planner/plan/PlanNode.cpp b/src/graph/planner/plan/PlanNode.cpp
index 9e8dc0b0228..e983696c24b 100644
--- a/src/graph/planner/plan/PlanNode.cpp
+++ b/src/graph/planner/plan/PlanNode.cpp
@@ -83,6 +83,8 @@ const char* PlanNode::toString(PlanNode::Kind kind) {
       return "Limit";
     case Kind::kSample:
       return "Sample";
+    case Kind::kSampling:
+      return "Sampling";
     case Kind::kAggregate:
       return "Aggregate";
     case Kind::kSelect:
diff --git a/src/graph/planner/plan/PlanNode.h b/src/graph/planner/plan/PlanNode.h
index 8ee84207edd..80d1e3bcf00 100644
--- a/src/graph/planner/plan/PlanNode.h
+++ b/src/graph/planner/plan/PlanNode.h
@@ -54,6 +54,7 @@ class PlanNode {
     kTopN,
     kLimit,
     kSample,
+    kSampling,
     kAggregate,
     kDedup,
     kAssign,
diff --git a/src/graph/planner/plan/Query.cpp b/src/graph/planner/plan/Query.cpp
index 127e2ce4dea..4e06f906717 100644
--- a/src/graph/planner/plan/Query.cpp
+++ b/src/graph/planner/plan/Query.cpp
@@ -435,6 +435,29 @@ void Sort::cloneMembers(const Sort& p) {
   factors_ = std::move(factors);
 }
 
+std::unique_ptr<PlanNodeDescription> Sampling::explain() const {
+  auto desc = SingleInputNode::explain();
+  addDescription("factors", folly::toJson(util::toJson(factorsString())),
+                 desc.get());
+  return desc;
+}
+
+PlanNode* Sampling::clone() const {
+  auto* newSampling = Sampling::make(qctx_, nullptr);
+  newSampling->cloneMembers(*this);
+  return newSampling;
+}
+
+void Sampling::cloneMembers(const Sampling& p) {
+  SingleInputNode::cloneMembers(p);
+
+  std::vector<SamplingParams> factors;
+  for (const auto& factor : p.factors()) {
+    factors.emplace_back(factor);
+  }
+  factors_ = std::move(factors);
+}
+
 // Get constant count value
 int64_t Limit::count(QueryContext* qctx) const {
   if (count_ == nullptr) {
diff --git a/src/graph/planner/plan/Query.h b/src/graph/planner/plan/Query.h
index 42b8b9692fc..60b13188df4 100644
--- a/src/graph/planner/plan/Query.h
+++ b/src/graph/planner/plan/Query.h
@@ -965,6 +965,58 @@ class Sort final : public SingleInputNode {
   std::vector<std::pair<size_t, OrderFactor::OrderType>> factors_;
 };
 
+struct SamplingParams {
+  size_t colIdx;
+  size_t count;
+  SamplingFactor::SamplingType samplingType;
+
+  SamplingParams() = default;
+  SamplingParams(size_t col_idx, size_t c, SamplingFactor::SamplingType st)
+      : colIdx(col_idx), count(c), samplingType(st) {}
+};
+
+// Sampling the given record set.
+class Sampling final : public SingleInputNode {
+ public:
+  static Sampling* make(QueryContext* qctx, PlanNode* input,
+                        std::vector<SamplingParams> factors = {}) {
+    return qctx->objPool()->makeAndAdd<Sampling>(qctx, input,
+                                                 std::move(factors));
+  }
+
+  const std::vector<SamplingParams>& factors() const { return factors_; }
+
+  PlanNode* clone() const override;
+  std::unique_ptr<PlanNodeDescription> explain() const override;
+
+ private:
+  friend ObjectPool;
+  Sampling(QueryContext* qctx, PlanNode* input,
+           std::vector<SamplingParams> factors)
+      : SingleInputNode(qctx, Kind::kSampling, input) {
+    factors_ = std::move(factors);
+  }
+
+  std::vector<std::vector<std::string>> factorsString() const {
+    auto cols = colNames();
+    std::vector<std::vector<std::string>> result;
+    for (auto& factor : factors_) {
+      std::string colName = cols[factor.colIdx];
+      std::string order =
+          factor.samplingType == SamplingFactor::SamplingType::BINARY ? "BINARY"
+                                                                      : "ALIAS";
+      std::vector temp = {colName, std::to_string(factor.count), order};
+      result.emplace_back(temp);
+    }
+    return result;
+  }
+
+  void cloneMembers(const Sampling&);
+
+ private:
+  std::vector<SamplingParams> factors_;
+};
+
 // Output the records with the given limitation.
 class Limit final : public SingleInputNode {
  public:
diff --git a/src/graph/service/PermissionCheck.cpp b/src/graph/service/PermissionCheck.cpp
index da0dd3789dd..22725be91cd 100644
--- a/src/graph/service/PermissionCheck.cpp
+++ b/src/graph/service/PermissionCheck.cpp
@@ -125,6 +125,7 @@ namespace graph {
     case Sentence::Kind::kLookup:
     case Sentence::Kind::kYield:
     case Sentence::Kind::kOrderBy:
+    case Sentence::Kind::kSampling:
     case Sentence::Kind::kFetchVertices:
     case Sentence::Kind::kFetchEdges:
     case Sentence::Kind::kFindPath:
diff --git a/src/graph/validator/CMakeLists.txt b/src/graph/validator/CMakeLists.txt
index 9c8c7f66fbf..f1f22b0aeec 100644
--- a/src/graph/validator/CMakeLists.txt
+++ b/src/graph/validator/CMakeLists.txt
@@ -24,6 +24,7 @@ nebula_add_library(
     YieldValidator.cpp
     ExplainValidator.cpp
     GroupByValidator.cpp
+    SamplingValidator.cpp
     FindPathValidator.cpp
     LookupValidator.cpp
     MatchValidator.cpp
diff --git a/src/graph/validator/MatchValidator.cpp b/src/graph/validator/MatchValidator.cpp
index 8fe2f611d45..9a60aed8b30 100644
--- a/src/graph/validator/MatchValidator.cpp
+++ b/src/graph/validator/MatchValidator.cpp
@@ -443,6 +443,14 @@ Status MatchValidator::validateReturn(MatchReturn *ret,
   NG_RETURN_IF_ERROR(validatePagination(ret->skip(), ret->limit(), *paginationCtx));
   retClauseCtx.pagination = std::move(paginationCtx);
 
+  if (ret->samplingFactors() != nullptr) {
+    auto samplingCtx = getContext<SamplingClauseContext>();
+    NG_RETURN_IF_ERROR(validateSampling(ret->samplingFactors(),
+                                        retClauseCtx.yield->yieldColumns,
+                                        *samplingCtx));
+    retClauseCtx.sampling = std::move(samplingCtx);
+  }
+
   if (ret->orderFactors() != nullptr) {
     auto orderByCtx = getContext<OrderByClauseContext>();
     NG_RETURN_IF_ERROR(
@@ -786,6 +794,49 @@ Status MatchValidator::validateOrderBy(const OrderFactors *factors,
   return Status::OK();
 }
 
+// Check validity of sampling options.
+// Disallow duplicate columns,
+// and check each column expression (only constant and label expressions are allowed).
+Status MatchValidator::validateSampling(
+    const SamplingFactors *factors, const YieldColumns *yieldColumns,
+    SamplingClauseContext &samplingCtx) const {
+  if (factors != nullptr) {
+    std::vector<std::string> inputColList;
+    inputColList.reserve(yieldColumns->columns().size());
+    for (auto *col : yieldColumns->columns()) {
+      inputColList.emplace_back(col->name());
+    }
+    std::unordered_map<std::string, size_t> inputColIndices;
+    for (auto i = 0u; i < inputColList.size(); i++) {
+      if (!inputColIndices.emplace(inputColList[i], i).second) {
+        return Status::SemanticError("Duplicated columns not allowed: %s",
+                                     inputColList[i].c_str());
+      }
+    }
+
+    for (auto &factor : factors->factors()) {
+      if (factor->count() < 0) {
+        return Status::SemanticError("Sampling count must not be negative");
+      }
+      auto factorExpr = factor->expr();
+      if (ExpressionUtils::isEvaluableExpr(factorExpr, qctx_)) continue;
+      if (factorExpr->kind() != Expression::Kind::kLabel) {
+        return Status::SemanticError(
+            "Only column name can be used as sort item");
+      }
+      auto &name = static_cast<const LabelExpression *>(factor->expr())->name();
+      auto iter = inputColIndices.find(name);
+      if (iter == inputColIndices.end()) {
+        return Status::SemanticError("Column `%s' not found", name.c_str());
+      }
+      samplingCtx.indexedSamplingFactors.emplace_back(SamplingParams(
+          iter->second, factor->count(), factor->samplingType()));
+    }
+  }
+
+  return Status::OK();
+}
+
 // Validate group by and fill group by context.
 Status MatchValidator::validateGroup(YieldClauseContext &yieldCtx) {
   auto cols = yieldCtx.yieldColumns->columns();
diff --git a/src/graph/validator/MatchValidator.h b/src/graph/validator/MatchValidator.h
index b33feede7dc..46416c87270 100644
--- a/src/graph/validator/MatchValidator.h
+++ b/src/graph/validator/MatchValidator.h
@@ -54,6 +54,10 @@ class MatchValidator final : public Validator {
                          const YieldColumns *yieldColumns,
                          OrderByClauseContext &orderByCtx) const;
 
+  Status validateSampling(const SamplingFactors *factors,
+                          const YieldColumns *yieldColumns,
+                          SamplingClauseContext &samplingCtx) const;
+
   Status validateGroup(YieldClauseContext &yieldCtx);
 
   Status validateYield(YieldClauseContext &yieldCtx);
diff --git a/src/graph/validator/SamplingValidator.cpp b/src/graph/validator/SamplingValidator.cpp
new file mode 100644
index 00000000000..a6841c0c1d8
--- /dev/null
+++ b/src/graph/validator/SamplingValidator.cpp
@@ -0,0 +1,85 @@
+/* Copyright (c) 2020 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License.
+ */
+
+#include "graph/validator/SamplingValidator.h"
+
+#include "parser/TraverseSentences.h"
+
+namespace nebula {
+namespace graph {
+Status SamplingValidator::validateImpl() {
+  auto sentence = static_cast<SamplingSentence *>(sentence_);
+  auto &factors = sentence->factors();
+  // Check expression type, collect properties, and fill the index of each
+  // sampling column in the input columns.
+  for (auto &factor : factors) {
+    if (factor->count() < 0) {
+      return Status::SyntaxError("sampling count `%ld' is illegal", factor->count());
+    }
+    if (factor->expr()->kind() == Expression::Kind::kInputProperty) {
+      auto expr = static_cast<InputPropertyExpression *>(factor->expr());
+      NG_RETURN_IF_ERROR(deduceExprType(expr));
+      NG_RETURN_IF_ERROR(deduceProps(expr, exprProps_));
+      const auto &cols = inputCols();
+      auto &name = expr->prop();
+      auto eq = [&](const ColDef &col) { return col.name == name; };
+      auto iter = std::find_if(cols.cbegin(), cols.cend(), eq);
+      size_t colIdx = std::distance(cols.cbegin(), iter);
+      colSamplingTypes_.emplace_back(
+          SamplingParams(colIdx, factor->count(), factor->samplingType()));
+    } else if (factor->expr()->kind() == Expression::Kind::kVarProperty) {
+      auto expr = static_cast<VariablePropertyExpression *>(factor->expr());
+      NG_RETURN_IF_ERROR(deduceExprType(expr));
+      NG_RETURN_IF_ERROR(deduceProps(expr, exprProps_));
+      const auto &cols = vctx_->getVar(expr->sym());
+      auto &name = expr->prop();
+      auto eq = [&](const ColDef &col) { return col.name == name; };
+      auto iter = std::find_if(cols.cbegin(), cols.cend(), eq);
+      size_t colIdx = std::distance(cols.cbegin(), iter);
+      colSamplingTypes_.emplace_back(
+          SamplingParams(colIdx, factor->count(), factor->samplingType()));
+    } else {
+      return Status::SemanticError("Order by with invalid expression `%s'",
+                                   factor->expr()->toString().c_str());
+    }
+  }
+
+  // only one Input/Variable is ok.
+  if (!exprProps_.inputProps().empty() && !exprProps_.varProps().empty()) {
+    return Status::SemanticError("Not support both input and variable.");
+  } else if (!exprProps_.inputProps().empty()) {
+    outputs_ = inputCols();
+  } else if (!exprProps_.varProps().empty()) {
+    if (!userDefinedVarNameList_.empty()) {
+      if (userDefinedVarNameList_.size() != 1) {
+        return Status::SemanticError(
+            "Multiple user defined vars are not supported yet.");
+      }
+      userDefinedVarName_ = *userDefinedVarNameList_.begin();
+      outputs_ = vctx_->getVar(userDefinedVarName_);
+    }
+  }
+
+  return Status::OK();
+}
+
+Status SamplingValidator::toPlan() {
+  auto *plan = qctx_->plan();
+  auto *samplingNode =
+      Sampling::make(qctx_, plan->root(), std::move(colSamplingTypes_));
+  std::vector<std::string> colNames;
+  for (auto &col : outputs_) {
+    colNames.emplace_back(col.name);
+  }
+  samplingNode->setColNames(std::move(colNames));
+  if (!userDefinedVarName_.empty()) {
+    samplingNode->setInputVar(userDefinedVarName_);
+  }
+  root_ = samplingNode;
+  tail_ = root_;
+  return Status::OK();
+}
+}  // namespace graph
+}  // namespace nebula
diff --git a/src/graph/validator/SamplingValidator.h b/src/graph/validator/SamplingValidator.h
new file mode 100644
index 00000000000..c7d732f630b
--- /dev/null
+++ b/src/graph/validator/SamplingValidator.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2020 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License.
+ */
+
+#ifndef GRAPH_VALIDATOR_SAMPLINGVALIDATOR_H_
+#define GRAPH_VALIDATOR_SAMPLINGVALIDATOR_H_
+
+#include "graph/planner/plan/Query.h"
+#include "graph/validator/Validator.h"
+
+namespace nebula {
+namespace graph {
+class SamplingValidator final : public Validator {
+ public:
+  SamplingValidator(Sentence* sentence, QueryContext* context)
+      : Validator(sentence, context) {
+    setNoSpaceRequired();
+  }
+
+ private:
+  Status validateImpl() override;
+
+  Status toPlan() override;
+
+ private:
+  std::vector<SamplingParams> colSamplingTypes_;
+  std::string userDefinedVarName_;
+};
+}  // namespace graph
+}  // namespace nebula
+#endif  // GRAPH_VALIDATOR_SAMPLINGVALIDATOR_H_
diff --git a/src/graph/validator/Validator.cpp b/src/graph/validator/Validator.cpp
index a98dde58ad6..292a90fbe5b 100644
--- a/src/graph/validator/Validator.cpp
+++ b/src/graph/validator/Validator.cpp
@@ -29,6 +29,7 @@
 #include "graph/validator/MatchValidator.h"
 #include "graph/validator/MutateValidator.h"
 #include "graph/validator/OrderByValidator.h"
+#include "graph/validator/SamplingValidator.h"
 #include "graph/validator/PipeValidator.h"
 #include "graph/validator/ReportError.h"
 #include "graph/validator/SequentialValidator.h"
@@ -72,6 +73,8 @@ std::unique_ptr<Validator> Validator::makeValidator(Sentence* sentence, QueryCon
       return std::make_unique<LimitValidator>(sentence, context);
     case Sentence::Kind::kOrderBy:
       return std::make_unique<OrderByValidator>(sentence, context);
+    case Sentence::Kind::kSampling:
+      return std::make_unique<SamplingValidator>(sentence, context);
     case Sentence::Kind::kYield:
       return std::make_unique<YieldValidator>(sentence, context);
     case Sentence::Kind::kGroupBy:
diff --git a/src/parser/MatchSentence.cpp b/src/parser/MatchSentence.cpp
index 9ff07e88d0d..e06b2d00cfe 100644
--- a/src/parser/MatchSentence.cpp
+++ b/src/parser/MatchSentence.cpp
@@ -48,6 +48,12 @@ std::string WithClause::toString() const {
 
   buf += returnItems_->toString();
 
+  if (samplingFactors_ != nullptr) {
+    buf += " ";
+    buf += "SAMPLING ";
+    buf += samplingFactors_->toString();
+  }
+
   if (orderFactors_ != nullptr) {
     buf += " ";
     buf += "ORDER BY ";
@@ -101,6 +107,12 @@ std::string MatchReturn::toString() const {
 
   buf += returnItems_->toString();
 
+  if (samplingFactors_ != nullptr) {
+    buf += " ";
+    buf += "SAMPLING ";
+    buf += samplingFactors_->toString();
+  }
+
   if (orderFactors_ != nullptr) {
     buf += " ";
     buf += "ORDER BY ";
diff --git a/src/parser/MatchSentence.h b/src/parser/MatchSentence.h
index 239c52c991e..a3563a0ddbc 100644
--- a/src/parser/MatchSentence.h
+++ b/src/parser/MatchSentence.h
@@ -61,12 +61,14 @@ class MatchReturnItems final {
 class MatchReturn final {
  public:
   MatchReturn(MatchReturnItems* returnItems = nullptr,
+              SamplingFactors* samplingFactors = nullptr,
               OrderFactors* orderFactors = nullptr,
               Expression* skip = nullptr,
               Expression* limit = nullptr,
               bool distinct = false) {
     returnItems_.reset(returnItems);
     orderFactors_.reset(orderFactors);
+    samplingFactors_.reset(samplingFactors);
     skip_ = skip;
     limit_ = limit;
     isDistinct_ = distinct;
@@ -100,12 +102,21 @@ class MatchReturn final {
     return orderFactors_.get();
   }
 
+  SamplingFactors* samplingFactors() {
+    return samplingFactors_.get();
+  }
+
+  const SamplingFactors* samplingFactors() const {
+    return samplingFactors_.get();
+  }
+
   std::string toString() const;
 
  private:
   std::unique_ptr<MatchReturnItems> returnItems_;
   bool isDistinct_{false};
   std::unique_ptr<OrderFactors> orderFactors_;
+  std::unique_ptr<SamplingFactors> samplingFactors_;
   Expression* skip_{nullptr};
   Expression* limit_{nullptr};
 };
@@ -210,6 +221,7 @@ class UnwindClause final : public ReadingClause {
 class WithClause final : public ReadingClause {
  public:
   explicit WithClause(MatchReturnItems* returnItems,
+                      SamplingFactors* samplingFactors = nullptr,
                       OrderFactors* orderFactors = nullptr,
                       Expression* skip = nullptr,
                       Expression* limit = nullptr,
@@ -218,6 +230,7 @@ class WithClause final : public ReadingClause {
       : ReadingClause(Kind::kWith) {
     returnItems_.reset(returnItems);
     orderFactors_.reset(orderFactors);
+    samplingFactors_.reset(samplingFactors);
     skip_ = skip;
     limit_ = limit;
     where_.reset(where);
@@ -240,6 +253,14 @@ class WithClause final : public ReadingClause {
     return orderFactors_.get();
   }
 
+  SamplingFactors* samplingFactors() {
+    return samplingFactors_.get();
+  }
+
+  const SamplingFactors* samplingFactors() const {
+    return samplingFactors_.get();
+  }
+
   Expression* skip() {
     return skip_;
   }
@@ -273,6 +294,7 @@ class WithClause final : public ReadingClause {
  private:
   std::unique_ptr<MatchReturnItems> returnItems_;
   std::unique_ptr<OrderFactors> orderFactors_;
+  std::unique_ptr<SamplingFactors> samplingFactors_;
   Expression* skip_{nullptr};
   Expression* limit_{nullptr};
   std::unique_ptr<WhereClause> where_;
diff --git a/src/parser/Sentence.h b/src/parser/Sentence.h
index 6afcde73784..7f310ddaf9d 100644
--- a/src/parser/Sentence.h
+++ b/src/parser/Sentence.h
@@ -98,6 +98,7 @@ class Sentence {
     kRevoke,
     kChangePassword,
     kOrderBy,
+    kSampling,
     kShowConfigs,
     kSetConfig,
     kGetConfig,
diff --git a/src/parser/TraverseSentences.cpp b/src/parser/TraverseSentences.cpp
index af1fd83ec47..ad8fab8815b 100644
--- a/src/parser/TraverseSentences.cpp
+++ b/src/parser/TraverseSentences.cpp
@@ -144,6 +144,33 @@ std::string OrderBySentence::toString() const {
   return folly::stringPrintf("ORDER BY %s", orderFactors_->toString().c_str());
 }
 
+std::string SamplingFactor::toString() const {
+  switch (sampling_type_) {
+    case BINARY:
+      return folly::stringPrintf("%s %ld BINARY,", expr_->toString().c_str(), count_);
+    case ALIAS:
+      return folly::stringPrintf("%s %ld ALIAS", expr_->toString().c_str(), count_);
+    default:
+      LOG(FATAL) << "Unknown Sampling Type: " << sampling_type_;
+  }
+}
+
+std::string SamplingFactors::toString() const {
+  std::string buf;
+  buf.reserve(256);
+  for (auto &factor : factors_) {
+    buf += factor->toString();
+  }
+  if (!buf.empty()) {
+    buf.resize(buf.size() - 1);
+  }
+  return buf;
+}
+
+std::string SamplingSentence::toString() const {
+  return folly::stringPrintf("SAMPLING %s", samplingFactors_->toString().c_str());
+}
+
 std::string FetchVerticesSentence::toString() const {
   std::string buf;
   buf.reserve(256);
diff --git a/src/parser/TraverseSentences.h b/src/parser/TraverseSentences.h
index b4ef8bb11f0..91f2448aa91 100644
--- a/src/parser/TraverseSentences.h
+++ b/src/parser/TraverseSentences.h
@@ -301,6 +301,83 @@ class OrderBySentence final : public Sentence {
   std::unique_ptr<OrderFactors> orderFactors_;
 };
 
+
+class SamplingFactor final {
+ public:
+  enum SamplingType : uint8_t { BINARY, ALIAS };
+
+  SamplingFactor(Expression* expr, int64_t count, SamplingType sp) {
+    expr_ = expr;
+    count_ = count;
+    sampling_type_ = sp;
+  }
+
+  Expression* expr() {
+    return expr_;
+  }
+
+  void setExpr(Expression* expr) {
+    expr_ = expr;
+  }
+
+  int64_t count() {
+    return count_;
+  }
+
+  SamplingType samplingType() {
+    return sampling_type_;
+  }
+
+  std::string toString() const;
+
+ private:
+  Expression* expr_{nullptr};
+  int64_t count_;
+  SamplingType sampling_type_;
+};
+
+class SamplingFactors final {
+ public:
+  void addFactor(SamplingFactor* factor) {
+    factors_.emplace_back(factor);
+  }
+
+  auto& factors() {
+    return factors_;
+  }
+
+  const auto& factors() const {
+    return factors_;
+  }
+
+  std::string toString() const;
+
+ private:
+  std::vector<std::unique_ptr<SamplingFactor>> factors_;
+};
+
+
+class SamplingSentence final : public Sentence {
+ public:
+  explicit SamplingSentence(SamplingFactors* factors) {
+    samplingFactors_.reset(factors);
+    kind_ = Kind::kSampling;
+  }
+
+  auto& factors() {
+    return samplingFactors_->factors();
+  }
+
+  const auto& factors() const {
+    return samplingFactors_->factors();
+  }
+
+  std::string toString() const override;
+
+ private:
+  std::unique_ptr<SamplingFactors> samplingFactors_;
+};
+
 class FetchVerticesSentence final : public Sentence {
  public:
   FetchVerticesSentence(NameLabelList* tags, VertexIDList* vidList, YieldClause* clause) {
diff --git a/src/parser/parser.yy b/src/parser/parser.yy
index bfd06536385..2249ce4fe5e 100644
--- a/src/parser/parser.yy
+++ b/src/parser/parser.yy
@@ -119,6 +119,8 @@ using namespace nebula;
     nebula::IndexParamItem                 *index_param_item;
     nebula::OrderFactor                    *order_factor;
     nebula::OrderFactors                   *order_factors;
+    nebula::SamplingFactor                 *sampling_factor;
+    nebula::SamplingFactors                *sampling_factors;
     nebula::meta::cpp2::ConfigModule        config_module;
     nebula::meta::cpp2::ListHostType       list_host_type;
     nebula::ConfigRowItem                  *config_row_item;
@@ -186,7 +188,7 @@ using namespace nebula;
 %token KW_GET KW_DECLARE KW_GRAPH KW_META KW_STORAGE KW_AGENT
 %token KW_TTL KW_TTL_DURATION KW_TTL_COL KW_DATA KW_STOP
 %token KW_FETCH KW_PROP KW_UPDATE KW_UPSERT KW_WHEN
-%token KW_ORDER KW_ASC KW_LIMIT KW_SAMPLE KW_OFFSET KW_ASCENDING KW_DESCENDING
+%token KW_ORDER KW_ASC KW_LIMIT KW_SAMPLE KW_OFFSET KW_ASCENDING KW_DESCENDING KW_SAMPLING KW_BINARY KW_ALIAS
 %token KW_DISTINCT KW_ALL KW_OF
 %token KW_BALANCE KW_LEADER KW_RESET KW_PLAN
 %token KW_SHORTEST KW_PATH KW_NOLOOP KW_SHORTESTPATH KW_ALLSHORTESTPATHS
@@ -296,6 +298,8 @@ using namespace nebula;
 %type <index_param_item> index_param_item
 %type <order_factor> order_factor
 %type <order_factors> order_factors
+%type <sampling_factor> sampling_factor
+%type <sampling_factors> sampling_factors
 %type <config_module> config_module_enum
 %type <list_host_type> list_host_type
 %type <config_row_item> show_config_item get_config_item set_config_item
@@ -338,6 +342,7 @@ using namespace nebula;
 %type <match_clause_list> reading_clauses reading_with_clause reading_with_clauses
 %type <match_step_range> match_step_range
 %type <order_factors> match_order_by
+%type <sampling_factors> match_sampling
 %type <text_search_argument> text_search_argument
 %type <base_text_search_argument> base_text_search_argument
 %type <fuzzy_text_search_argument> fuzzy_text_search_argument
@@ -392,6 +397,7 @@ using namespace nebula;
 
 %type <sentence> traverse_sentence unwind_sentence
 %type <sentence> go_sentence match_sentence lookup_sentence find_path_sentence get_subgraph_sentence
+%type <sentence> group_by_sentence order_by_sentence limit_sentence sampling_sentence
 %type <sentence> group_by_sentence order_by_sentence limit_sentence
 %type <sentence> fetch_sentence fetch_vertices_sentence fetch_edges_sentence
 %type <sentence> set_sentence piped_sentence assignment_sentence match_sentences
@@ -579,6 +585,7 @@ unreserved_keyword
     | KW_DIVIDE             { $$ = new std::string("divide"); }
     | KW_RENAME             { $$ = new std::string("rename"); }
     | KW_CLEAR              { $$ = new std::string("clear"); }
+    | KW_SAMPLING           { $$ = new std::string("sampling"); }
     ;
 
 expression
@@ -1669,11 +1676,11 @@ unwind_sentence
     ;
 
 with_clause
-    : KW_WITH match_return_items match_order_by match_skip match_limit where_clause {
-        $$ = new WithClause($2, $3, $4, $5, $6, false/*distinct*/);
+    : KW_WITH match_return_items match_sampling match_order_by match_skip match_limit where_clause {
+        $$ = new WithClause($2, $3, $4, $5, $6, $7, false/*distinct*/);
     }
-    | KW_WITH KW_DISTINCT match_return_items match_order_by match_skip match_limit where_clause {
-        $$ = new WithClause($3, $4, $5, $6, $7, true);
+    | KW_WITH KW_DISTINCT match_return_items match_sampling match_order_by match_skip match_limit where_clause {
+        $$ = new WithClause($3, $4, $5, $6, $7, $8, true);
     }
     ;
 
@@ -1927,11 +1934,11 @@ match_edge_type_list
     ;
 
 match_return
-    : KW_RETURN match_return_items match_order_by match_skip match_limit {
-        $$ = new MatchReturn($2, $3, $4, $5);
+    : KW_RETURN match_return_items match_sampling match_order_by match_skip match_limit {
+        $$ = new MatchReturn($2, $3, $4, $5, $6);
     }
-    | KW_RETURN KW_DISTINCT match_return_items match_order_by match_skip match_limit {
-        $$ = new MatchReturn($3, $4, $5, $6, true);
+    | KW_RETURN KW_DISTINCT match_return_items match_sampling match_order_by match_skip match_limit {
+        $$ = new MatchReturn($3, $4, $5, $6, $7, true);
     }
     ;
 
@@ -1955,6 +1962,15 @@ match_order_by
     }
     ;
 
+match_sampling
+    : %empty {
+        $$ = nullptr;
+    }
+    | KW_SAMPLING sampling_factors {
+        $$ = $2;
+    }
+    ;
+
 match_skip
     : %empty {
         $$ = nullptr;
@@ -2215,6 +2231,36 @@ order_by_sentence
     }
     ;
 
+sampling_factor
+    : expression legal_integer {
+        $$ = new SamplingFactor($1, $2, SamplingFactor::BINARY);
+    }
+    | expression legal_integer KW_BINARY {
+        $$ = new SamplingFactor($1, $2, SamplingFactor::BINARY);
+    }
+    | expression legal_integer KW_ALIAS {
+        $$ = new SamplingFactor($1, $2, SamplingFactor::ALIAS);
+    }
+    ;
+
+sampling_factors
+    : sampling_factor {
+        auto factors = new SamplingFactors();
+        factors->addFactor($1);
+        $$ = factors;
+    }
+    | sampling_factors COMMA sampling_factor {
+        $1->addFactor($3);
+        $$ = $1;
+    }
+    ;
+
+sampling_sentence
+    : KW_SAMPLING sampling_factors {
+        $$ = new SamplingSentence($2);
+    }
+    ;
+
 fetch_vertices_sentence
     : KW_FETCH KW_PROP KW_ON name_label_list vid_list yield_clause {
         $$ = new FetchVerticesSentence($4, $5, $6);
@@ -2919,6 +2965,7 @@ traverse_sentence
     | go_sentence { $$ = $1; }
     | lookup_sentence { $$ = $1; }
     | group_by_sentence { $$ = $1; }
+    | sampling_sentence { $$ = $1; }
     | order_by_sentence { $$ = $1; }
     | fetch_sentence { $$ = $1; }
     | find_path_sentence { $$ = $1; }
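
For reference, a hypothetical test case in the style of the ParserTest changes further down, showing the shapes of statement the new grammar is intended to accept; the schema names (like, likeness, player) are illustrative only.

TEST_F(ParserTest, SamplingClause) {
  {
    // Standalone SAMPLING sentence fed by a pipe.
    std::string query =
        "GO FROM \"1\" OVER like YIELD like.likeness AS weight, like._dst AS dst "
        "| SAMPLING $-.weight 10 BINARY";
    auto result = parse(query);
    ASSERT_TRUE(result.ok()) << result.status();
  }
  {
    // SAMPLING attached to a MATCH ... RETURN clause.
    std::string query = "MATCH (v:player) RETURN v.age AS age SAMPLING age 5 ALIAS";
    auto result = parse(query);
    ASSERT_TRUE(result.ok()) << result.status();
  }
}
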
diff --git a/src/parser/scanner.lex b/src/parser/scanner.lex
index 2e9f794df92..f18d754004d 100644
--- a/src/parser/scanner.lex
+++ b/src/parser/scanner.lex
@@ -161,6 +161,7 @@ LABEL_FULL_WIDTH            {CN_EN_FULL_WIDTH}{CN_EN_NUM_FULL_WIDTH}*
 "GET"                       { return TokenType::KW_GET; }
 "OF"                        { return TokenType::KW_OF; }
 "ORDER"                     { return TokenType::KW_ORDER; }
+"SAMPLING"                  { return TokenType::KW_SAMPLING; }
 "INGEST"                    { return TokenType::KW_INGEST; }
 "COMPACT"                   { return TokenType::KW_COMPACT; }
 "FLUSH"                     { return TokenType::KW_FLUSH; }
@@ -168,6 +169,8 @@ LABEL_FULL_WIDTH            {CN_EN_FULL_WIDTH}{CN_EN_NUM_FULL_WIDTH}*
 "ASC"                       { return TokenType::KW_ASC; }
 "ASCENDING"                 { return TokenType::KW_ASCENDING; }
 "DESCENDING"                { return TokenType::KW_DESCENDING; }
+"BINARY"                    { return TokenType::KW_BINARY; }
+"ALIAS"                     { return TokenType::KW_ALIAS; }
 "DISTINCT"                  { return TokenType::KW_DISTINCT; }
 "FETCH"                     { return TokenType::KW_FETCH; }
 "PROP"                      { return TokenType::KW_PROP; }
diff --git a/src/parser/test/ParserTest.cpp b/src/parser/test/ParserTest.cpp
index 44a06b291a5..82f27865ddd 100644
--- a/src/parser/test/ParserTest.cpp
+++ b/src/parser/test/ParserTest.cpp
@@ -1756,6 +1756,14 @@ TEST_F(ParserTest, UnreservedKeywords) {
     auto result = parse(query);
     ASSERT_TRUE(result.ok()) << result.status();
   }
+  {
+    std::string query =
+        "GO FROM \"123\" OVER like YIELD $$.tag1.EMAIL, like.users,"
+        "like._src, like._dst, like.type, $^.tag2.SPACE "
+        "| SAMPLING $-.SPACE 5 binary";
+    auto result = parse(query);
+    ASSERT_TRUE(result.ok()) << result.status();
+  }
   {
     std::string query =
         "GO FROM UUID() OVER like YIELD $$.tag1.EMAIL, like.users,"
@@ -1846,6 +1854,11 @@ TEST_F(ParserTest, Agg) {
     auto result = parse(query);
     ASSERT_TRUE(result.ok()) << result.status();
   }
+  {
+    std::string query = "SAMPLING $-.id 5 binary";
+    auto result = parse(query);
+    ASSERT_TRUE(result.ok()) << result.status();
+  }
   {
     std::string query =
         "GO FROM \"1\" OVER friend "
diff --git a/src/parser/test/ScannerTest.cpp b/src/parser/test/ScannerTest.cpp
index bd05a318dd3..d189de40536 100644
--- a/src/parser/test/ScannerTest.cpp
+++ b/src/parser/test/ScannerTest.cpp
@@ -350,6 +350,12 @@ TEST(Scanner, Basic) {
       CHECK_SEMANTIC_TYPE("ORDER", TokenType::KW_ORDER),
       CHECK_SEMANTIC_TYPE("Order", TokenType::KW_ORDER),
       CHECK_SEMANTIC_TYPE("order", TokenType::KW_ORDER),
+      CHECK_SEMANTIC_TYPE("sampling", TokenType::KW_SAMPLING),
+      CHECK_SEMANTIC_TYPE("Sampling", TokenType::KW_SAMPLING),
+      CHECK_SEMANTIC_TYPE("SAMPLING", TokenType::KW_SAMPLING),
+      CHECK_SEMANTIC_TYPE("binary", TokenType::KW_BINARY),
+      CHECK_SEMANTIC_TYPE("Binary", TokenType::KW_BINARY),
+      CHECK_SEMANTIC_TYPE("BINARY", TokenType::KW_BINARY),
       CHECK_SEMANTIC_TYPE("ASC", TokenType::KW_ASC),
       CHECK_SEMANTIC_TYPE("Asc", TokenType::KW_ASC),
       CHECK_SEMANTIC_TYPE("asc", TokenType::KW_ASC),

From d720e125c7ddbe9fa8931f6c9425c1220cfd5ce3 Mon Sep 17 00:00:00 2001
From: YolandaLyj <liuyujiauestc@163.com>
Date: Fri, 14 Oct 2022 10:14:43 +0800
Subject: [PATCH 2/7] Rename sampler.h to Sampler.h

---
 src/common/algorithm/{sampler.h => Sampler.h} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/common/algorithm/{sampler.h => Sampler.h} (100%)

diff --git a/src/common/algorithm/sampler.h b/src/common/algorithm/Sampler.h
similarity index 100%
rename from src/common/algorithm/sampler.h
rename to src/common/algorithm/Sampler.h

From 96fd97f7df94eb804e169ff5680771c319fc44d8 Mon Sep 17 00:00:00 2001
From: liuyujia17 <liuyujia17@jd.com>
Date: Wed, 4 Jan 2023 15:26:57 +0800
Subject: [PATCH 3/7] fix: remove unused warning

---
 src/graph/executor/query/SamplingExecutor.cpp | 26 ++++++-------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/src/graph/executor/query/SamplingExecutor.cpp b/src/graph/executor/query/SamplingExecutor.cpp
index ebcb1935ad9..5c1dc01ad92 100644
--- a/src/graph/executor/query/SamplingExecutor.cpp
+++ b/src/graph/executor/query/SamplingExecutor.cpp
@@ -64,31 +64,27 @@ template <typename U>
 void SamplingExecutor::executeBinarySample(Iterator *iter, size_t index,
                                            size_t count, DataSet &list) {
   auto uIter = static_cast<U *>(iter);
-  std::vector<WeightType> accumulate_weights;
+  std::vector<WeightType> accumulateWeights;
   auto it = uIter->begin();
   WeightType v;
   while (it != uIter->end()) {
     v = 1.0;
-    if ((*it)[index].type() == Value::Type::NULLVALUE) {
-      LOG(WARNING) << "Sampling type is nullvalue";
-    } else if ((*it)[index].type() == Value::Type::FLOAT) {
+    if ((*it)[index].type() == Value::Type::FLOAT) {
       v = (float)((*it)[index].getFloat());
     } else if ((*it)[index].type() == Value::Type::INT) {
       v = (float)((*it)[index].getInt());
-    } else {
-      LOG(WARNING) << "Sampling type is wrong, must be int or float.";
     }
-    if (!accumulate_weights.empty()) {
-      v += accumulate_weights.back();
+    if (!accumulateWeights.empty()) {
+      v += accumulateWeights.back();
     }
-    accumulate_weights.emplace_back(std::move(v));
+    accumulateWeights.emplace_back(std::move(v));
     ++it;
   }
-  nebula::algorithm::Normalization<WeightType>(accumulate_weights);
+  nebula::algorithm::Normalization<WeightType>(accumulateWeights);
   auto beg = uIter->begin();
   for (size_t i = 0; i < count; ++i) {
     auto idx =
-        nebula::algorithm::BinarySampleAcc<WeightType>(accumulate_weights);
+        nebula::algorithm::BinarySampleAcc<WeightType>(accumulateWeights);
     list.emplace_back(*(beg + idx));
   }
   uIter->clear();
@@ -103,17 +99,11 @@ void SamplingExecutor::executeAliasSample(Iterator *iter, size_t index,
   WeightType v;
   while (it != uIter->end()) {
     v = 1.0;
-    if ((*it)[index].type() == Value::Type::NULLVALUE) {
-      LOG(WARNING) << "Sampling type is nullvalue";
-
-    } else if ((*it)[index].type() == Value::Type::FLOAT) {
+    if ((*it)[index].type() == Value::Type::FLOAT) {
       v = (float)((*it)[index].getFloat());
     } else if ((*it)[index].type() == Value::Type::INT) {
       v = (float)((*it)[index].getInt());
-    } else {
-      LOG(WARNING) << "Sampling type is wrong, must be int or float.";
     }
-    LOG(ERROR) << "lyj debug v:" << v;
     weights.emplace_back(std::move(v));
     ++it;
   }

From 4ad02578a7a8ae70d002e1938de23df1bed95661 Mon Sep 17 00:00:00 2001
From: codesigner <codesigner.huang@vesoft.com>
Date: Tue, 10 Jan 2023 15:26:24 +0800
Subject: [PATCH 4/7] fix compile

---
 src/graph/executor/query/SamplingExecutor.cpp | 8 ++++----
 src/parser/parser.yy                          | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/graph/executor/query/SamplingExecutor.cpp b/src/graph/executor/query/SamplingExecutor.cpp
index 5c1dc01ad92..f14c4114308 100644
--- a/src/graph/executor/query/SamplingExecutor.cpp
+++ b/src/graph/executor/query/SamplingExecutor.cpp
@@ -70,9 +70,9 @@ void SamplingExecutor::executeBinarySample(Iterator *iter, size_t index,
   while (it != uIter->end()) {
     v = 1.0;
     if ((*it)[index].type() == Value::Type::FLOAT) {
-      v = (float)((*it)[index].getFloat());
+      v = static_cast<float>((*it)[index].getFloat());
     } else if ((*it)[index].type() == Value::Type::INT) {
-      v = (float)((*it)[index].getInt());
+      v = static_cast<float>((*it)[index].getInt());
     }
     if (!accumulateWeights.empty()) {
       v += accumulateWeights.back();
@@ -100,9 +100,9 @@ void SamplingExecutor::executeAliasSample(Iterator *iter, size_t index,
   while (it != uIter->end()) {
     v = 1.0;
     if ((*it)[index].type() == Value::Type::FLOAT) {
-      v = (float)((*it)[index].getFloat());
+      v = static_cast<float>((*it)[index].getFloat());
     } else if ((*it)[index].type() == Value::Type::INT) {
-      v = (float)((*it)[index].getInt());
+      v = static_cast<float>((*it)[index].getInt());
     }
     weights.emplace_back(std::move(v));
     ++it;
diff --git a/src/parser/parser.yy b/src/parser/parser.yy
index 281694d00e7..1caa0795720 100644
--- a/src/parser/parser.yy
+++ b/src/parser/parser.yy
@@ -398,7 +398,6 @@ using namespace nebula;
 %type <sentence> traverse_sentence unwind_sentence
 %type <sentence> go_sentence match_sentence lookup_sentence find_path_sentence get_subgraph_sentence
 %type <sentence> group_by_sentence order_by_sentence limit_sentence sampling_sentence
-%type <sentence> group_by_sentence order_by_sentence limit_sentence
 %type <sentence> fetch_sentence fetch_vertices_sentence fetch_edges_sentence
 %type <sentence> set_sentence piped_sentence assignment_sentence match_sentences
 %type <sentence> yield_sentence use_sentence

From 7804f35bedf85a2c4ec5f94c8b0a912cdef85ca0 Mon Sep 17 00:00:00 2001
From: liuyujia17 <liuyujia17@jd.com>
Date: Wed, 11 Jan 2023 15:14:39 +0800
Subject: [PATCH 5/7] fix bugs found when checking cpplint code style

---
 src/common/algorithm/Sampler.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/common/algorithm/Sampler.h b/src/common/algorithm/Sampler.h
index 2ec6eacac1f..2d9fabbc994 100644
--- a/src/common/algorithm/Sampler.h
+++ b/src/common/algorithm/Sampler.h
@@ -15,8 +15,6 @@
 
 namespace nebula {
 namespace algorithm {
-
-namespace {
 template <typename T = float>
 T UniformRandom() {
   static_assert(std::is_floating_point<T>::value,
@@ -30,7 +28,6 @@ T UniformRandom() {
 #endif
   return u(e);
 }
-}  // namespace
 
 template <typename T>
 void Normalization(std::vector<T>& distribution) {
@@ -40,7 +37,7 @@ void Normalization(std::vector<T>& distribution) {
   for (auto& dist : distribution) {
     norm_sum += dist;
   }
-  if (norm_sum <= FLT_EPSILON and !distribution.empty()) {
+  if (norm_sum <= FLT_EPSILON && !distribution.empty()) {
     for (size_t i = 0; i < distribution.size(); ++i) {
       distribution[i] = 1.0f / static_cast<T>(distribution.size());
     }
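
The visible half of Normalization is the degenerate case: when the weights sum to (almost) zero, every entry becomes the uniform 1/n. The sketch below restates that behaviour and, as an assumption, fills in the non-degenerate branch (scaling so the weights sum to 1), which lies outside this hunk.

#include <cfloat>
#include <vector>

template <typename T>
void normalizationSketch(std::vector<T> &dist) {
  T sum = T(0);
  for (const auto &w : dist) sum += w;
  if (sum <= FLT_EPSILON && !dist.empty()) {
    // All weights are (effectively) zero: fall back to a uniform distribution.
    for (auto &w : dist) w = T(1) / static_cast<T>(dist.size());
  } else {
    // Assumed remainder of the function: scale the weights to sum to 1.
    for (auto &w : dist) w /= sum;
  }
}
// e.g. {0, 0, 0, 0} -> {0.25, 0.25, 0.25, 0.25}; {3, 1, 6} -> {0.3, 0.1, 0.6}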

From 241b25345e8aac7a104b5dc17ceefa521f9421d8 Mon Sep 17 00:00:00 2001
From: codesigner <codesigner.huang@vesoft.com>
Date: Wed, 11 Jan 2023 15:29:53 +0800
Subject: [PATCH 6/7] format changes using clang-format

---
 src/common/algorithm/Sampler.h                |  9 ++--
 src/graph/context/ast/CypherAstContext.h      |  2 +-
 src/graph/executor/Executor.cpp               |  2 +-
 src/graph/executor/query/SamplingExecutor.cpp | 44 ++++++++-----------
 .../planner/match/SamplingClausePlanner.cpp   |  9 ++--
 src/graph/planner/plan/Query.cpp              |  3 +-
 src/graph/planner/plan/Query.h                | 16 +++----
 src/graph/validator/MatchValidator.cpp        | 21 ++++-----
 src/graph/validator/SamplingValidator.cpp     |  6 +--
 src/graph/validator/SamplingValidator.h       |  3 +-
 src/graph/validator/Validator.cpp             |  2 +-
 src/parser/TraverseSentences.h                |  2 -
 12 files changed, 48 insertions(+), 71 deletions(-)

diff --git a/src/common/algorithm/Sampler.h b/src/common/algorithm/Sampler.h
index 2d9fabbc994..ebcde5b433a 100644
--- a/src/common/algorithm/Sampler.h
+++ b/src/common/algorithm/Sampler.h
@@ -17,8 +17,7 @@ namespace nebula {
 namespace algorithm {
 template <typename T = float>
 T UniformRandom() {
-  static_assert(std::is_floating_point<T>::value,
-                "Only support float point type");
+  static_assert(std::is_floating_point<T>::value, "Only support floating point type");
 #if defined(__clang__)
   static std::default_random_engine e(std::time(nullptr));
   static std::uniform_real_distribution<T> u(0., 1.);
@@ -31,8 +30,7 @@ T UniformRandom() {
 
 template <typename T>
 void Normalization(std::vector<T>& distribution) {
-  static_assert(std::is_floating_point<T>::value,
-                "Only support float point type");
+  static_assert(std::is_floating_point<T>::value, "Only support floating point type");
   T norm_sum = 0.0f;
   for (auto& dist : distribution) {
     norm_sum += dist;
@@ -52,8 +50,7 @@ void Normalization(std::vector<T>& distribution) {
 template <typename T = float>
 class AliasSampler {
  public:
-  static_assert(std::is_floating_point<T>::value,
-                "Only support float point type");
+  static_assert(std::is_floating_point<T>::value, "Only support floating point type");
   using AliasType = uint32_t;
   bool Init(std::vector<T>& distribution);
   inline bool Init(const std::vector<T>& distribution);
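
AliasSampler presumably implements Walker's alias method: linear-time setup, constant-time weighted draws. Its Init/Sample bodies are outside this patch, so the snippet below is an independent sketch of the technique (with an assumed draw() interface), not the class's actual code.

#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

class AliasSketch {
 public:
  // Build the prob/alias tables; assumes a non-empty, positive-sum input.
  explicit AliasSketch(std::vector<float> probs) : prob_(probs.size()), alias_(probs.size()) {
    const std::size_t n = probs.size();
    float sum = 0.0f;
    for (float p : probs) sum += p;
    std::vector<std::size_t> small, large;
    for (std::size_t i = 0; i < n; ++i) {
      probs[i] = probs[i] * n / sum;                // scale so the average weight is 1
      (probs[i] < 1.0f ? small : large).push_back(i);
    }
    while (!small.empty() && !large.empty()) {
      std::size_t s = small.back(); small.pop_back();
      std::size_t l = large.back(); large.pop_back();
      prob_[s] = probs[s];
      alias_[s] = static_cast<std::uint32_t>(l);    // matches AliasType = uint32_t above
      probs[l] += probs[s] - 1.0f;                  // hand the excess to the large bucket
      (probs[l] < 1.0f ? small : large).push_back(l);
    }
    for (std::size_t i : large) prob_[i] = 1.0f;
    for (std::size_t i : small) prob_[i] = 1.0f;    // numerical leftovers
  }

  // Draw one index: pick a bucket uniformly, then keep it or jump to its alias.
  std::size_t draw(std::mt19937 &eng) const {
    std::uniform_int_distribution<std::size_t> pick(0, prob_.size() - 1);
    std::uniform_real_distribution<float> coin(0.0f, 1.0f);
    const std::size_t i = pick(eng);
    return coin(eng) < prob_[i] ? i : alias_[i];
  }

 private:
  std::vector<float> prob_;
  std::vector<std::uint32_t> alias_;
};
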
diff --git a/src/graph/context/ast/CypherAstContext.h b/src/graph/context/ast/CypherAstContext.h
index e1ef9ae5a18..3e2ff139727 100644
--- a/src/graph/context/ast/CypherAstContext.h
+++ b/src/graph/context/ast/CypherAstContext.h
@@ -11,8 +11,8 @@
 #include "common/expression/Expression.h"
 #include "common/expression/PathBuildExpression.h"
 #include "graph/context/ast/AstContext.h"
-#include "parser/MatchSentence.h"
 #include "graph/planner/plan/Query.h"
+#include "parser/MatchSentence.h"
 namespace nebula {
 namespace graph {
 enum class CypherClauseKind : uint8_t {
diff --git a/src/graph/executor/Executor.cpp b/src/graph/executor/Executor.cpp
index 0beeea4bd00..e59363e1277 100644
--- a/src/graph/executor/Executor.cpp
+++ b/src/graph/executor/Executor.cpp
@@ -85,11 +85,11 @@
 #include "graph/executor/query/ProjectExecutor.h"
 #include "graph/executor/query/RollUpApplyExecutor.h"
 #include "graph/executor/query/SampleExecutor.h"
+#include "graph/executor/query/SamplingExecutor.h"
 #include "graph/executor/query/ScanEdgesExecutor.h"
 #include "graph/executor/query/ScanVerticesExecutor.h"
 #include "graph/executor/query/SortExecutor.h"
 #include "graph/executor/query/TopNExecutor.h"
-#include "graph/executor/query/SamplingExecutor.h"
 #include "graph/executor/query/TraverseExecutor.h"
 #include "graph/executor/query/UnionAllVersionVarExecutor.h"
 #include "graph/executor/query/UnionExecutor.h"
diff --git a/src/graph/executor/query/SamplingExecutor.cpp b/src/graph/executor/query/SamplingExecutor.cpp
index f14c4114308..0d658196ee2 100644
--- a/src/graph/executor/query/SamplingExecutor.cpp
+++ b/src/graph/executor/query/SamplingExecutor.cpp
@@ -18,51 +18,42 @@ folly::Future<Status> SamplingExecutor::execute() {
   Result result = ectx_->getResult(sampling->inputVar());
   auto *iter = result.iterRef();
   if (UNLIKELY(iter == nullptr)) {
-    return Status::Error(
-        "Internal error: nullptr iterator in sampling executor");
+    return Status::Error("Internal error: nullptr iterator in sampling executor");
   }
   if (UNLIKELY(!result.iter()->isSequentialIter())) {
     std::stringstream ss;
-    ss << "Internal error: Sampling executor does not supported "
-       << iter->kind();
+    ss << "Internal error: Sampling executor does not supported " << iter->kind();
     return Status::Error(ss.str());
   }
   auto &factors = sampling->factors();
   auto size = iter->size();
   if (size <= 0) {
     iter->clear();
-    return finish(ResultBuilder()
-                      .value(result.valuePtr())
-                      .iter(std::move(result).iter())
-                      .build());
+    return finish(ResultBuilder().value(result.valuePtr()).iter(std::move(result).iter()).build());
   }
   auto colNames = result.value().getDataSet().colNames;
   DataSet dataset(std::move(colNames));
   for (auto factor : factors) {
     if (factor.count <= 0) {
       iter->clear();
-      return finish(ResultBuilder()
-                        .value(result.valuePtr())
-                        .iter(std::move(result).iter())
-                        .build());
+      return finish(
+          ResultBuilder().value(result.valuePtr()).iter(std::move(result).iter()).build());
     }
     if (factor.samplingType == SamplingFactor::SamplingType::BINARY) {
-      executeBinarySample<SequentialIter>(iter, factor.colIdx, factor.count,
-                                          dataset);
+      executeBinarySample<SequentialIter>(iter, factor.colIdx, factor.count, dataset);
     } else {
-      executeAliasSample<SequentialIter>(iter, factor.colIdx, factor.count,
-                                         dataset);
+      executeAliasSample<SequentialIter>(iter, factor.colIdx, factor.count, dataset);
     }
   }
-  return finish(ResultBuilder()
-                    .value(Value(std::move(dataset)))
-                    .iter(Iterator::Kind::kSequential)
-                    .build());
+  return finish(
+      ResultBuilder().value(Value(std::move(dataset))).iter(Iterator::Kind::kSequential).build());
 }
 
 template <typename U>
-void SamplingExecutor::executeBinarySample(Iterator *iter, size_t index,
-                                           size_t count, DataSet &list) {
+void SamplingExecutor::executeBinarySample(Iterator *iter,
+                                           size_t index,
+                                           size_t count,
+                                           DataSet &list) {
   auto uIter = static_cast<U *>(iter);
   std::vector<WeightType> accumulateWeights;
   auto it = uIter->begin();
@@ -83,16 +74,17 @@ void SamplingExecutor::executeBinarySample(Iterator *iter, size_t index,
   nebula::algorithm::Normalization<WeightType>(accumulateWeights);
   auto beg = uIter->begin();
   for (size_t i = 0; i < count; ++i) {
-    auto idx =
-        nebula::algorithm::BinarySampleAcc<WeightType>(accumulateWeights);
+    auto idx = nebula::algorithm::BinarySampleAcc<WeightType>(accumulateWeights);
     list.emplace_back(*(beg + idx));
   }
   uIter->clear();
 }
 
 template <typename U>
-void SamplingExecutor::executeAliasSample(Iterator *iter, size_t index,
-                                          size_t count, DataSet &list) {
+void SamplingExecutor::executeAliasSample(Iterator *iter,
+                                          size_t index,
+                                          size_t count,
+                                          DataSet &list) {
   auto uIter = static_cast<U *>(iter);
   std::vector<WeightType> weights;
   auto it = uIter->begin();
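
executeBinarySample accumulates running (prefix-sum) weights, normalizes them, and then asks BinarySampleAcc for `count` row indices. BinarySampleAcc's body is outside this patch, so the sketch below assumes it does a binary search over the normalized prefix sums with a uniform variate in [0, 1); Normalization is likewise replaced here by a direct division so the last prefix sum is exactly 1.0.

#include <algorithm>
#include <cstddef>
#include <random>
#include <vector>

// Assumed behaviour of BinarySampleAcc: return the first index whose prefix sum >= r.
std::size_t binarySampleAccSketch(const std::vector<float> &acc) {
  static thread_local std::mt19937 eng(std::random_device{}());
  std::uniform_real_distribution<float> u(0.0f, 1.0f);
  const float r = u(eng);
  auto it = std::lower_bound(acc.begin(), acc.end(), r);
  return it == acc.end() ? acc.size() - 1 : static_cast<std::size_t>(it - acc.begin());
}

// Weighted sampling with replacement, mirroring the executor's prefix-sum loop
// (v += accumulateWeights.back()). Assumes a non-empty, positive-sum input.
std::vector<std::size_t> sampleRowsSketch(const std::vector<float> &rowWeights, std::size_t count) {
  std::vector<float> acc;
  acc.reserve(rowWeights.size());
  float running = 0.0f;
  for (float w : rowWeights) {
    running += w;
    acc.push_back(running);
  }
  for (float &a : acc) a /= running;  // last entry becomes 1.0
  std::vector<std::size_t> picked;
  picked.reserve(count);
  for (std::size_t i = 0; i < count; ++i) {
    picked.push_back(binarySampleAccSketch(acc));
  }
  return picked;
}
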
diff --git a/src/graph/planner/match/SamplingClausePlanner.cpp b/src/graph/planner/match/SamplingClausePlanner.cpp
index 679e8068fdc..bbc900d4a4f 100644
--- a/src/graph/planner/match/SamplingClausePlanner.cpp
+++ b/src/graph/planner/match/SamplingClausePlanner.cpp
@@ -9,8 +9,7 @@
 
 namespace nebula {
 namespace graph {
-StatusOr<SubPlan> SamplingClausePlanner::transform(
-    CypherClauseContextBase* clauseCtx) {
+StatusOr<SubPlan> SamplingClausePlanner::transform(CypherClauseContextBase* clauseCtx) {
   if (clauseCtx->kind != CypherClauseKind::kSampling) {
     return Status::Error("Not a valid context for SamplingClausePlanner.");
   }
@@ -21,11 +20,9 @@ StatusOr<SubPlan> SamplingClausePlanner::transform(
   return samplingPlan;
 }
 
-Status SamplingClausePlanner::buildSampling(SamplingClauseContext* octx,
-                                            SubPlan& subplan) {
+Status SamplingClausePlanner::buildSampling(SamplingClauseContext* octx, SubPlan& subplan) {
   auto* currentRoot = subplan.root;
-  auto* sampling =
-      Sampling::make(octx->qctx, currentRoot, octx->indexedSamplingFactors);
+  auto* sampling = Sampling::make(octx->qctx, currentRoot, octx->indexedSamplingFactors);
   subplan.root = sampling;
   subplan.tail = sampling;
   return Status::OK();
diff --git a/src/graph/planner/plan/Query.cpp b/src/graph/planner/plan/Query.cpp
index cacd7bc9e18..1f6ed9592cd 100644
--- a/src/graph/planner/plan/Query.cpp
+++ b/src/graph/planner/plan/Query.cpp
@@ -436,8 +436,7 @@ void Sort::cloneMembers(const Sort& p) {
 
 std::unique_ptr<PlanNodeDescription> Sampling::explain() const {
   auto desc = SingleInputNode::explain();
-  addDescription("factors", folly::toJson(util::toJson(factorsString())),
-                 desc.get());
+  addDescription("factors", folly::toJson(util::toJson(factorsString())), desc.get());
   return desc;
 }
 
diff --git a/src/graph/planner/plan/Query.h b/src/graph/planner/plan/Query.h
index 1182e3f3f18..5add55541bc 100644
--- a/src/graph/planner/plan/Query.h
+++ b/src/graph/planner/plan/Query.h
@@ -1025,21 +1025,22 @@ struct SamplingParams {
 // Sampling the given record set.
 class Sampling final : public SingleInputNode {
  public:
-  static Sampling* make(QueryContext* qctx, PlanNode* input,
+  static Sampling* make(QueryContext* qctx,
+                        PlanNode* input,
                         std::vector<SamplingParams> factors = {}) {
-    return qctx->objPool()->makeAndAdd<Sampling>(qctx, input,
-                                                 std::move(factors));
+    return qctx->objPool()->makeAndAdd<Sampling>(qctx, input, std::move(factors));
   }
 
-  const std::vector<SamplingParams>& factors() const { return factors_; }
+  const std::vector<SamplingParams>& factors() const {
+    return factors_;
+  }
 
   PlanNode* clone() const override;
   std::unique_ptr<PlanNodeDescription> explain() const override;
 
  private:
   friend ObjectPool;
-  Sampling(QueryContext* qctx, PlanNode* input,
-           std::vector<SamplingParams> factors)
+  Sampling(QueryContext* qctx, PlanNode* input, std::vector<SamplingParams> factors)
       : SingleInputNode(qctx, Kind::kSampling, input) {
     factors_ = std::move(factors);
   }
@@ -1050,8 +1051,7 @@ class Sampling final : public SingleInputNode {
     for (auto& factor : factors_) {
       std::string colName = cols[factor.colIdx];
       std::string order =
-          factor.samplingType == SamplingFactor::SamplingType::BINARY ? "BINARY"
-                                                                      : "ALIAS";
+          factor.samplingType == SamplingFactor::SamplingType::BINARY ? "BINARY" : "ALIAS";
       std::vector temp = {colName, std::to_string(factor.count), order};
       result.emplace_back(temp);
     }
diff --git a/src/graph/validator/MatchValidator.cpp b/src/graph/validator/MatchValidator.cpp
index b303abb7842..09be77dfcbb 100644
--- a/src/graph/validator/MatchValidator.cpp
+++ b/src/graph/validator/MatchValidator.cpp
@@ -497,9 +497,8 @@ Status MatchValidator::validateReturn(MatchReturn *ret,
 
   if (ret->samplingFactors() != nullptr) {
     auto samplingCtx = getContext<SamplingClauseContext>();
-    NG_RETURN_IF_ERROR(validateSampling(ret->samplingFactors(),
-                                        retClauseCtx.yield->yieldColumns,
-                                        *samplingCtx));
+    NG_RETURN_IF_ERROR(
+        validateSampling(ret->samplingFactors(), retClauseCtx.yield->yieldColumns, *samplingCtx));
     retClauseCtx.sampling = std::move(samplingCtx);
   }
 
@@ -873,9 +872,9 @@ Status MatchValidator::validateOrderBy(const OrderFactors *factors,
 // Check validity of order by options.
 // Disable duplicate columns,
 // check expression of column (only constant expression and label expression)
-Status MatchValidator::validateSampling(
-    const SamplingFactors *factors, const YieldColumns *yieldColumns,
-    SamplingClauseContext &samplingCtx) const {
+Status MatchValidator::validateSampling(const SamplingFactors *factors,
+                                        const YieldColumns *yieldColumns,
+                                        SamplingClauseContext &samplingCtx) const {
   if (factors != nullptr) {
     std::vector<std::string> inputColList;
     inputColList.reserve(yieldColumns->columns().size());
@@ -885,8 +884,7 @@ Status MatchValidator::validateSampling(
     std::unordered_map<std::string, size_t> inputColIndices;
     for (auto i = 0u; i < inputColList.size(); i++) {
       if (!inputColIndices.emplace(inputColList[i], i).second) {
-        return Status::SemanticError("Duplicated columns not allowed: %s",
-                                     inputColList[i].c_str());
+        return Status::SemanticError("Duplicated columns not allowed: %s", inputColList[i].c_str());
       }
     }
 
@@ -897,16 +895,15 @@ Status MatchValidator::validateSampling(
       auto factorExpr = factor->expr();
       if (ExpressionUtils::isEvaluableExpr(factorExpr, qctx_)) continue;
       if (factorExpr->kind() != Expression::Kind::kLabel) {
-        return Status::SemanticError(
-            "Only column name can be used as sort item");
+        return Status::SemanticError("Only column name can be used as sort item");
       }
       auto &name = static_cast<const LabelExpression *>(factor->expr())->name();
       auto iter = inputColIndices.find(name);
       if (iter == inputColIndices.end()) {
         return Status::SemanticError("Column `%s' not found", name.c_str());
       }
-      samplingCtx.indexedSamplingFactors.emplace_back(SamplingParams(
-          iter->second, factor->count(), factor->samplingType()));
+      samplingCtx.indexedSamplingFactors.emplace_back(
+          SamplingParams(iter->second, factor->count(), factor->samplingType()));
     }
   }
 
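The core of validateSampling is a name-to-index translation: every yielded column gets an index, duplicate names are rejected, and each sampling factor that is not an evaluable constant must name one of those columns before it is stored as an index-based SamplingParams. The simplified sketch below uses plain strings and structs in place of YieldColumns / SamplingFactors (stand-ins for illustration only).

#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct FactorIn  { std::string column; std::int64_t count; bool binary; };
struct FactorOut { std::size_t colIdx; std::int64_t count; bool binary; };

// Fails on the same conditions validateSampling rejects: duplicate yield
// columns, or a factor naming a column that was never yielded.
bool resolveFactors(const std::vector<std::string> &yieldCols,
                    const std::vector<FactorIn> &factors,
                    std::vector<FactorOut> &out,
                    std::string &error) {
  std::unordered_map<std::string, std::size_t> index;
  for (std::size_t i = 0; i < yieldCols.size(); ++i) {
    if (!index.emplace(yieldCols[i], i).second) {
      error = "Duplicated columns not allowed: " + yieldCols[i];
      return false;
    }
  }
  for (const auto &f : factors) {
    auto it = index.find(f.column);
    if (it == index.end()) {
      error = "Column `" + f.column + "' not found";
      return false;
    }
    out.push_back({it->second, f.count, f.binary});
  }
  return true;
}
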
diff --git a/src/graph/validator/SamplingValidator.cpp b/src/graph/validator/SamplingValidator.cpp
index a6841c0c1d8..2f7ebd567a1 100644
--- a/src/graph/validator/SamplingValidator.cpp
+++ b/src/graph/validator/SamplingValidator.cpp
@@ -54,8 +54,7 @@ Status SamplingValidator::validateImpl() {
   } else if (!exprProps_.varProps().empty()) {
     if (!userDefinedVarNameList_.empty()) {
       if (userDefinedVarNameList_.size() != 1) {
-        return Status::SemanticError(
-            "Multiple user defined vars are not supported yet.");
+        return Status::SemanticError("Multiple user defined vars are not supported yet.");
       }
       userDefinedVarName_ = *userDefinedVarNameList_.begin();
       outputs_ = vctx_->getVar(userDefinedVarName_);
@@ -67,8 +66,7 @@ Status SamplingValidator::validateImpl() {
 
 Status SamplingValidator::toPlan() {
   auto *plan = qctx_->plan();
-  auto *samplingNode =
-      Sampling::make(qctx_, plan->root(), std::move(colSamplingTypes_));
+  auto *samplingNode = Sampling::make(qctx_, plan->root(), std::move(colSamplingTypes_));
   std::vector<std::string> colNames;
   for (auto &col : outputs_) {
     colNames.emplace_back(col.name);
diff --git a/src/graph/validator/SamplingValidator.h b/src/graph/validator/SamplingValidator.h
index c7d732f630b..a34598fddcc 100644
--- a/src/graph/validator/SamplingValidator.h
+++ b/src/graph/validator/SamplingValidator.h
@@ -13,8 +13,7 @@ namespace nebula {
 namespace graph {
 class SamplingValidator final : public Validator {
  public:
-  SamplingValidator(Sentence* sentence, QueryContext* context)
-      : Validator(sentence, context) {
+  SamplingValidator(Sentence* sentence, QueryContext* context) : Validator(sentence, context) {
     setNoSpaceRequired();
   }
 
diff --git a/src/graph/validator/Validator.cpp b/src/graph/validator/Validator.cpp
index 9801ac10ed2..ed56575af57 100644
--- a/src/graph/validator/Validator.cpp
+++ b/src/graph/validator/Validator.cpp
@@ -29,9 +29,9 @@
 #include "graph/validator/MatchValidator.h"
 #include "graph/validator/MutateValidator.h"
 #include "graph/validator/OrderByValidator.h"
-#include "graph/validator/SamplingValidator.h"
 #include "graph/validator/PipeValidator.h"
 #include "graph/validator/ReportError.h"
+#include "graph/validator/SamplingValidator.h"
 #include "graph/validator/SequentialValidator.h"
 #include "graph/validator/SetValidator.h"
 #include "graph/validator/UnwindValidator.h"
diff --git a/src/parser/TraverseSentences.h b/src/parser/TraverseSentences.h
index 91f2448aa91..62eda23f2ad 100644
--- a/src/parser/TraverseSentences.h
+++ b/src/parser/TraverseSentences.h
@@ -301,7 +301,6 @@ class OrderBySentence final : public Sentence {
   std::unique_ptr<OrderFactors> orderFactors_;
 };
 
-
 class SamplingFactor final {
  public:
   enum SamplingType : uint8_t { BINARY, ALIAS };
@@ -356,7 +355,6 @@ class SamplingFactors final {
   std::vector<std::unique_ptr<SamplingFactor>> factors_;
 };
 
-
 class SamplingSentence final : public Sentence {
  public:
   explicit SamplingSentence(SamplingFactors* factors) {

From de461aef9ca420ce0c04fe76416d86ddeb4bef00 Mon Sep 17 00:00:00 2001
From: codesigner <codesigner.huang@vesoft.com>
Date: Wed, 11 Jan 2023 15:32:20 +0800
Subject: [PATCH 7/7] format changes using clang-format

---
 src/graph/executor/query/SamplingExecutor.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/graph/executor/query/SamplingExecutor.h b/src/graph/executor/query/SamplingExecutor.h
index 8e42ca10d61..0c5dfca0c34 100644
--- a/src/graph/executor/query/SamplingExecutor.h
+++ b/src/graph/executor/query/SamplingExecutor.h
@@ -18,11 +18,9 @@ class SamplingExecutor final : public Executor {
 
  private:
   template <typename U>
-  void executeBinarySample(Iterator *iter, size_t index, size_t count,
-                           DataSet &list);
+  void executeBinarySample(Iterator *iter, size_t index, size_t count, DataSet &list);
   template <typename U>
-  void executeAliasSample(Iterator *iter, size_t index, size_t count,
-                          DataSet &list);
+  void executeAliasSample(Iterator *iter, size_t index, size_t count, DataSet &list);
 };
 
 }  // namespace graph