vesoft-inc · CPWstatic · Jun 30, 2022 · Jun 27, 2022 · Jun 28, 2022 · Jun 30, 2022
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
@@ -2,7 +2,7 @@ name: nightly
 
 on:
   schedule:
-    - cron: '0 18 * * *'
+    - cron: "0 18 * * *"
 
 concurrency:
   group: nightly
@@ -35,7 +35,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/setup-go@v2
         with:
-          go-version: '^1.16.7'
+          go-version: "^1.16.7"
       - name: package
         run: ./package/package.sh -t Release -r ON -p OFF -s FALSE -k OFF
       - name: output some vars
@@ -199,4 +199,99 @@ jobs:
         if: ${{ failure() }}
         with:
           name: ${{ matrix.os }}-${{ matrix.compiler }}-nebula-test-logs
-          path:  ./build/server_*/logs/
+          path: ./build/server_*/logs/
+
+  test:
+    name: Tck test
+    needs: package
+    runs-on: [self-hosted, nebula]
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu1804
+          - ubuntu2004
+          - centos7
+          - centos8
+        extra_config:
+          - "ENABLE_SSL=true CA_SIGNED=true QUERY_CONCURRENTLY=false"
+          - "ENABLE_SSL=false CA_SIGNED=false QUERY_CONCURRENTLY=true"
+    env:
+      OSS_DIR: nebula-graph/package/nightly
+    container:
+      image: vesoft/nebula-dev:${{ matrix.os }}
+    steps:
+      # Hands a cleanup command to the post-run action: it deletes everything below the
+      # working directory (presumably executed after the job finishes so the self-hosted
+      # runner is left clean — confirm against the action's documentation).
+      - uses: webiny/action-post-run@2.0.1
+        with:
+          run: sh -c "find . -mindepth 1 -delete"
+      - uses: actions/checkout@v2
+      # Publishes step output `subdir`: yesterday's UTC date formatted as YYYY.MM.DD.
+      # Later steps use it for the OSS directory and for the package file name.
+      # NOTE(review): SHA_EXT is not referenced by this step's script — confirm it is needed.
+      - name: output some vars
+        id: vars
+        env:
+          SHA_EXT: sha256sum.txt
+        run: |
+          subdir=$(date -u +%Y.%m.%d -d "1 days ago")
+          echo "::set-output name=subdir::$subdir"
+
+      # Maps the matrix OS to the platform suffix of the nightly tarball and publishes the
+      # complete file name as step output `p`. Nothing is emitted for an unlisted OS.
+      - id: oss_package
+        run: |
+          case ${{ matrix.os }} in
+            ubuntu1804) suffix=ubuntu1804.amd64 ;;
+            ubuntu2004) suffix=ubuntu2004.amd64 ;;
+            centos7)    suffix=el7.x86_64 ;;
+            centos8)    suffix=el8.x86_64 ;;
+            *)          suffix= ;;
+          esac
+          if [ -n "${suffix}" ]; then
+            echo "::set-output name=p::nebula-graph-${{ steps.vars.outputs.subdir }}-nightly.${suffix}.tar.gz"
+          fi
+      # Empties build/ when it already exists, otherwise creates it (mkdir also runs if
+      # the rm fails), then runs the `init` target of the Makefile under tests/.
+      - name: Prepare environment
+        id: prepare
+        run: |
+          [ -d build/ ] && rm -rf build/* || mkdir -p build
+          make init -C tests
+      # Despite its name this step does not invoke CMake: it only publishes step output
+      # `j` = 8, which the Pytest and TCK steps pass to make as J.
+      - name: CMake
+        id: cmake
+        run: |
+          echo "::set-output name=j::8"
+      # Fetches this OS's nightly tarball from OSS into build/, unpacks it there, and
+      # moves the contents of the unpacked top-level directory up into build/ itself.
+      - name: download from oss
+        run: |
+          ossutil64 -e ${{ secrets.OSS_ENDPOINT }} \
+                    -i ${{ secrets.OSS_ID }} \
+                    -k  ${{ secrets.OSS_SECRET }} \
+                    -f cp oss://${{ env.OSS_DIR  }}/${{ steps.vars.outputs.subdir }}/${{ steps.oss_package.outputs.p }} \
+                    build/.
+          tar zxvf build/${{ steps.oss_package.outputs.p }} -C build
+          # Strip only the literal trailing ".tar.gz"; an unanchored sed pattern with
+          # unescaped dots could match (and cut) elsewhere in the file name.
+          d=$(basename "${{ steps.oss_package.outputs.p }}" .tar.gz)
+          mv "build/${d}"/* build/.
+
+      # Brings the test cluster up through the tests/ Makefile; the matrix entry's
+      # ENABLE_SSL / CA_SIGNED / QUERY_CONCURRENTLY settings are passed as make variables.
+      - name: Setup cluster
+        run: |
+          make CONTAINERIZED=true ${{ matrix.extra_config }} up
+        working-directory: tests/
+        timeout-minutes: 2
+      # Runs the `test` make target with J taken from the cmake step's output.
+      - name: Pytest
+        run: |
+          make RM_DIR=false DEBUG=false J=${{ steps.cmake.outputs.j }} test
+        working-directory: tests/
+        timeout-minutes: 15
+      # Runs the `tck` make target with the same J value.
+      - name: TCK
+        run: |
+          make RM_DIR=false DEBUG=false J=${{ steps.cmake.outputs.j }} tck
+        working-directory: tests/
+        timeout-minutes: 60
+      # NOTE(review): this step has no `if:` condition, so it is skipped when an earlier
+      # step fails — confirm the cluster needs no explicit teardown in that case.
+      - name: Down cluster
+        run: |
+          make RM_DIR=false down
+        working-directory: tests/
+        timeout-minutes: 2
+      # Uploads the server logs as an artifact, but only when an earlier step failed.
+      - name: Upload logs
+        uses: actions/upload-artifact@v2
+        if: ${{ failure() }}
+        with:
+          # This job's matrix defines only `os` and `extra_config` (there is no
+          # `compiler` key), so the matrix job index is used to keep the artifact names
+          # of the two extra_config variants of one OS from colliding.
+          name: ${{ matrix.os }}-${{ strategy.job-index }}-nebula-test-logs
+          path: ./build/server_*/logs*/
diff --git a/.github/workflows/rc.yml b/.github/workflows/rc.yml
@@ -185,6 +185,9 @@ jobs:
           - ubuntu2004
           - centos7
           - centos8
+        extra_config:
+          - "ENABLE_SSL=true CA_SIGNED=true QUERY_CONCURRENTLY=false"
+          - "ENABLE_SSL=false CA_SIGNED=false QUERY_CONCURRENTLY=true"
     env:
       OSS_DIR: nebula-graph/rc
     container:
@@ -235,7 +238,7 @@ jobs:
 
       - name: Setup cluster
         run: |
-          make CONTAINERIZED=true ENABLE_SSL=true CA_SIGNED=true up
+          make CONTAINERIZED=true ${{ matrix.extra_config }} up
         working-directory: tests/
         timeout-minutes: 2
       - name: Pytest
@@ -258,4 +261,4 @@ jobs:
         if: ${{ failure() }}
         with:
           name: ${{ matrix.os }}-${{ matrix.compiler }}-nebula-test-logs
-          path:  ./build/server_*/logs/
+          path:  ./build/server_*/logs*/
diff --git a/src/common/expression/test/CMakeLists.txt b/src/common/expression/test/CMakeLists.txt
@@ -47,6 +47,7 @@ set(expression_test_common_libs
     $<TARGET_OBJECTS:graph_obj>
     $<TARGET_OBJECTS:ssl_obj>
     $<TARGET_OBJECTS:memory_obj>
+    $<TARGET_OBJECTS:gc_obj>
 )
 
 

diff --git a/src/common/thread/GenericThreadPool.h b/src/common/thread/GenericThreadPool.h
@@ -113,6 +113,16 @@ class GenericThreadPool final : public boost::noncopyable, public nebula::cpp::N
    */
   void purgeTimerTask(uint64_t id);
 
+  /**
+   * To add a repeated timer task to every worker of this pool; the task is registered
+   * once per worker with the same period.
+   * @ms      interval in milliseconds
+   * @f       a callable object
+   * @args    variadic arguments
+   * Nothing is returned (the return type is void), so no task ID is handed back.
+   * NOTE(review): without an ID the task presumably cannot be passed to purgeTimerTask.
+   */
+  template <typename F, typename... Args>
+  void addRepeatTaskForAll(size_t ms, F &&f, Args &&... args);
+
  private:
   size_t nrThreads_{0};
   std::atomic<size_t> nextThread_{0};
@@ -156,6 +166,12 @@ uint64_t GenericThreadPool::addRepeatTask(size_t ms, F &&f, Args &&... args) {
   return ((idx << GenericWorker::TIMER_ID_BITS) | id);
 }
 
+/**
+ * Registers the same repeated task, with period `ms`, on every worker in the pool.
+ *
+ * `f` and `args` are deliberately passed on as lvalues instead of being forwarded:
+ * forwarding inside the loop would allow the first worker to move from them and leave
+ * every following worker with moved-from objects.
+ */
+template <typename F, typename... Args>
+void GenericThreadPool::addRepeatTaskForAll(size_t ms, F &&f, Args &&... args) {
+  for (size_t idx = 0; idx < nrThreads_; ++idx) {
+    pool_[idx]->addRepeatTask(ms, f, args...);
+  }
+}
 }  // namespace thread
 }  // namespace nebula
 

diff --git a/src/daemons/CMakeLists.txt b/src/daemons/CMakeLists.txt
@@ -146,6 +146,7 @@ nebula_add_executable(
         $<TARGET_OBJECTS:graph_obj>
         $<TARGET_OBJECTS:ft_es_graph_adapter_obj>
         $<TARGET_OBJECTS:storage_thrift_obj>
+        $<TARGET_OBJECTS:gc_obj>
         ${common_deps}
     LIBRARIES
         ${PROXYGEN_LIBRARIES}
@@ -245,6 +246,7 @@ nebula_add_executable(
         $<TARGET_OBJECTS:meta_version_man_obj>
         $<TARGET_OBJECTS:meta_data_upgrade_obj>
         $<TARGET_OBJECTS:meta_v2_thrift_obj>
+        $<TARGET_OBJECTS:gc_obj>
         ${storage_meta_deps}
         ${common_deps}
     LIBRARIES

diff --git a/src/graph/CMakeLists.txt b/src/graph/CMakeLists.txt
@@ -13,3 +13,4 @@ nebula_add_subdirectory(stats)
 nebula_add_subdirectory(util)
 nebula_add_subdirectory(validator)
 nebula_add_subdirectory(visitor)
+nebula_add_subdirectory(gc)
diff --git a/src/graph/context/CMakeLists.txt b/src/graph/context/CMakeLists.txt
@@ -1,7 +1,3 @@
-# Copyright (c) 2020 vesoft inc. All rights reserved.
-#
-# This source code is licensed under Apache 2.0 License.
-
 nebula_add_library(
     graph_context_obj OBJECT
     QueryContext.cpp

diff --git a/src/graph/context/ExecutionContext.cpp b/src/graph/context/ExecutionContext.cpp
@@ -5,6 +5,9 @@
 
 #include "graph/context/ExecutionContext.h"
 
+#include "graph/gc/GC.h"
+#include "graph/service/GraphFlags.h"
+
 namespace nebula {
 namespace graph {
 constexpr int64_t ExecutionContext::kLatestVersion;
@@ -23,7 +26,12 @@ void ExecutionContext::setResult(const std::string& name, Result&& result) {
 }
 
 void ExecutionContext::dropResult(const std::string& name) {
-  valueMap_[name].clear();
+  // Releases the results stored under `name`.
+  // NOTE(review): valueMap_[name] is a subscript lookup; if valueMap_ is a standard map
+  // this creates an empty entry when `name` is absent — confirm that is acceptable.
+  auto& val = valueMap_[name];
+  if (FLAGS_enable_async_gc) {
+    // Pass the stored results to GC::instance() as an rvalue instead of clearing them
+    // here (presumably so they are destroyed asynchronously — confirm in graph/gc/GC.h).
+    GC::instance().clear(std::move(val));
+  } else {
+    val.clear();
+  }
 }
 
 size_t ExecutionContext::numVersions(const std::string& name) const {

diff --git a/src/graph/context/Iterator.cpp b/src/graph/context/Iterator.cpp
@@ -602,6 +602,15 @@ void GetNeighborsIter::clearEdges() {
   }
 }
 
+// Copy constructor: the new iterator refers to the same Value as `iter`, takes over its
+// column indices, points rows_ at that Value's rows and starts at the first row.
+SequentialIter::SequentialIter(const SequentialIter& iter)
+    : Iterator(iter.valuePtr(), Kind::kSequential) {
+  colIndices_ = iter.getColIndices();
+  auto source = iter.valuePtr();
+  auto& dataRows = source->mutableDataSet().rows;
+  rows_ = &dataRows;
+  iter_ = dataRows.begin();
+}
+
 SequentialIter::SequentialIter(std::shared_ptr<Value> value, bool checkMemory)
     : Iterator(value, Kind::kSequential, checkMemory) {
   DCHECK(value->isDataSet());
@@ -715,6 +724,11 @@ Value SequentialIter::getEdge() const {
   return getColumn("EDGE");
 }
 
+// Copy constructor: copies the SequentialIter part, then sets the kind back to kProp
+// (the SequentialIter copy constructor initializes the base with kSequential) and takes
+// over the source's dsIndex_.
+PropIter::PropIter(const PropIter& iter) : SequentialIter(iter) {
+  kind_ = Kind::kProp;
+  dsIndex_ = iter.dsIndex_;
+}
+
 PropIter::PropIter(std::shared_ptr<Value> value, bool checkMemory)
     : SequentialIter(value, checkMemory) {
   DCHECK(value->isDataSet());

diff --git a/src/graph/context/Iterator.h b/src/graph/context/Iterator.h
@@ -422,10 +422,10 @@ class GetNeighborsIter final : public Iterator {
 class SequentialIter : public Iterator {
  public:
   explicit SequentialIter(std::shared_ptr<Value> value, bool checkMemory = false);
+  explicit SequentialIter(const SequentialIter& iter);
 
   // Union multiple sequential iterators
   explicit SequentialIter(std::vector<std::unique_ptr<Iterator>> inputList);
-
   // Union two sequential iterators.
   SequentialIter(std::unique_ptr<Iterator> left, std::unique_ptr<Iterator> right);
 
@@ -527,6 +527,7 @@ class SequentialIter : public Iterator {
 class PropIter final : public SequentialIter {
  public:
   explicit PropIter(std::shared_ptr<Value> value, bool checkMemory = false);
+  explicit PropIter(const PropIter& iter);
 
   std::unique_ptr<Iterator> copy() const override {
     auto copy = std::make_unique<PropIter>(*this);

diff --git a/src/graph/context/test/CMakeLists.txt b/src/graph/context/test/CMakeLists.txt
@@ -47,6 +47,7 @@ SET(CONTEXT_TEST_LIBS
     $<TARGET_OBJECTS:graph_stats_obj>
     $<TARGET_OBJECTS:meta_client_stats_obj>
     $<TARGET_OBJECTS:storage_client_stats_obj>
+    $<TARGET_OBJECTS:gc_obj>
 )
 
 if(ENABLE_STANDALONE_VERSION)

diff --git a/src/graph/executor/Executor.cpp b/src/graph/executor/Executor.cpp
@@ -7,6 +7,7 @@
 #include <folly/String.h>
 #include <folly/executors/InlineExecutor.h>
 
+#include <algorithm>
 #include <atomic>
 
 #include "common/base/ObjectPool.h"
@@ -665,7 +666,6 @@ void Executor::drop(const PlanNode *node) {
         // Make sure drop happened-after count decrement
         CHECK_EQ(inputVar->userCount.load(std::memory_order_acquire), 0);
         ectx_->dropResult(inputVar->name);
-        VLOG(1) << node->kind() << " Drop variable " << inputVar->name;
       }
     }
   }
@@ -734,5 +734,14 @@ folly::Executor *Executor::runner() const {
   return qctx()->rctx()->runner();
 }
 
+size_t Executor::getBatchSize(size_t totalSize) const {
+  // Batch size is the greater of FLAGS_min_batch_size and ceil(totalSize / FLAGS_max_job_size).
+  // The ceiling uses integer arithmetic: a float cannot represent every size above 2^24, and
+  // the job count is clamped to at least 1 so a zero flag value cannot divide by zero.
+  const size_t jobSize = std::max<size_t>(FLAGS_max_job_size, 1);
+  const size_t minBatchSize = FLAGS_min_batch_size;
+  const size_t perJob = totalSize / jobSize + (totalSize % jobSize != 0 ? 1 : 0);
+  return std::max(perJob, minBatchSize);
+}
+
 }  // namespace graph
 }  // namespace nebula
diff --git a/src/graph/executor/Executor.h b/src/graph/executor/Executor.h
@@ -106,6 +106,17 @@ class Executor : private boost::noncopyable, private cpp::NonMovable {
   // Store the default result which not used for later executor
   Status finish(Value &&value);
 
+  // Returns the batch size (records per job) that runMultiJobs uses for an input of
+  // `totalSize` records.
+  size_t getBatchSize(size_t totalSize) const;
+
+  // Splits the records behind `iter` into batches, runs one job per batch and returns
+  // the future produced by applying `gather` to the collected job results.
+  // ScatterFunc: A callback that handles one batch of the dataset. It is invoked as
+  //   scatter(begin, end, iterator), where [begin, end) is the batch's record range.
+  // GatherFunc: A callback that gathers all results of ScatterFunc and does post work.
+  // Iterator: An iterator of the dataset.
+  template <
+      class ScatterFunc,
+      class ScatterResult = typename std::result_of<ScatterFunc(size_t, size_t, Iterator *)>::type,
+      class GatherFunc>
+  auto runMultiJobs(ScatterFunc &&scatter, GatherFunc &&gather, Iterator *iter);
+
   int64_t id_;
 
   // Executor name
@@ -129,6 +140,34 @@ class Executor : private boost::noncopyable, private cpp::NonMovable {
   std::unordered_map<std::string, std::string> otherStats_;
 };
 
+// Splits the records behind `iter` into [begin, end) batches of getBatchSize() records,
+// runs `scatter(begin, end, iteratorCopy)` for every batch on runner(), then hands the
+// collected results to `gather` (also on runner()) and returns the resulting future.
+template <class ScatterFunc, class ScatterResult, class GatherFunc>
+auto Executor::runMultiJobs(ScatterFunc &&scatter, GatherFunc &&gather, Iterator *iter) {
+  const size_t totalSize = iter->size();
+  const size_t batchSize = getBatchSize(totalSize);
+
+  // Start multiple jobs for handling the results
+  std::vector<folly::Future<ScatterResult>> futures;
+  size_t begin = 0;
+  while (begin < totalSize) {
+    const size_t end = begin + batchSize > totalSize ? totalSize : begin + batchSize;
+    futures.emplace_back(folly::via(
+        runner(),
+        // `scatter` is copied into every job. Moving it here would give each job after
+        // the first one a moved-from callable.
+        [begin, end, tmpIter = iter->copy(), f = scatter]() mutable -> ScatterResult {
+          // Not all iterators are linear, so step the job's own copy forward to `begin`
+          size_t tmp = 0;
+          for (; tmpIter->valid() && tmp < begin; ++tmp) {
+            tmpIter->next();
+          }
+
+          return f(begin, end, tmpIter.get());
+        }));
+    begin = end;
+  }
+
+  // Gather all results and do post works. `gather` is a forwarding reference, so it is
+  // forwarded (not unconditionally moved) to avoid stealing a caller's lvalue.
+  return folly::collect(futures).via(runner()).thenValue(std::forward<GatherFunc>(gather));
+}
 }  // namespace graph
 }  // namespace nebula
 

diff --git a/src/graph/executor/algo/ShortestPathBase.cpp b/src/graph/executor/algo/ShortestPathBase.cpp
@@ -218,7 +218,9 @@ void ShortestPathBase::addStats(PropRpcResponse& resp, int64_t timeInUSec) const
     ss << "\n}";
   }
   ss << "\n}";
+  statsLock_.lock();
   stats_->emplace(folly::sformat("get_prop "), ss.str());
+  statsLock_.unlock();
 }
 
 }  // namespace graph