Skip to content

Commit

Permalink
Cherry pick v3.2.0 (0627-0630) (#4370)
Browse files Browse the repository at this point in the history
* enhance nightly tck test job (#4355)

* add lock when multithread access shared variable (#4360)

* Implement inner parallel processing by using Scatter/Gather model. (#4361)

* Multi job traverse.

Some debug infos.

Multi job AppendVertices handle resp.

Multi job Project.

Enhance AppendVertices.

Multi job filter.

Fix row cnt and delete debug info.

Fix AppendV.

Add debug info.

Rm debug info.

Fix copy in traverse.

Set max_job_size to default 20.

Add runMultiJobs template.

Impl Project/Filter/AppendV.

Impl Join.

Fix double free for join.

Fix filter on GetNeighborsIter.

Fix prop iter.

Fix filter on prop iter.

Fix left join by clearing hash tables manually.

Report error in filter.

Fix tests: the order is not guaranteed in the scatter-gather model.

Iterates to begin out of job.

Modify the default config.

Fix appendv.

Support run multi job mode test.

Test multi job.

Rebase and compatible with single job.

* Update the logs and flags.

* Add comment.

* Fix nightly.

* Fix test.

* Fix compile.

* Fix test.

* Address comment and fix test.

Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com>

* Async gc. (#4362)

* Add GC.

* Impl gc.

* Fix linking.

* Using thread pool.

* Using try_dequeue.

* Stop workers when destruct.

* Set default worker size to 8.

* Add repeat task for all.

* Update flags.

* Add comments.

* Fix nightly.

Co-authored-by: Harris.Chu <1726587+HarrisChu@users.noreply.github.com>
Co-authored-by: jimingquan <mingquan.ji@vesoft.com>
Co-authored-by: cpw <13495049+CPWstatic@users.noreply.github.com>
  • Loading branch information
4 people committed Jun 30, 2022
1 parent dfe737c commit 518c7d7
Show file tree
Hide file tree
Showing 44 changed files with 1,029 additions and 61 deletions.
101 changes: 98 additions & 3 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: nightly

on:
schedule:
- cron: '0 18 * * *'
- cron: "0 18 * * *"

concurrency:
group: nightly
Expand Down Expand Up @@ -35,7 +35,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: '^1.16.7'
go-version: "^1.16.7"
- name: package
run: ./package/package.sh -t Release -r ON -p OFF -s FALSE -k OFF
- name: output some vars
Expand Down Expand Up @@ -199,4 +199,99 @@ jobs:
if: ${{ failure() }}
with:
name: ${{ matrix.os }}-${{ matrix.compiler }}-nebula-test-logs
path: ./build/server_*/logs/
path: ./build/server_*/logs/

test:
name: Tck test
needs: package
runs-on: [self-hosted, nebula]
strategy:
fail-fast: false
matrix:
os:
- ubuntu1804
- ubuntu2004
- centos7
- centos8
extra_config:
- "ENABLE_SSL=true CA_SIGNED=true QUERY_CONCURRENTLY=false"
- "ENABLE_SSL=false CA_SIGNED=false QUERY_CONCURRENTLY=true"
env:
OSS_DIR: nebula-graph/package/nightly
container:
image: vesoft/nebula-dev:${{ matrix.os }}
steps:
- uses: webiny/action-post-run@2.0.1
with:
run: sh -c "find . -mindepth 1 -delete"
- uses: actions/checkout@v2
- name: output some vars
id: vars
env:
SHA_EXT: sha256sum.txt
run: |
subdir=$(date -u +%Y.%m.%d -d "1 days ago")
echo "::set-output name=subdir::$subdir"
- id: oss_package
run: |
case ${{ matrix.os }} in
ubuntu1804)
echo "::set-output name=p::nebula-graph-${{ steps.vars.outputs.subdir }}-nightly.ubuntu1804.amd64.tar.gz"
;;
ubuntu2004)
echo "::set-output name=p::nebula-graph-${{ steps.vars.outputs.subdir }}-nightly.ubuntu2004.amd64.tar.gz"
;;
centos7)
echo "::set-output name=p::nebula-graph-${{ steps.vars.outputs.subdir }}-nightly.el7.x86_64.tar.gz"
;;
centos8)
echo "::set-output name=p::nebula-graph-${{ steps.vars.outputs.subdir }}-nightly.el8.x86_64.tar.gz"
;;
esac
- name: Prepare environment
id: prepare
run: |
[ -d build/ ] && rm -rf build/* || mkdir -p build
make init -C tests
- name: CMake
id: cmake
run: |
echo "::set-output name=j::8"
- name: download from oss
run: |
ossutil64 -e ${{ secrets.OSS_ENDPOINT }} \
-i ${{ secrets.OSS_ID }} \
-k ${{ secrets.OSS_SECRET }} \
-f cp oss://${{ env.OSS_DIR }}/${{ steps.vars.outputs.subdir }}/${{ steps.oss_package.outputs.p }} \
build/.
tar zxvf build/${{ steps.oss_package.outputs.p }} -C build
d=`echo ${{ steps.oss_package.outputs.p }} | sed 's/.tar.gz//'`
mv build/${d}/* build/.
- name: Setup cluster
run: |
make CONTAINERIZED=true ${{ matrix.extra_config }} up
working-directory: tests/
timeout-minutes: 2
- name: Pytest
run: |
make RM_DIR=false DEBUG=false J=${{ steps.cmake.outputs.j }} test
working-directory: tests/
timeout-minutes: 15
- name: TCK
run: |
make RM_DIR=false DEBUG=false J=${{ steps.cmake.outputs.j }} tck
working-directory: tests/
timeout-minutes: 60
- name: Down cluster
run: |
make RM_DIR=false down
working-directory: tests/
timeout-minutes: 2
# Upload the server log directories as an artifact; the `if` condition restricts this step
# to runs where failure() evaluates to true.
- name: Upload logs
uses: actions/upload-artifact@v2
if: ${{ failure() }}
with:
# NOTE(review): the matrix of this job defines only `os` and `extra_config`; `matrix.compiler`
# is not defined here, so it presumably expands to an empty string and both `extra_config`
# variants of one OS would share the same artifact name — confirm the intended name.
name: ${{ matrix.os }}-${{ matrix.compiler }}-nebula-test-logs
path: ./build/server_*/logs*/
7 changes: 5 additions & 2 deletions .github/workflows/rc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ jobs:
- ubuntu2004
- centos7
- centos8
extra_config:
- "ENABLE_SSL=true CA_SIGNED=true QUERY_CONCURRENTLY=false"
- "ENABLE_SSL=false CA_SIGNED=false QUERY_CONCURRENTLY=true"
env:
OSS_DIR: nebula-graph/rc
container:
Expand Down Expand Up @@ -235,7 +238,7 @@ jobs:
- name: Setup cluster
run: |
make CONTAINERIZED=true ENABLE_SSL=true CA_SIGNED=true up
make CONTAINERIZED=true ${{ matrix.extra_config }} up
working-directory: tests/
timeout-minutes: 2
- name: Pytest
Expand All @@ -258,4 +261,4 @@ jobs:
if: ${{ failure() }}
with:
name: ${{ matrix.os }}-${{ matrix.compiler }}-nebula-test-logs
path: ./build/server_*/logs/
path: ./build/server_*/logs*/
1 change: 1 addition & 0 deletions src/common/expression/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ set(expression_test_common_libs
$<TARGET_OBJECTS:graph_obj>
$<TARGET_OBJECTS:ssl_obj>
$<TARGET_OBJECTS:memory_obj>
$<TARGET_OBJECTS:gc_obj>
)


Expand Down
16 changes: 16 additions & 0 deletions src/common/thread/GenericThreadPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,16 @@ class GenericThreadPool final : public boost::noncopyable, public nebula::cpp::N
*/
void purgeTimerTask(uint64_t id);

/**
* To add a repeated timer task for all workers which will be executed in each period.
* The task is registered once on every worker of the pool.
* @ms interval in milliseconds
* @f a callable object
* @args variadic arguments
* @return nothing; the values returned by the per-worker addRepeatTask calls are ignored
*/
template <typename F, typename... Args>
void addRepeatTaskForAll(size_t ms, F &&f, Args &&... args);

private:
size_t nrThreads_{0};
std::atomic<size_t> nextThread_{0};
Expand Down Expand Up @@ -156,6 +166,12 @@ uint64_t GenericThreadPool::addRepeatTask(size_t ms, F &&f, Args &&... args) {
return ((idx << GenericWorker::TIMER_ID_BITS) | id);
}

// Registers the same repeated task, with period `ms` milliseconds, on every worker of the pool.
//
// `f` and `args` are intentionally handed to each worker as lvalues (i.e. copied) rather than
// perfectly forwarded: forwarding inside the loop would move from rvalue arguments during the
// first iteration and pass moved-from objects to all remaining workers.
// The value returned by each worker's addRepeatTask is ignored.
template <typename F, typename... Args>
void GenericThreadPool::addRepeatTaskForAll(size_t ms, F &&f, Args &&... args) {
  for (size_t idx = 0; idx < nrThreads_; ++idx) {
    pool_[idx]->addRepeatTask(ms, f, args...);
  }
}
} // namespace thread
} // namespace nebula

Expand Down
2 changes: 2 additions & 0 deletions src/daemons/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ nebula_add_executable(
$<TARGET_OBJECTS:graph_obj>
$<TARGET_OBJECTS:ft_es_graph_adapter_obj>
$<TARGET_OBJECTS:storage_thrift_obj>
$<TARGET_OBJECTS:gc_obj>
${common_deps}
LIBRARIES
${PROXYGEN_LIBRARIES}
Expand Down Expand Up @@ -245,6 +246,7 @@ nebula_add_executable(
$<TARGET_OBJECTS:meta_version_man_obj>
$<TARGET_OBJECTS:meta_data_upgrade_obj>
$<TARGET_OBJECTS:meta_v2_thrift_obj>
$<TARGET_OBJECTS:gc_obj>
${storage_meta_deps}
${common_deps}
LIBRARIES
Expand Down
1 change: 1 addition & 0 deletions src/graph/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ nebula_add_subdirectory(stats)
nebula_add_subdirectory(util)
nebula_add_subdirectory(validator)
nebula_add_subdirectory(visitor)
nebula_add_subdirectory(gc)
4 changes: 0 additions & 4 deletions src/graph/context/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# Copyright (c) 2020 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License.

nebula_add_library(
graph_context_obj OBJECT
QueryContext.cpp
Expand Down
10 changes: 9 additions & 1 deletion src/graph/context/ExecutionContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

#include "graph/context/ExecutionContext.h"

#include "graph/gc/GC.h"
#include "graph/service/GraphFlags.h"

namespace nebula {
namespace graph {
constexpr int64_t ExecutionContext::kLatestVersion;
Expand All @@ -23,7 +26,12 @@ void ExecutionContext::setResult(const std::string& name, Result&& result) {
}

// Releases the value stored under `name` in valueMap_, either in place or by handing it
// to the GC singleton when FLAGS_enable_async_gc is set.
void ExecutionContext::dropResult(const std::string& name) {
// NOTE(review): this text is a diff rendering without +/- markers; the statement below looks
// like the pre-change line that the statements after it replace — confirm against the real file.
valueMap_[name].clear();
// The entry is looked up with operator[] and bound by reference, so the map entry itself is
// what gets moved from or cleared below.
auto& val = valueMap_[name];
if (FLAGS_enable_async_gc) {
// Move the value into the GC instead of clearing it on the calling thread.
GC::instance().clear(std::move(val));
} else {
val.clear();
}
}

size_t ExecutionContext::numVersions(const std::string& name) const {
Expand Down
14 changes: 14 additions & 0 deletions src/graph/context/Iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,15 @@ void GetNeighborsIter::clearEdges() {
}
}

// Builds a sequential iterator over the same underlying value as `iter`.
// The new iterator points at the first row of that value's data set (it does not
// inherit the position of `iter`) and takes over the column indices of `iter`.
SequentialIter::SequentialIter(const SequentialIter& iter)
    : Iterator(iter.valuePtr(), Kind::kSequential) {
  auto sourceValue = iter.valuePtr();
  auto& dataSet = sourceValue->mutableDataSet();
  rows_ = &dataSet.rows;
  iter_ = dataSet.rows.begin();
  colIndices_ = iter.getColIndices();
}

SequentialIter::SequentialIter(std::shared_ptr<Value> value, bool checkMemory)
: Iterator(value, Kind::kSequential, checkMemory) {
DCHECK(value->isDataSet());
Expand Down Expand Up @@ -715,6 +724,11 @@ Value SequentialIter::getEdge() const {
return getColumn("EDGE");
}

// Copies a property iterator: the SequentialIter part is built from `iter`, the data set
// index is duplicated, and the kind is set to kProp afterwards.
PropIter::PropIter(const PropIter& iter) : SequentialIter(iter), dsIndex_(iter.dsIndex_) {
  // kind_ has to be assigned in the body, after the base-class constructor has run.
  kind_ = Kind::kProp;
}

PropIter::PropIter(std::shared_ptr<Value> value, bool checkMemory)
: SequentialIter(value, checkMemory) {
DCHECK(value->isDataSet());
Expand Down
3 changes: 2 additions & 1 deletion src/graph/context/Iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -422,10 +422,10 @@ class GetNeighborsIter final : public Iterator {
class SequentialIter : public Iterator {
public:
explicit SequentialIter(std::shared_ptr<Value> value, bool checkMemory = false);
explicit SequentialIter(const SequentialIter& iter);

// Union multiple sequential iterators
explicit SequentialIter(std::vector<std::unique_ptr<Iterator>> inputList);

// Union two sequential iterators.
SequentialIter(std::unique_ptr<Iterator> left, std::unique_ptr<Iterator> right);

Expand Down Expand Up @@ -527,6 +527,7 @@ class SequentialIter : public Iterator {
class PropIter final : public SequentialIter {
public:
explicit PropIter(std::shared_ptr<Value> value, bool checkMemory = false);
explicit PropIter(const PropIter& iter);

std::unique_ptr<Iterator> copy() const override {
auto copy = std::make_unique<PropIter>(*this);
Expand Down
1 change: 1 addition & 0 deletions src/graph/context/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ SET(CONTEXT_TEST_LIBS
$<TARGET_OBJECTS:graph_stats_obj>
$<TARGET_OBJECTS:meta_client_stats_obj>
$<TARGET_OBJECTS:storage_client_stats_obj>
$<TARGET_OBJECTS:gc_obj>
)

if(ENABLE_STANDALONE_VERSION)
Expand Down
11 changes: 10 additions & 1 deletion src/graph/executor/Executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <folly/String.h>
#include <folly/executors/InlineExecutor.h>

#include <algorithm>
#include <atomic>

#include "common/base/ObjectPool.h"
Expand Down Expand Up @@ -665,7 +666,6 @@ void Executor::drop(const PlanNode *node) {
// Make sure drop happened-after count decrement
CHECK_EQ(inputVar->userCount.load(std::memory_order_acquire), 0);
ectx_->dropResult(inputVar->name);
VLOG(1) << node->kind() << " Drop variable " << inputVar->name;
}
}
}
Expand Down Expand Up @@ -734,5 +734,14 @@ folly::Executor *Executor::runner() const {
return qctx()->rctx()->runner();
}

// Returns how many rows each job should handle when `totalSize` rows are split into jobs:
// the greater of FLAGS_min_batch_size and ceil(totalSize / FLAGS_max_job_size).
size_t Executor::getBatchSize(size_t totalSize) const {
  // A job count of zero is treated as one job so that the division below is always defined.
  const size_t jobSize = std::max<size_t>(FLAGS_max_job_size, 1);
  const size_t minBatchSize = FLAGS_min_batch_size;
  // Exact integer ceiling division: going through float loses precision once totalSize
  // exceeds 2^24, and this form cannot overflow the way (totalSize + jobSize - 1) could.
  const size_t rowsPerJob = totalSize / jobSize + (totalSize % jobSize != 0 ? 1 : 0);
  return std::max(rowsPerJob, minBatchSize);
}

} // namespace graph
} // namespace nebula
39 changes: 39 additions & 0 deletions src/graph/executor/Executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,17 @@ class Executor : private boost::noncopyable, private cpp::NonMovable {
// Store the default result which not used for later executor
Status finish(Value &&value);

// Returns the batch size (rows per job) for a dataset of totalSize rows: the greater of
// FLAGS_min_batch_size and totalSize / FLAGS_max_job_size rounded up.
size_t getBatchSize(size_t totalSize) const;

// Splits the rows behind `iter` into batches (sized by getBatchSize), runs `scatter` on every
// batch as a separate job on runner(), and passes the collected results to `gather`.
// ScatterFunc: A callback function that handles a partial range of the records of a dataset.
//              It is invoked as scatter(begin, end, Iterator *), with the iterator already
//              advanced to position `begin`.
// GatherFunc: A callback function that gathers all results of ScatterFunc and does the post work.
// Iterator: An iterator of a dataset.
template <
class ScatterFunc,
class ScatterResult = typename std::result_of<ScatterFunc(size_t, size_t, Iterator *)>::type,
class GatherFunc>
auto runMultiJobs(ScatterFunc &&scatter, GatherFunc &&gather, Iterator *iter);

int64_t id_;

// Executor name
Expand All @@ -129,6 +140,34 @@ class Executor : private boost::noncopyable, private cpp::NonMovable {
std::unordered_map<std::string, std::string> otherStats_;
};

// Scatter/gather driver.
//
// Splits the rows behind `iter` into consecutive ranges [begin, end) of getBatchSize() rows,
// schedules one job per range on runner() that calls scatter(begin, end, <iterator copy>),
// and finally invokes `gather` on runner() with the collected job results.
//
// scatter: callback handling one range of rows; must be copyable, since every job owns a copy.
// gather:  callback receiving all scatter results once every job has finished.
// iter:    iterator of the dataset to process; each job works on its own copy of it.
// Returns the future produced by chaining `gather` onto the collected scatter futures.
template <class ScatterFunc, class ScatterResult, class GatherFunc>
auto Executor::runMultiJobs(ScatterFunc &&scatter, GatherFunc &&gather, Iterator *iter) {
  const size_t totalSize = iter->size();
  // Guard against a zero batch size, which would keep the dispatch loop from advancing.
  const size_t requestedBatch = getBatchSize(totalSize);
  const size_t batchSize = requestedBatch == 0 ? 1 : requestedBatch;

  // Start multiple jobs for handling the results
  std::vector<folly::Future<ScatterResult>> futures;
  for (size_t begin = 0; begin < totalSize;) {
    // Clamp the last range to totalSize without risking overflow of begin + batchSize.
    const size_t end = batchSize < totalSize - begin ? begin + batchSize : totalSize;
    futures.emplace_back(folly::via(
        runner(),
        // `scatter` is copied into every job. Moving it here would leave all jobs after the
        // first one with a moved-from callback.
        [begin, end, tmpIter = iter->copy(), f = scatter]() mutable -> ScatterResult {
          // Not all iterators are linear, so step the copy forward to the begin position
          // instead of jumping there directly.
          for (size_t pos = 0; tmpIter->valid() && pos < begin; ++pos) {
            tmpIter->next();
          }

          return f(begin, end, tmpIter.get());
        }));
    begin = end;
  }

  // Gather all results and do post works
  return folly::collect(futures).via(runner()).thenValue(std::move(gather));
}
} // namespace graph
} // namespace nebula

Expand Down
2 changes: 2 additions & 0 deletions src/graph/executor/algo/ShortestPathBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,9 @@ void ShortestPathBase::addStats(PropRpcResponse& resp, int64_t timeInUSec) const
ss << "\n}";
}
ss << "\n}";
statsLock_.lock();
stats_->emplace(folly::sformat("get_prop "), ss.str());
statsLock_.unlock();
}

} // namespace graph
Expand Down
Loading

0 comments on commit 518c7d7

Please sign in to comment.