Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ConcurrentLRUCache #1268

Merged
merged 6 commits into from Nov 18, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
257 changes: 257 additions & 0 deletions src/common/base/ConcurrentLRUCache.h
@@ -0,0 +1,257 @@
/* Copyright (c) 2019 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#ifndef COMMON_BASE_CONCURRENTLRUCACHE_H_
#define COMMON_BASE_CONCURRENTLRUCACHE_H_

#include "base/Base.h"
#include "base/StatusOr.h"
#include <list>
#include <utility>
#include <boost/optional.hpp>
#include <gtest/gtest_prod.h>
darionyaphet marked this conversation as resolved.
Show resolved Hide resolved

namespace nebula {

template<class Key, class Value>
class LRU;

template<typename K, typename V>
class ConcurrentLRUCache final {
FRIEND_TEST(ConcurrentLRUCacheTest, SimpleTest);

public:
/**
 * Build a cache that holds `capacity` entries in total, split over
 * 2^bucketsExp buckets, each with its own lock.
 *
 * @param capacity    total number of entries; must be greater than the number of buckets.
 * @param bucketsExp  log2 of the number of buckets; must be less than 32.
 */
explicit ConcurrentLRUCache(size_t capacity, uint32_t bucketsExp = 4)
    // Shifting a 32-bit value by 32 or more is undefined, so an oversized
    // exponent is mapped to 0 buckets and rejected by the CHECK below.
    : bucketsNum_(bucketsExp < 32 ? (1U << bucketsExp) : 0U)
    , bucketsExp_(bucketsExp) {
    CHECK(capacity > bucketsNum_ && bucketsNum_ > 0);
    // One allocation up front, so buckets are built in place and never relocated.
    buckets_.reserve(bucketsNum_);
    const auto capPerBucket = capacity >> bucketsExp;
    auto left = capacity;
    for (uint32_t i = 0; i < bucketsNum_ - 1; i++) {
        buckets_.emplace_back(capPerBucket);
        left -= capPerBucket;
    }
    // The last bucket takes whatever is left after the even split.
    CHECK_GT(left, 0);
    buckets_.emplace_back(left);
}

bool contains(const K& key, int32_t hint = -1) {
return buckets_[bucketIndex(key, hint)].contains(key);
}

void insert(const K& key, const V& val, int32_t hint = -1) {
buckets_[bucketIndex(key, hint)].insert(key, val);
}

/**
 * Look `key` up in its bucket and hand back whatever the bucket returns:
 * the cached value on a hit, an error status on a miss.
 * A non-negative `hint` picks the bucket directly instead of hashing the key.
 */
StatusOr<V> get(const K& key, int32_t hint = -1) {
    auto& bucket = buckets_[bucketIndex(key, hint)];
    return bucket.get(key);
}

/**
 * If `key` is not cached yet, insert {key, val} and return Status::Inserted.
 * Otherwise leave the cache content untouched and return the value already
 * stored for `key`.
 * A non-negative `hint` picks the bucket directly instead of hashing the key.
 * */
StatusOr<V> putIfAbsent(const K& key, const V& val, int32_t hint = -1) {
    auto& bucket = buckets_[bucketIndex(key, hint)];
    return bucket.putIfAbsent(key, val);
}

void evict(const K& key, int32_t hint = -1) {
buckets_[bucketIndex(key, hint)].evict(key);
}

/**
 * Empty every bucket, one after another. Only the cached entries are dropped;
 * the buckets themselves stay in place.
 */
void clear() {
// Visit all bucketsNum_ buckets and clear each one in turn.
for (auto i = 0; i < bucketsNum_; i++) {
buckets_[i].clear();
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it need buckets_.clear() at here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When we call clear, what we want is to clear the content in cache, not destroy the buckets.

}

private:
class Bucket {
public:
/** Create a bucket whose LRU is constructed with the given `capacity`. */
explicit Bucket(size_t capacity)
    : lru_(std::make_unique<LRU<K, V>>(capacity)) {}

/**
 * Move constructor, so that a Bucket can be stored in a std::vector.
 * Only the LRU is taken over from `b`. lock_ is not in the initializer list,
 * so the new bucket gets a freshly constructed mutex, and b's mutex is not
 * acquired here.
 * Marked noexcept: it only moves a std::unique_ptr and default-constructs
 * a std::mutex, neither of which throws.
 */
Bucket(Bucket&& b) noexcept
    : lru_(std::move(b.lru_)) {}

/** Ask the LRU whether `key` is present, holding the bucket lock for the query. */
bool contains(const K& key) {
    std::lock_guard<std::mutex> holder(lock_);
    const bool present = lru_->contains(key);
    return present;
}

/** Forward {key, val} to the LRU's insert while holding the bucket lock. */
void insert(const K& key, const V& val) {
    std::lock_guard<std::mutex> holder(lock_);
    auto& cache = *lru_;
    cache.insert(key, val);
}

/**
 * Look up `key` in this bucket's LRU while holding the bucket lock.
 * Returns the cached value on a hit, or Status::Error() on a miss.
 */
StatusOr<V> get(const K& key) {
std::lock_guard<std::mutex> guard(lock_);
auto v = lru_->get(key);
if (v == boost::none) {
// The error is returned without a message.
return Status::Error();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Status::Error(folly::stringPrintf("%s not found", key)) . would be better ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will hurt the performance because miss cache is normal in practice.

}
// Hit: move the value out of the local optional into the result.
return std::move(v).value();
Comment on lines +91 to +96
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ask a question,do we need insert it to cache immediate If this key is not exist in cache ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, maybe we need another method putIfAbsent

}

/**
 * Under a single hold of the bucket lock: return the value cached for `key`
 * if there is one, otherwise insert {key, val} and return Status::Inserted().
 */
StatusOr<V> putIfAbsent(const K& key, const V& val) {
    std::lock_guard<std::mutex> holder(lock_);
    auto cached = lru_->get(key);
    if (cached != boost::none) {
        // Already present: return the existing value and ignore `val`.
        return std::move(cached).value();
    }
    lru_->insert(key, val);
    return Status::Inserted();
}

/** Forward `key` to the LRU's evict while holding the bucket lock. */
void evict(const K& key) {
    std::lock_guard<std::mutex> holder(lock_);
    auto& cache = *lru_;
    cache.evict(key);
}

/** Clear this bucket's LRU while holding the bucket lock. */
void clear() {
    std::lock_guard<std::mutex> holder(lock_);
    auto& cache = *lru_;
    cache.clear();
}

private:
std::mutex lock_;
std::unique_ptr<LRU<K, V>> lru_;
};


private:
/**
* If hint is specified, we could use it to cal the bucket index directly without hash key.
* */
uint32_t bucketIndex(const K& key, int32_t hint = -1) {
return hint >= 0 ? (hint & ((1 << bucketsExp_) - 1))
: (std::hash<K>()(key) & ((1 << bucketsExp_) - 1));
}


private:
std::vector<Bucket> buckets_;
uint32_t bucketsNum_ = 1;
uint32_t bucketsExp_ = 0;
};


/**
    A fixed-capacity least-recently-used cache. It does no locking of its own.

    It is copied from boost::compute::detail::LRU.
    The differences:
    1. Add method evict(const K& key);
    2. Use std::unordered_map instead of std::map;
    3. Update the code style.
*/
template<class Key, class Value>
class LRU {
public:
    typedef Key key_type;
    typedef Value value_type;
    // Keys ordered from most recently used (front) to least recently used (back).
    typedef std::list<key_type> list_type;
    // key -> (value, position of the key inside the recency list)
    typedef std::unordered_map<
        key_type,
        std::tuple<value_type, typename list_type::iterator>
    > map_type;

    explicit LRU(size_t capacity)
        : capacity_(capacity) {
    }

    ~LRU() = default;

    /** Number of entries currently cached. */
    size_t size() const {
        return map_.size();
    }

    /** Maximum number of entries the cache keeps. */
    size_t capacity() const {
        return capacity_;
    }

    bool empty() const {
        return map_.empty();
    }

    /** Whether `key` is cached. Does not change the recency order. */
    bool contains(const key_type& key) const {
        return map_.find(key) != map_.end();
    }

    /**
     * Cache `value` under `key` as the most recently used entry, evicting the
     * least recently used entry first when the cache is full.
     * If `key` is already cached nothing changes: neither its value nor its
     * position in the recency order.
     */
    void insert(const key_type& key, const value_type& value) {
        if (capacity_ == 0 || map_.find(key) != map_.end()) {
            // A zero-capacity cache stores nothing; an existing key is kept as is.
            return;
        }
        if (size() >= capacity_) {
            VLOG(3) << "Size:" << size() << ", capacity " << capacity_;
            // cache is full, evict the least recently used item
            evict();
        }
        list_.push_front(key);
        // emplace builds the entry in place, so value_type does not have to be
        // default-constructible.
        map_.emplace(key, std::make_tuple(value, list_.begin()));
        VLOG(3) << "Insert key " << key << ", val " << value;
    }

    /**
     * Return a copy of the value cached for `key` and mark the entry as the most
     * recently used one. Returns boost::none if `key` is not cached.
     */
    boost::optional<value_type> get(const key_type& key) {
        typename map_type::iterator entry = map_.find(key);
        if (entry == map_.end()) {
            VLOG(3) << key << " not found!";
            return boost::none;
        }

        typename list_type::iterator pos = std::get<1>(entry->second);
        if (pos != list_.begin()) {
            // splice relinks the node at the front without allocating, and the
            // iterator stored in the map stays valid, so the map is untouched.
            list_.splice(list_.begin(), list_, pos);
        }
        const value_type& value = std::get<0>(entry->second);
        VLOG(3) << "Get key : " << key << ", val: " << value;
        return value;
    }

    /**
     * evict the key if exist.
     * */
    void evict(const key_type& key) {
        auto it = map_.find(key);
        if (it != map_.end()) {
            list_.erase(std::get<1>(it->second));
            map_.erase(it);
        }
    }

    /** Drop every entry; the capacity is unchanged. */
    void clear() {
        map_.clear();
        list_.clear();
    }

private:
    /** Evict the least recently used entry, if there is one. */
    void evict() {
        if (list_.empty()) {
            // Nothing to evict; taking the last element would be undefined.
            return;
        }
        const key_type& oldest = list_.back();
        VLOG(3) << "Evict the oldest key " << oldest;
        // Erase from the map first: `oldest` refers to the list node removed next.
        map_.erase(oldest);
        list_.pop_back();
    }

private:
    map_type map_;
    list_type list_;
    size_t capacity_;
};

} // namespace nebula

#endif // COMMON_BASE_CONCURRENTLRUCACHE_H_

3 changes: 3 additions & 0 deletions src/common/base/Status.h
Expand Up @@ -92,6 +92,8 @@ class Status final {
bool is##ERROR() const { \
return code() == k##ERROR; \
}
// Some succeeded codes
STATUS_GENERATOR(Inserted);

// General errors
STATUS_GENERATOR(Error);
Expand Down Expand Up @@ -128,6 +130,7 @@ class Status final {
enum Code : uint16_t {
// OK
kOk = 0,
kInserted = 1,
// 1xx, for general errors
kError = 101,
kNoSuchFile = 102,
Expand Down
7 changes: 7 additions & 0 deletions src/common/base/test/CMakeLists.txt
Expand Up @@ -89,6 +89,13 @@ nebula_add_test(
LIBRARIES gtest gtest_main
)

nebula_add_test(
NAME lru_test
SOURCES ConcurrentLRUCacheTest.cpp
OBJECTS $<TARGET_OBJECTS:base_obj>
LIBRARIES gtest gtest_main
)

nebula_add_executable(
NAME range_vs_transform_bm
SOURCES RangeVsTransformBenchmark.cpp
Expand Down