Skip to content

Commit

Permalink
Reduce binary search when reseek into the same data block (facebook#5256
Browse files Browse the repository at this point in the history
)

Summary:
Right now, when Seek() is called again, RocksDB always does a binary search against the files and index blocks, even if they end up with the same file/block. Improve it as following:
1. in LevelIterator, reseek first try to check the boundary of the current file. If it falls into the same file, skip the binary search to find the file
2. in block based table iterator, reseek skip to reseek the iterator block if the seek key is larger than the current key and lower than the index key (boundary of the current block and the next block).
Pull Request resolved: facebook#5256

Differential Revision: D15105072

Pulled By: siying

fbshipit-source-id: 39634bdb4a881082451fa39cecd7ecf12160bf80
  • Loading branch information
siying authored and facebook-github-bot committed May 1, 2019
1 parent f25e3df commit 4c386ea
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 11 deletions.
3 changes: 3 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
### Public API Change
* Now DB::Close() will return Aborted() error when there is unreleased snapshot. Users can retry after all snapshots are released.

### New Features
* Reduce binary search when iterator reseek into the same data block.

## 6.2.0 (4/30/2019)
### New Features
* Add an option `strict_bytes_per_sync` that causes a file-writing thread to block rather than exceed the limit on bytes pending writeback specified by `bytes_per_sync` or `wal_bytes_per_sync`.
Expand Down
98 changes: 98 additions & 0 deletions db/db_iterator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2450,6 +2450,104 @@ TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
ASSERT_EQ("a", it->key().ToString());
}

TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
Options options = CurrentOptions();
options.compression = CompressionType::kNoCompression;
BlockBasedTableOptions table_options;
table_options.block_size = 800;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
Reopen(options);

Random rnd(301);
std::string random_str = RandomString(&rnd, 180);

ASSERT_OK(Put("1", random_str));
ASSERT_OK(Put("2", random_str));
ASSERT_OK(Put("3", random_str));
ASSERT_OK(Put("4", random_str));
// A new block
ASSERT_OK(Put("5", random_str));
ASSERT_OK(Put("6", random_str));
ASSERT_OK(Put("7", random_str));
ASSERT_OK(Flush());
ASSERT_OK(Put("8", random_str));
ASSERT_OK(Put("9", random_str));
ASSERT_OK(Flush());
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

int num_find_file_in_level = 0;
int num_idx_blk_seek = 0;
SyncPoint::GetInstance()->SetCallBack(
"LevelIterator::Seek:BeforeFindFile",
[&](void* /*arg*/) { num_find_file_in_level++; });
SyncPoint::GetInstance()->SetCallBack(
"IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; });
SyncPoint::GetInstance()->EnableProcessing();

{
std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
iter->Seek("1");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(1, num_idx_blk_seek);

iter->Seek("2");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(1, num_idx_blk_seek);

iter->Seek("3");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(1, num_idx_blk_seek);

iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(1, num_idx_blk_seek);

iter->Seek("5");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(2, num_idx_blk_seek);

iter->Seek("6");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(2, num_idx_blk_seek);

iter->Seek("7");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(1, num_find_file_in_level);
ASSERT_EQ(3, num_idx_blk_seek);

iter->Seek("8");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(2, num_find_file_in_level);
// Still re-seek because "8" is the boundary key, which has
// the same user key as the seek key.
ASSERT_EQ(4, num_idx_blk_seek);

iter->Seek("5");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(3, num_find_file_in_level);
ASSERT_EQ(5, num_idx_blk_seek);

iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(3, num_find_file_in_level);
ASSERT_EQ(5, num_idx_blk_seek);

// Seek backward never triggers the index block seek to be skipped
iter->Seek("5");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(3, num_find_file_in_level);
ASSERT_EQ(6, num_idx_blk_seek);
}

SyncPoint::GetInstance()->DisableProcessing();
}

INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
testing::Values(true, false));

Expand Down
20 changes: 18 additions & 2 deletions db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1007,9 +1007,25 @@ class LevelIterator final : public InternalIterator {
};

void LevelIterator::Seek(const Slice& target) {
size_t new_file_index = FindFile(icomparator_, *flevel_, target);
// Check whether the seek key fall under the same file
bool need_to_reseek = true;
if (file_iter_.iter() != nullptr && file_index_ < flevel_->num_files) {
const FdWithKeyRange& cur_file = flevel_->files[file_index_];
if (icomparator_.InternalKeyComparator::Compare(
target, cur_file.largest_key) <= 0 &&
icomparator_.InternalKeyComparator::Compare(
target, cur_file.smallest_key) >= 0) {
need_to_reseek = false;
assert(static_cast<size_t>(FindFile(icomparator_, *flevel_, target)) ==
file_index_);
}
}
if (need_to_reseek) {
TEST_SYNC_POINT("LevelIterator::Seek:BeforeFindFile");
size_t new_file_index = FindFile(icomparator_, *flevel_, target);
InitFileIterator(new_file_index);
}

InitFileIterator(new_file_index);
if (file_iter_.iter() != nullptr) {
file_iter_.Seek(target);
}
Expand Down
1 change: 1 addition & 0 deletions table/block.cc
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
}

void IndexBlockIter::Seek(const Slice& target) {
TEST_SYNC_POINT("IndexBlockIter::Seek:0");
Slice seek_key = target;
if (!key_includes_seq_) {
seek_key = ExtractUserKey(target);
Expand Down
36 changes: 27 additions & 9 deletions table/block_based_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2334,17 +2334,35 @@ void BlockBasedTableIterator<TBlockIter, TValue>::Seek(const Slice& target) {
return;
}

SavePrevIndexValue();

index_iter_->Seek(target);

if (!index_iter_->Valid()) {
ResetDataIter();
return;
bool need_seek_index = true;
if (block_iter_points_to_real_block_) {
// Reseek.
prev_index_value_ = index_iter_->value();
// We can avoid an index seek if:
// 1. The new seek key is larger than the current key
// 2. The new seek key is within the upper bound of the block
// Since we don't necessarily know the internal key for either
// the current key or the upper bound, we check user keys and
// exclude the equality case. Considering internal keys can
// improve for the boundary cases, but it would complicate the
// code.
if (user_comparator_.Compare(ExtractUserKey(target),
block_iter_.user_key()) > 0 &&
user_comparator_.Compare(ExtractUserKey(target),
index_iter_->user_key()) < 0) {
need_seek_index = false;
}
}

if (need_seek_index) {
index_iter_->Seek(target);
if (!index_iter_->Valid()) {
ResetDataIter();
return;
}
InitDataBlock();
}

InitDataBlock();

block_iter_.Seek(target);

FindKeyForward();
Expand Down

0 comments on commit 4c386ea

Please sign in to comment.