forked from google/leveldb
/
filter_block.cc
132 lines (114 loc) · 4.46 KB
/
filter_block.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/filter_block.h"
#include "leveldb/filter_policy.h"
#include "util/coding.h"
namespace leveldb {
// See doc/table_format.md for an explanation of the filter block format.
// Generate new filter every 2KB of data
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;//2KB
FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy)
: policy_(policy) {
}
void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
//每2KB一个filter,计算当前数据大小总共需要多少个filter
uint64_t filter_index = (block_offset / kFilterBase);
assert(filter_index >= filter_offsets_.size());
while (filter_index > filter_offsets_.size()) {
GenerateFilter();
}
}
void FilterBlockBuilder::AddKey(const Slice& key) {
Slice k = key;
//start_记录key在keys的offset,因此可以还原出key
//为什么不直接使用std::vecotr<std::string>?
start_.push_back(keys_.size());
keys_.append(k.data(), k.size());
}
Slice FilterBlockBuilder::Finish() {
if (!start_.empty()) {
GenerateFilter();
}
// Append array of per-filter offsets
const uint32_t array_offset = result_.size();
// 每 4 个bytes记录1个filter_offsets
for (size_t i = 0; i < filter_offsets_.size(); i++) {
PutFixed32(&result_, filter_offsets_[i]);
}
//记录全部 filter 的总大小
PutFixed32(&result_, array_offset);
//11 = 0x0b
result_.push_back(kFilterBaseLg); // Save encoding parameter in result
return Slice(result_);
}
void FilterBlockBuilder::GenerateFilter() {
const size_t num_keys = start_.size();
//如果相比上一个filter data没有新的key
//那么只更新offsets数组就返回
if (num_keys == 0) {
// Fast path if there are no keys for this filter
filter_offsets_.push_back(result_.size());
return;
}
// Make list of keys from flattened key structure
// starts最后一个元素是keys_的总大小,此时starts元素个数=num_keys + 1
// 这样 [starts[i], starts[i+1]) 就可以还原所有的key了
start_.push_back(keys_.size()); // Simplify length computation
tmp_keys_.resize(num_keys);
//遍历start_,同时通过keys_获取当前记录的所有key,存储到tmp_keys_
for (size_t i = 0; i < num_keys; i++) {
const char* base = keys_.data() + start_[i];
size_t length = start_[i+1] - start_[i];
tmp_keys_[i] = Slice(base, length);
}
// Generate filter for current set of keys and append to result_.
// 记录当前result_大小,也就是新的filter数据的offset
filter_offsets_.push_back(result_.size());
// 生成filter数据,追加到result_
policy_->CreateFilter(&tmp_keys_[0], static_cast<int>(num_keys), &result_);
tmp_keys_.clear();
keys_.clear();
start_.clear();
}
FilterBlockReader::FilterBlockReader(const FilterPolicy* policy,
const Slice& contents)
: policy_(policy),
data_(nullptr),
offset_(nullptr),
num_(0),
base_lg_(0) {
size_t n = contents.size();
if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array
//最后1个字节记录kFilterBaseLg
base_lg_ = contents[n-1];
//base_lg_前4个字节,记录filter data总大小,也是filter offset的起始位置
uint32_t last_word = DecodeFixed32(contents.data() + n - 5);
if (last_word > n - 5) return;
data_ = contents.data();
//filter data offsets
offset_ = data_ + last_word;
//filter的个数
num_ = (n - 5 - last_word) / 4;
}
bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) {
//位于哪个filter data
uint64_t index = block_offset >> base_lg_;
if (index < num_) {
//[start, limit)标记了一个block_offset对应的filter data
//如果index = num_ - 1,那么limit指向filter data的总大小
uint32_t start = DecodeFixed32(offset_ + index*4);
uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
if (start <= limit && limit <= static_cast<size_t>(offset_ - data_)) {
//取出 filter data,判断key是否存在
Slice filter = Slice(data_ + start, limit - start);
return policy_->KeyMayMatch(key, filter);
} else if (start == limit) {
// Empty filters do not match any keys
return false;
}
}
return true; // Errors are treated as potential matches
}
}