/
bloom-filter.ts
92 lines (81 loc) · 2.36 KB
/
bloom-filter.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
// minimal implementation MurmurHash2 hash function
function murmurhash2(str: string) {
let h = 0
for (let i = 0; i < str.length; i++) {
const c = str.charCodeAt(i)
h = Math.imul(h ^ c, 0x5bd1e995)
h ^= h >>> 13
h = Math.imul(h, 0x5bd1e995)
}
return h >>> 0
}
export class BloomFilter {
numItems: number
errorRate: number
numBits: number
numHashes: number
bitArray: number[]
constructor(numItems: number, errorRate: number) {
this.numItems = numItems
this.errorRate = errorRate
this.numBits = Math.ceil(
-(numItems * Math.log(errorRate)) / (Math.log(2) * Math.log(2))
)
this.numHashes = Math.ceil((this.numBits / numItems) * Math.log(2))
this.bitArray = new Array(this.numBits).fill(0)
if (typeof window === 'undefined') {
if (errorRate < 0.01) {
const filterData = JSON.stringify(this.export())
const gzipSize = require('next/dist/compiled/gzip-size').sync(
filterData
)
if (gzipSize > 1024) {
console.warn(
`Creating filter with error rate less than 1% (0.01) can increase the size dramatically proceed with caution. Received error rate ${errorRate} resulted in size ${gzipSize} bytes (gzip)`
)
}
}
}
}
static from(items: string[], errorRate = 0.01) {
const filter = new BloomFilter(items.length, errorRate)
for (const item of items) {
filter.add(item)
}
return filter
}
export() {
return {
numItems: this.numItems,
errorRate: this.errorRate,
numBits: this.numBits,
numHashes: this.numHashes,
bitArray: this.bitArray,
}
}
import(data: ReturnType<typeof this['export']>) {
this.numItems = data.numItems
this.errorRate = data.errorRate
this.numBits = data.numBits
this.numHashes = data.numHashes
this.bitArray = data.bitArray
}
add(item: string) {
const hashValues = this.getHashValues(item)
hashValues.forEach((hash) => {
this.bitArray[hash] = 1
})
}
contains(item: string) {
const hashValues = this.getHashValues(item)
return hashValues.every((hash) => this.bitArray[hash])
}
getHashValues(item: string) {
const hashValues = []
for (let i = 1; i <= this.numHashes; i++) {
const hash = murmurhash2(`${item}${i}`) % this.numBits
hashValues.push(hash)
}
return hashValues
}
}