-
Notifications
You must be signed in to change notification settings - Fork 0
/
dumbloom.sql
160 lines (139 loc) · 2.85 KB
/
dumbloom.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
CREATE TYPE dumbloom AS (
m integer,
k integer,
bits integer[]
);
CREATE FUNCTION dumbloom_empty (
p float8 DEFAULT 0.01,
n integer DEFAULT 10000
) RETURNS dumbloom AS
$$
DECLARE
m integer;
k integer;
i integer;
bits integer[];
BEGIN
m := abs(ceil(n * ln(p) / (ln(2) ^ 2)))::integer;
k := round(ln(2) * m / n)::integer;
bits := NULL;
FOR i in 1 .. ceil(m / 32.0) LOOP
bits := array_append(bits, 0);
END LOOP;
RETURN (m, k, bits)::dumbloom;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE;
CREATE FUNCTION dumbloom_fingerprint (
b dumbloom,
item text
) RETURNS integer[] AS
$$
DECLARE
h1 bigint;
h2 bigint;
i integer;
fingerprint integer[];
BEGIN
h1 := abs(hashtext(upper(item)));
h2 := abs(hashtext(lower(item)));
fingerprint := NULL;
FOR i IN 1 .. b.k LOOP
fingerprint := array_append(fingerprint, ((h1 + i * h2) % b.m)::integer);
END LOOP;
RETURN fingerprint;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE;
CREATE FUNCTION dumbloom_add (
b dumbloom,
item text,
p float8 DEFAULT 0.01,
n integer DEFAULT 10000
) RETURNS dumbloom AS
$$
DECLARE
i integer;
idx integer;
BEGIN
IF b IS NULL THEN
b := dumbloom_empty(p, n);
END IF;
FOREACH i IN ARRAY dumbloom_fingerprint(b, item) LOOP
idx := i / 32 + 1;
b.bits[idx] := b.bits[idx] | (1 << (i % 32));
END LOOP;
RETURN b;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE;
CREATE FUNCTION dumbloom_add (
b dumbloom,
item text
) RETURNS dumbloom AS
$$
BEGIN
RETURN dumbloom_add(b, item, 0.01, 10000);
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE;
CREATE FUNCTION dumbloom_contains (
b dumbloom,
item text
) RETURNS boolean AS
$$
DECLARE
i integer;
idx integer;
BEGIN
IF b IS NULL THEN
RETURN FALSE;
END IF;
FOREACH i IN ARRAY dumbloom_fingerprint(b, item) LOOP
idx := i / 32 + 1;
IF NOT (b.bits[idx] & (1 << (i % 32)))::boolean THEN
RETURN FALSE;
END IF;
END LOOP;
RETURN TRUE;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE;
CREATE FUNCTION dumbloom_union (
b1 dumbloom,
b2 dumbloom
) RETURNS dumbloom AS
$$
DECLARE
i integer;
bits integer[];
BEGIN
IF b1 IS NULL THEN
RETURN b2;
END IF;
IF NOT (b1.m = b2.m AND b1.k = b2.k) THEN
RAISE EXCEPTION 'bloom filter mismatch!';
END IF;
bits := NULL;
FOR i IN 1 .. ceil(b1.m / 32.0) LOOP
bits := array_append(bits, b1.bits[i] | b2.bits[i]);
END LOOP;
RETURN (b1.m, b1.k, bits)::dumbloom;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE;
CREATE AGGREGATE dumbloom_agg (text) (
stype = dumbloom,
sfunc = dumbloom_add,
combinefunc = dumbloom_union
);
CREATE AGGREGATE dumbloom_agg (text, float8, integer) (
stype = dumbloom,
sfunc = dumbloom_add,
combinefunc = dumbloom_union
);
CREATE AGGREGATE dumbloom_union_agg (dumbloom) (
stype = dumbloom,
sfunc = dumbloom_union,
combinefunc = dumbloom_union
);