Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ DATE ?= 2025-01-08

# Path to the code repo.
VALKEY_ROOT ?= ../valkey

VALKEY_BLOOM_ROOT ?= ../valkey-bloom
# Where to install man pages
INSTALL_MAN_DIR ?= /usr/local/share/man

Expand All @@ -30,6 +30,10 @@ ifeq ("$(wildcard $(VALKEY_ROOT))","")
$(error Please provide the VALKEY_ROOT variable pointing to the Valkey source code)
endif

# The bloom repo is optional: unlike the VALKEY_ROOT check, a missing
# VALKEY_BLOOM_ROOT only prints a notice and the build continues.
ifeq ("$(wildcard $(VALKEY_BLOOM_ROOT))","")
$(info Valkey bloom variable pointed to nothing, skipping bloom filter commands)
endif

ifeq ("$(shell which pandoc)","")
$(error Please install pandoc)
endif
Expand All @@ -54,7 +58,9 @@ endif

documented_commands = $(wildcard commands/*.md)
commands_json_files = $(wildcard $(VALKEY_ROOT)/src/commands/*.json)
existing_commands = $(commands_json_files:$(VALKEY_ROOT)/src/commands/%.json=commands/%.md)
bloom_commands_json_files = $(wildcard $(VALKEY_BLOOM_ROOT)/src/commands/*.json)
existing_commands = $(commands_json_files:$(VALKEY_ROOT)/src/commands/%.json=commands/%.md) \
$(bloom_commands_json_files:$(VALKEY_BLOOM_ROOT)/src/commands/%.json=commands/%.md)

topics = $(wildcard topics/*)
commands = $(filter $(existing_commands),$(documented_commands))
Expand All @@ -65,7 +71,9 @@ topics_pics = $(filter-out %.md,$(topics))
# ---- Temp files ----

# JSON files for the commands that have a .md file (excluding undocumented commands).
json_for_documented_commands = $(commands:commands/%.md=$(VALKEY_ROOT)/src/commands/%.json)
json_for_documented_commands = \
$(patsubst commands/%.md,$(VALKEY_ROOT)/src/commands/%.json,$(filter $(commands_json_files:$(VALKEY_ROOT)/src/commands/%.json=commands/%.md),$(commands))) \
$(patsubst commands/%.md,$(VALKEY_BLOOM_ROOT)/src/commands/%.json,$(filter $(bloom_commands_json_files:$(VALKEY_BLOOM_ROOT)/src/commands/%.json=commands/%.md),$(commands)))

$(BUILD_DIR)/.commands-per-group.json: $(VALKEY_ROOT)/src/commands/. utils/build-command-groups.py | $(BUILD_DIR)
utils/build-command-groups.py $(json_for_documented_commands) > $@~~
Expand Down Expand Up @@ -148,6 +156,9 @@ progs = valkey-cli valkey-server valkey-benchmark valkey-sentinel valkey-check-r
programs = $(progs:valkey-%=topics/%.md)
configs = topics/valkey.conf.md

# Define the base directories where valkey commands can come from
VALKEY_ROOTS := $(VALKEY_ROOT) $(VALKEY_BLOOM_ROOT)

man1_src = $(filter $(programs),$(topics_md))
man3_src = $(commands)
man5_src = $(filter $(configs),$(topics_md))
Expand Down Expand Up @@ -175,12 +186,14 @@ $(MAN_DIR)/man1/valkey-%.1.gz: topics/%.md $(man_scripts)
utils/preprocess-markdown.py --man --page-type program \
--version $(VERSION) --date $(DATE) \$< \
| utils/links-to-man.py - | $(to_man) > $@
$(MAN_DIR)/man3/%.3valkey.gz: commands/%.md $(VALKEY_ROOT)/src/commands/%.json $(BUILD_DIR)/.commands-per-group.json $(man_scripts)
utils/preprocess-markdown.py --man --page-type command \
--version $(VERSION) --date $(DATE) \
--commands-per-group-json $(BUILD_DIR)/.commands-per-group.json \
--valkey-root $(VALKEY_ROOT) $< \
| utils/links-to-man.py - | $(to_man) > $@
# Man page for a single command.
#
# The command's JSON metadata may live in any of the repos listed in
# VALKEY_ROOTS. FINAL_ROOT is set to the first root that contains
# src/commands/<stem>.json and is passed to the preprocessor as --valkey-root.
#
# If no root contains the JSON file, stop with an error. Wrapping the whole
# command in $(if $(FINAL_ROOT),...) would make the recipe expand to nothing,
# so make would report success without ever creating the target.
$(MAN_DIR)/man3/%.3valkey.gz: commands/%.md $(BUILD_DIR)/.commands-per-group.json $(man_scripts)
	$(eval FINAL_ROOT := $(firstword $(foreach root,$(VALKEY_ROOTS),$(if $(wildcard $(root)/src/commands/$*.json),$(root)))))
	$(if $(FINAL_ROOT),,$(error No src/commands/$*.json found in any of: $(VALKEY_ROOTS)))
	utils/preprocess-markdown.py --man --page-type command \
	    --version $(VERSION) --date $(DATE) \
	    --commands-per-group-json $(BUILD_DIR)/.commands-per-group.json \
	    --valkey-root $(FINAL_ROOT) $< \
	    | utils/links-to-man.py - | $(to_man) > $@
$(MAN_DIR)/man5/%.5.gz: topics/%.md $(man_scripts)
utils/preprocess-markdown.py --man --page-type config \
--version $(VERSION) --date $(DATE) $< \
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@ for generating content for the website and man pages.

This repo comes with a Makefile to build and install man pages.

make VALKEY_ROOT=path/to/valkey
make VALKEY_ROOT=path/to/valkey VALKEY_BLOOM_ROOT=path/to/valkey-bloom
sudo make install INSTALL_MAN_DIR=/usr/local/share/man

Prerequisites: GNU Make, Python 3, Python 3 YAML (pyyaml), Pandoc.
Additionally, the scripts need access to the valkey code repo,
where metadata files about the commands are stored.
where metadata files about the commands are stored. Access to the
valkey-bloom repo is optional; without it, the bloom filter commands are skipped.

The pages are generated under `_build/man/` by default. The default install
location is `/usr/local/share/man` (in the appropriate subdirectories).
Expand Down
14 changes: 14 additions & 0 deletions commands/bf.add.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Adds a single item to a bloom filter. If the specified bloom filter does not exist, a bloom filter is created with the provided name with default properties.

To add multiple items to a bloom filter, you can use the `BF.MADD` or `BF.INSERT` commands.

This comment was marked as resolved.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is more grammatically correct to have the s, as it is showing there are multiple commands that can do this, not just one


To create a bloom filter with non-default properties, use the `BF.INSERT` or `BF.RESERVE` command.

## Examples

```
127.0.0.1:6379> BF.ADD key val
(integer) 1
127.0.0.1:6379> BF.ADD key val
(integer) 0
```
12 changes: 12 additions & 0 deletions commands/bf.card.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Returns the cardinality of a bloom filter, which is the number of items that have been successfully added to it.

## Examples

```
127.0.0.1:6379> BF.ADD key val
(integer) 1
127.0.0.1:6379> BF.CARD key
(integer) 1
127.0.0.1:6379> BF.CARD nonexistentkey
(integer) 0
```
18 changes: 18 additions & 0 deletions commands/bf.exists.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Determines if an item has been added to the bloom filter previously.

A bloom filter has two possible responses when you check if an item exists:

* 0 - The item definitely does not exist since with bloom filters, false negatives are not possible.

* 1 - The item exists with a given false positive (`fp`) percentage. There is an `fp` rate % chance that the item does not exist. You can create bloom filters with a more strict false positive rate as needed.

## Examples

```
127.0.0.1:6379> BF.ADD key val
(integer) 1
127.0.0.1:6379> BF.EXISTS key val
(integer) 1
127.0.0.1:6379> BF.EXISTS key nonexistent
(integer) 0
```
41 changes: 41 additions & 0 deletions commands/bf.info.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
Returns usage information and properties of a specific bloom filter.

## Info Fields

* CAPACITY - The number of unique items that would need to be added before a scale out occurs or (non scaling) before it rejects addition of unique items.
* SIZE - The number of bytes allocated by this bloom filter.
* FILTERS - Returns the number of sub filters contained within the bloom filter.
* ITEMS - The number of unique items that have been added to the bloom filter.
* ERROR - The false positive rate of the bloom filter.
* EXPANSION - The expansion rate of the bloom filter. Non scaling filters will have an expansion rate of nil.
* TIGHTENING - The tightening ratio of the bloom filter.
* MAXSCALEDCAPACITY - The [maximum capacity](../topics/bloomfilters.md) that a scalable bloom filter can expand to before a subsequent scale out will fail.

For non-scaling filters, the `TIGHTENING` and `MAXSCALEDCAPACITY` fields are not applicable and will not be returned.
When no optional fields are specified, all available fields for the given filter type are returned.

## Examples

```
127.0.0.1:6379> BF.ADD key val
(integer) 1
127.0.0.1:6379> BF.INFO key
1) Capacity
2) (integer) 100
3) Size
4) (integer) 384
5) Number of filters
6) (integer) 1
7) Number of items inserted
Comment on lines +22 to +29
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't match the documentation of the field names above. I would expect the field names to be CAPACITY, SIZE, FILTERS etc. rather than "Capacity", "Size", "Number of filters", ...

Btw, why are these uppercase? In the INFO command, the field names are lowercase.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This name convention is following the bloom filter Command Syntax of existing client libraries

8) (integer) 2
9) Error rate
10) "0.01"
11) Expansion rate
12) (integer) 2
13) Tightening ratio
14) "0.5"
15) Max scaled capacity
16) (integer) 26214300
127.0.0.1:6379> BF.INFO key CAPACITY
(integer) 100
```
44 changes: 44 additions & 0 deletions commands/bf.insert.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
If the bloom filter does not exist under the specified name, a bloom filter is created with the specified parameters. Default properties will be used if the options below are not specified.

When the `ITEMS` option is provided, the command attempts to add all of the provided items.

## Insert Fields

* CAPACITY *capacity* - The number of unique items that would need to be added before a scale out occurs or (non scaling) before it rejects addition of unique items.
* ERROR *fp_error* - The false positive rate of the bloom filter.
* EXPANSION *expansion* - This option will specify the bloom filter as scaling and controls the size of the sub filter that will be created upon scale out / expansion of the bloom filter.
* NOCREATE - Will not create the bloom filter and add items if the filter does not exist already.
* TIGHTENING *tightening_ratio* - The tightening ratio for the bloom filter.
* SEED *seed* - The 32 byte seed the bloom filter's hash functions will use.
* NONSCALING - This option will configure the bloom filter as non scaling; it cannot expand / scale beyond its specified capacity.
* VALIDATESCALETO *validatescaleto* - Validates whether the filter can scale out and reach this capacity based on limits; if it cannot, an error is returned without creating the bloom filter.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is why you had to add validatescaleto to the spellcheck wordlist.

The idea is that keywords like this should be put within backticks. Then it doesn't need to be in the wordlist.

It's possible to combine italics + backticks if needed, for example:

`VALIDATESCALETO` *`validatescaleto`*

Rendered as

VALIDATESCALETO validatescaleto

* ITEMS *item* - One or more items to be added to the bloom filter.

Due to the nature of `NONSCALING` and `VALIDATESCALETO` arguments, specifying `NONSCALING` and `VALIDATESCALETO` together is not allowed.

## Examples

```
127.0.0.1:6379> BF.INSERT key ITEMS item1 item2
1) (integer) 1
2) (integer) 1
# This does not update the capacity since the filter already exists. It only adds the provided items.
127.0.0.1:6379> BF.INSERT key CAPACITY 1000 ITEMS item2 item3
1) (integer) 0
2) (integer) 1
127.0.0.1:6379> BF.INSERT key_new CAPACITY 1000
[]
```

```
127.0.0.1:6379> BF.INSERT key NONSCALING VALIDATESCALETO 100
(error) ERR cannot use NONSCALING and VALIDATESCALETO options together
127.0.0.1:6379> BF.INSERT key CAPACITY 1000 VALIDATESCALETO 999999999999999999 ITEMS item2 item3
(error) ERR provided VALIDATESCALETO causes bloom object to exceed memory limit
127.0.0.1:6379> BF.INSERT key VALIDATESCALETO 999999999999999999 EXPANSION 1 ITEMS item2 item3
(error) ERR provided VALIDATESCALETO causes false positive to degrade to 0
```
```
127.0.0.1:6379> BF.INSERT key NOCREATE ITEMS item1 item2
(error) ERR not found
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a very good error, is it too late to make it better?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This error message is from existing API and error messages from existing client libraries that support bloom filters
We followed the existing error messages to be API compatible with the bloom filter commands of existing client libraries

```
1 change: 1 addition & 0 deletions commands/bf.load.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Restores a bloom filter from a dump of an existing bloom filter, including all of its properties and the bit vector dump of its sub filter/s. This command is only generated during AOF rewrite to restore a bloom filter in the future.
16 changes: 16 additions & 0 deletions commands/bf.madd.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Adds one or more items to a bloom filter. If the specified bloom filter does not exist, a bloom filter is created with the provided name with default properties.

If you want to create a bloom filter with non-default properties, use the `BF.INSERT` or `BF.RESERVE` command.

## Examples

```
127.0.0.1:6379> BF.MADD key item1 item2
1) (integer) 1
2) (integer) 1
127.0.0.1:6379> BF.MADD key item2 item3
1) (integer) 0
2) (integer) 1
127.0.0.1:6379> BF.MADD key_new item1
1) (integer) 1
```
21 changes: 21 additions & 0 deletions commands/bf.mexists.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Determines if the provided item/s have been added to a bloom filter previously.

A Bloom filter has two possible responses when you check if an item exists:

* 0 - The item definitely does not exist since with bloom filters, false negatives are not possible.

* 1 - The item exists with a given false positive (`fp`) percentage. There is an `fp` rate % chance that the item does not exist. You can create bloom filters with a more strict false positive rate as needed.
Comment on lines +3 to +7
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Skip this. Responses are documented in the response JSON files.

(I know, I don't like it. It's unnecessarily complex. I want to move the reply docs into the markdown files some day. But for now, let's just follow the existing structure.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this was wanted to make it explicit how false positive affects the exist command and determining if an item is present. I could try and reword so it explains false positive not based on response but I think the thinking is that showing the response makes it more understandable

Copy link
Contributor

@zuiderkwast zuiderkwast Mar 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The rendered page is showing the response from resp2_responses.json etc. but unfortunately it gets added in the bottom of the web page.

(On the generated man pages, the reply section gets inserted before Examples, which I think is a better place.)

You can keep this text here if you think it's better, and keep it brief in resp{2,3}_replies.json so there is not too much duplicated text.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that it does have some slight duplication but in my opinion I like having this explained as one of the main behaviours of bloom filters is the false positive rate. But am happy to change if others would rather not have the duplication.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind, but feel free to formulate it in a way so that it doesn't look too much like duplication.


## Examples

```
127.0.0.1:6379> BF.MADD key item1 item2
1) (integer) 1
2) (integer) 1
127.0.0.1:6379> BF.MEXISTS key item1 item2 item3
1) (integer) 1
2) (integer) 1
3) (integer) 0
127.0.0.1:6379> BF.MEXISTS key item1
1) (integer) 1
```
27 changes: 27 additions & 0 deletions commands/bf.reserve.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Creates an empty bloom filter with the specified capacity and false positive rate. By default, a scaling filter is created with the default expansion rate.

To specify the scaling / non scaling nature of the bloom filter, use the options: `NONSCALING` or `EXPANSION <expansion rate>`. It is invalid to provide both options together.

## Reserve fields

* error_rate - The false positive rate of the bloom filter.
* capacity - The number of unique items that would need to be added before a scale out occurs or (non scaling) before it rejects addition of unique items.
* EXPANSION expansion - This option will specify the bloom filter as scaling and controls the size of the sub filter that will be created upon scale out / expansion of the bloom filter.
* NONSCALING - This option will configure the bloom filter as non scaling; it cannot expand / scale beyond its specified capacity.

## Examples

```
127.0.0.1:6379> BF.RESERVE key 0.01 1000
OK
127.0.0.1:6379> BF.RESERVE key 0.1 1000000
(error) ERR item exists
```
```
127.0.0.1:6379> BF.RESERVE bf_expansion 0.0001 5000 EXPANSION 3
OK
```
```
127.0.0.1:6379> BF.RESERVE bf_nonscaling 0.0001 5000 NONSCALING
OK
```
4 changes: 4 additions & 0 deletions groups.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
"display": "Bitmap",
"description": "Operations on the Bitmap data type"
},
"bloom": {
"display": "Bloom filter",
"description": "Operations on the Bloom filter data type"
},
"cluster": {
"display": "Cluster",
"description": "Valkey Cluster management"
Expand Down
7 changes: 7 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"valkey_bloom": {
"name": "valkey-bloom",
"repo": "https://github.com/valkey-io/valkey-bloom",
"description": "Module that allows users to use the bloom filter data type"
}
}
41 changes: 41 additions & 0 deletions resp2_replies.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,47 @@
"AUTH": [
"[Simple string reply](../topics/protocol.md#simple-strings): `OK`, or an error if the password, or username/password pair, is invalid."
],
"BF.ADD": [
"One of the following:",
"* [Integer reply](../topics/protocol.md#integers): `1` if the item was successfully added",
"* [Integer reply](../topics/protocol.md#integers): `0` if the item already existed in the bloom filter",
"",
"The command will be rejected if input is invalid, if a non bloom filter key with the same name already exists, if the bloom filter creation / scale out exceeds limits, or if an item is being added to a full non scaling filter."
],
"BF.CARD": [
"[Integer reply](../topics/protocol.md#integers): The number of items successfully added to the bloom filter, or 0 if the key does not exist"
],
"BF.EXISTS": [
"One of the following:",
"* [Integer reply](../topics/protocol.md#integers): `1` if the item exists in the bloom filter",
"* [Integer reply](../topics/protocol.md#integers): `0` if the bloom filter does not exist or the item has not been added to the bloom filter"
],
"BF.INFO": [
"When no optional arguments are provided:",
"* [Array reply](../topics/protocol.md#arrays): List of information about the bloom filter.",
"When an optional argument excluding ERROR is provided:",
"* [Integer reply](../topics/protocol.md#integers): argument value",
"When ERROR is provided as an optional argument:",
"* [String reply](../topics/protocol.md#simple-strings): argument value"
],
"BF.INSERT": [
"[Array reply](../topics/protocol.md#arrays): Array of ints (1’s and 0’s) - if filter already exists or if creation was successful. An empty array if no items are provided",
"",
"The command will be rejected if input is invalid, if a non bloom filter key with the same name already exists, if the bloom filter creation / scale out exceeds limits, or if an item is being added to a full non scaling filter."
],
"BF.MADD": [
"[Array reply](../topics/protocol.md#arrays): Array of ints (1’s and 0’s)",
"",
"The command will be rejected if input is invalid, if a non bloom filter key with the same name already exists, if the bloom filter creation / scale out exceeds limits, or if an item is being added to a full non scaling filter."
],
"BF.MEXISTS": [
"[Array reply](../topics/protocol.md#arrays): Array of ints (1’s and 0’s)"
],
"BF.RESERVE": [
"[Simple string reply](../topics/protocol.md#simple-strings): `OK`.",
"",
"The command will be rejected if input is invalid, if a key with the same name already exists, or if the bloom filter creation exceeds limits."
],
"BGREWRITEAOF": [
"[Simple string reply](../topics/protocol.md#simple-strings): a simple string reply indicating that the rewriting started or is about to start ASAP when the call is executed with success.",
"",
Expand Down
Loading