In [184]:
!pip install mmh3



You should consider upgrading via the 'c:\users\utkarsh priyadarshi\appdata\local\programs\python\python39\python.exe -m pip install --upgrade pip' command.


# Bloom Filter
This is part of the series **Mastering Data Structures for Databases**
You can find the article at https://medium.com/@utkarshpriyadarshi5026/mastering-data-structures-for-databases-part-3-bloom-filters-f92f3bff7dcc 

## Hash functions used in the Bloom Filter

Let's create a hash function type that will be used in the Bloom Filter.

Our hash function will take two arguments:
- a string to be hashed
- an integer that will be used as a seed for the hash function

In [185]:
from typing import Callable

HashFunction = Callable[[str, int], int]


### CryptoGraphic Hash Functions 

Different types of hash functions can be used in the Bloom Filter.


In [186]:
import hashlib

def sha256_hash(item: str, seed: int = 0) -> int:
    hash_value = int(hashlib.sha256((item + str(seed)).encode()).hexdigest(), 16)
    return hash_value

def md5_hash(item: str, seed: int = 0) -> int:
    hash_value = int(hashlib.md5((item + str(seed)).encode()).hexdigest(), 16)
    return hash_value

def sha1_hash(item: str, seed: int = 0) -> int:
    return int(hashlib.sha1((item + str(seed)).encode()).hexdigest(), 16)

### Non-Cryptographic Hash Functions

#### Murmur Hash
MurmurHash processes the input data in blocks, mixing the bits in each block to produce a final hash value. It uses a combination of multiplication and bitwise operations to achieve a good distribution of hash values.
#### DJB2 Hash
DJB2 starts with an initial hash value (often 5381) and iterates over each character in the input string. For each character, it multiplies the current hash value by 33 and adds the ASCII value of the character. This process is repeated for all characters in the input string.
#### FNV-1a Hash
FNV-1a starts with an initial hash value (FNV offset basis) and iterates over each character in the input string. For each character, it XORs the current hash value with the ASCII value of the character and then multiplies the result by the FNV prime. This process is repeated for all characters in the input string.


In [187]:
import mmh3

def murmur_hash(item: str, seed: int = 0) -> int:
    hash_value = mmh3.hash(item, seed)
    return hash_value

def djb2_hash(item: str, seed: int = 0) -> int:
    hash_value = seed
    for char in item:
        hash_value = ((hash_value << 5) + hash_value) + ord(char)
    return hash_value & 0xFFFFFFFF

def fnv1a_hash(item: str, seed: int = 0) -> int:
    hash_value = 0x811c9dc5 + seed # FNV offset basis
    fnv_prime = 0x01000193 # 32 bit FNV prime
    for char in item:
        hash_value ^= ord(char) # XOR
        hash_value *= fnv_prime # Multiplication
    return hash_value & 0xFFFFFFFF 

## Structure of the Bloom Filter
A Bloom filter is a space-efficient probabilistic data structure used to test whether an element is a member of a set. The structure of a Bloom filter consists of the following components:

1. Bit Array:  
    - A fixed-size bit array (or bit vector) initialized to all zeros. The size of the bit array determines the accuracy and space efficiency of the Bloom filter.
2. Hash Functions:  
    - Multiple independent hash functions that map elements to positions in the bit array. Each hash function should uniformly distribute the input elements across the bit array.

## Operations on the Bloom Filter

1. Add:
    - To add an element to the Bloom filter, each hash function is applied to the element to get multiple hash values. The corresponding positions in the bit array are then set to 1
2. Check:
    - To check if an element is in the Bloom filter, each hash function is applied to the element to get multiple hash values. If all the corresponding positions in the bit array are set to 1, the element is likely in the set. If any position is 0, the element is definitely not in the set.

In [188]:
from typing import List

class BloomFilter:
    def __init__(self, size: int, hash_functions: List[HashFunction]) -> None:
        self.size = size
        self.hash_functions = hash_functions
        self.bit_array = [0] * size
        self.item_count = 0
        print(f"Initialized Bloom Filter with size {size} and {len(hash_functions)} hash functions.")

    def _hashes(self, item: str) -> List[int]:
        hashes = []
        for i, hash_func in enumerate(self.hash_functions):
            hash_value = hash_func(item, i) % self.size
            hashes.append(hash_value)
           
        return hashes

    def add(self, item: str) -> None:
        hashes = self._hashes(item)
        for i, hash_value in enumerate(hashes):
            self.bit_array[hash_value] = 1
        
        self.item_count += 1

    def check(self, item: str) -> bool:
        hashes = self._hashes(item)
        result = all(self.bit_array[hash_value] == 1 for hash_value in hashes)
        return result
    
    def show(self):
        print(self.bit_array)

In [189]:
import random

def get_random_hash_functions(num_functions: int) -> List[HashFunction]:
    available_hash_functions: List[HashFunction] = [
        sha256_hash,
        md5_hash,
        sha1_hash,
        murmur_hash,
        djb2_hash,
        fnv1a_hash
    ]
    if num_functions > len(available_hash_functions):
        num_functions = num_functions % len(available_hash_functions)
        num_functions = num_functions if num_functions > 0 else random.randint(1, len(available_hash_functions))
    
    selected_functions = random.sample(available_hash_functions, num_functions)
    
    for i, func in enumerate(selected_functions):
        print(f"Selected Hash Function {i+1}: {func.__name__}")
        
    print("\n")
    return selected_functions

In [190]:
funcs = get_random_hash_functions(3)
bloom_filter = BloomFilter(10, funcs)

Selected Hash Function 1: murmur_hash
Selected Hash Function 2: md5_hash
Selected Hash Function 3: fnv1a_hash


Initialized Bloom Filter with size 10 and 3 hash functions.


In [191]:
bloom_filter.add("apple")
bloom_filter.check("apple")

True

# The Mathematics of Bloom Filters

Bloom filters are probabilistic data structures that efficiently represent sets and support membership queries. Understanding the mathematics behind Bloom filters is crucial for optimizing their performance and understanding their behavior. This document explores the key mathematical concepts and formulas underlying Bloom filters.

## 1. Basic Structure

A Bloom filter consists of:
- A bit array of m bits, initially all set to 0
- k different hash functions, each mapping an element to one of the m array positions

## 2. Insertion

When an element is inserted:
- It is hashed by each of the k hash functions
- The bits at the resulting k positions are set to 1

## 3. Querying

To query for an element:
- It is hashed by each of the k hash functions
- If all the bits at the resulting k positions are 1, the element is considered "possibly in the set"
- If any of the bits are 0, the element is "definitely not in the set"

## 4. False Positive Probability

The probability of a false positive (p) after inserting n elements is approximately:

p ≈ (1 - e^(-kn/m))^k

Where:
- m is the size of the bit array
- n is the number of inserted elements
- k is the number of hash functions

## 5. Optimal Number of Hash Functions

The optimal number of hash functions (k) that minimizes the false positive probability is:

k = (m/n) * ln(2)

This can be derived by differentiating the false positive probability formula with respect to k and setting it to zero.

## 6. Optimal Size of the Bit Array

Given a desired false positive probability p and the number of elements n, the optimal size of the bit array m is:

m = -(n * ln(p)) / (ln(2))^2

## 7. Capacity and Fill Ratio

The fill ratio of a Bloom filter is the proportion of bits set to 1. It can be approximated as:

fill_ratio ≈ 1 - e^(-kn/m)

As the fill ratio approaches 1, the false positive rate increases.

## 8. Estimating the Number of Elements

Given a Bloom filter with m bits, k hash functions, and x bits set to 1, we can estimate the number of elements n as:

n ≈ -(m/k) * ln(1 - x/m)


In [192]:
def optimal_filter_size(n: int, p: float) -> int:
    """
    Calculate the optimal size of a Bloom filter.
    m = -(n * log(p)) / (log(2)^2)
    
    Args:
    n (int): The expected number of elements to be inserted.
    p (float): The desired false positive probability.
    
    Returns:
    int: The optimal size of the filter in bits.
    """
    m = -(n * math.log(p)) / (math.log(2)**2)
    return math.ceil(m)


def optimal_number_of_hashes(m: int, n: int) -> int:
    """
    Calculate the optimal number of hash functions for a Bloom filter.
    
    k = (m / n) * log(2)
    
    Args:
    m (int): The size of the Bloom filter in bits.
    n (int): The expected number of elements to be inserted.
    
    Returns:
    int: The optimal number of hash functions.
    """
    k = (m / n) * math.log(2)
    return math.ceil(k)


def false_positive_probability(m: int, n: int, k: int) -> float:
    """
    Calculate the false positive probability of a Bloom filter.
    p = (1 - e^(-k * n / m))^k
    
    Args:
    m (int): The size of the Bloom filter in bits.
    n (int): The number of elements inserted in the filter.
    k (int): The number of hash functions used.
    
    Returns:
    float: The false positive probability.
    """
    return (1 - math.exp(-k * n / m))**k


def estimated_elements_count(m: int, k: int, x: int) -> int:
    """
    Estimate the number of elements in a Bloom filter based on the number of bits set.
    
    n = -m / k * log(1 - x / m)
    
    Args:
    m (int): The size of the Bloom filter in bits.
    k (int): The number of hash functions used.
    x (int): The number of bits set to 1 in the filter.
    
    Returns:
    int: The estimated number of elements in the filter.
    """
    return math.ceil(-m / k * math.log(1 - x / m))


## Counting Bloom Filter

A Counting Bloom Filter (CBF) is an extension of the standard Bloom Filter that allows for the removal of elements. It achieves this by using an array of counters instead of a simple bit array. Each counter keeps track of the number of times a particular position has been set, enabling both addition and deletion of elements.

#### Key Components:
1. **Count Array**:
   - An array of integers (counters) initialized to all zeros. Each counter represents the number of times a particular bit position has been set.

2. **Hash Functions**:
   - Multiple independent hash functions that map elements to positions in the count array. Each hash function should uniformly distribute the input elements across the count array.

#### Basic Operations:
1. **Add Operation**:
   - To add an element, each hash function is applied to the element to get multiple hash values. The corresponding positions in the count array are incremented by 1.

2. **Remove Operation**:
   - To remove an element, each hash function is applied to the element to get multiple hash values. The corresponding positions in the count array are decremented by 1, ensuring the count does not go below zero.

3. **Check Operation**:
   - To check if an element is in the filter, each hash function is applied to the element to get multiple hash values. If all the corresponding positions in the count array are greater than zero, the element is likely in the set. If any position is zero, the element is definitely not in the set.

In [193]:
class CountingBloomFilter(BloomFilter):
    """
    A Counting Bloom Filter (CBF) is an extension of the standard Bloom Filter that allows for the removal of elements.
    It uses an array of counters instead of a simple bit array to keep track of the number of times a particular position has been set.

    Attributes:
        size (int): The size of the count array.
        hash_functions (List[HashFunction]): A list of hash functions used to map elements to positions in the count array.
        count_array (List[int]): The count array used to store the presence of elements.
    """

    def __init__(self, size: int, hash_functions: List[HashFunction]) -> None:
        super().__init__(size, hash_functions)
        # Initialize a count array instead of a bit array
        self.count_array = [0] * size
        print(f"Initialized Counting Bloom Filter with size {size}.")

    def add(self, item: str) -> None:
        hashes = self._hashes(item)
        for i, hash_value in enumerate(hashes):
            self.count_array[hash_value] += 1
            

    def remove(self, item: str) -> None:
        hashes = self._hashes(item)
        for i, hash_value in enumerate(hashes):
            if self.count_array[hash_value] > 0:
                # Decrement the count if it's greater than 0
                self.count_array[hash_value] -= 1
              

    def check(self, item: str) -> bool:
        hashes = self._hashes(item)
        result = all(self.count_array[hash_value] > 0 for hash_value in hashes)
        print(f"Checking '{item}': {'Present' if result else 'Not Present'} in Counting Bloom Filter.")
        return result

    def show(self) -> None:
        print(self.count_array)

In [194]:
def check_count_filter():
    hash_funcs = get_random_hash_functions(3)
    count_bloom_filter = CountingBloomFilter(size=10,
                                             hash_functions=hash_funcs)
    
    print("Adding 'apple' to the filter.")
    count_bloom_filter.add("apple")
    
    if count_bloom_filter.check("apple"):
        print("Found apple in the filter.")
        print("Removing 'apple' from the filter.")
        count_bloom_filter.remove("apple")
        
    print("Checking 'apple' after removal.")
    if not count_bloom_filter.check("apple"):
        print("Apple not found in the filter.")
        
check_count_filter()

Selected Hash Function 1: djb2_hash
Selected Hash Function 2: md5_hash
Selected Hash Function 3: sha256_hash


Initialized Bloom Filter with size 10 and 3 hash functions.
Initialized Counting Bloom Filter with size 10.
Adding 'apple' to the filter.
Checking 'apple': Present in Counting Bloom Filter.
Found apple in the filter.
Removing 'apple' from the filter.
Checking 'apple' after removal.
Checking 'apple': Not Present in Counting Bloom Filter.
Apple not found in the filter.


## Scalable Bloom Filter

A Scalable Bloom Filter (SBF) is an extension of the standard Bloom Filter that dynamically grows to accommodate more elements while maintaining a specified error rate.

#### Key Components:
1. **Initial Size**:
   - The initial size of the Bloom Filter.
2. **Error Rate**:
   - The desired false positive rate.
3. **Hash Functions**:
   - A list of hash functions used to map elements to positions in the bit array.
4. **Growth Factor**:
   - The factor by which the Bloom Filter size grows when it needs to expand.
5. **Capacity Factor**:
   - The factor that determines when the Bloom Filter needs to expand based on its fill ratio.
6. **Current Filter Size**:
   - The current size of the Bloom Filter.
7. **Filters**:
   - A list of Bloom Filters used to store elements.

#### Basic Operations:
1. **Add Operation**:
   - Adds an item to the Scalable Bloom Filter. If the current filter is full, a new filter is created and the item is added to it.
2. **Check Operation**:
   - Checks if an item is present in the Scalable Bloom Filter. It checks if the item is present in any of the filters.
3. **Expansion Check**:
   - Determines if the Bloom Filter needs to be expanded based on its fill ratio and capacity factor.

In [195]:
class ScalableBloomFilter:
    """
    A Scalable Bloom Filter (SBF) is an extension of the standard Bloom Filter that dynamically grows to accommodate more elements while maintaining a specified error rate.

    Attributes:
        initial_size (int): The initial size of the Bloom Filter.
        error_rate (float): The desired false positive rate.
        hash_functions (list[HashFunction]): A list of hash functions used to map elements to positions in the bit array.
        growth_factor (float): The factor by which the Bloom Filter size grows when it needs to expand.
        capacity_factor (float): The factor that determines when the Bloom Filter needs to expand based on its fill ratio.
        current_filter_size (int): The current size of the Bloom Filter.
        filters (List[BloomFilter]): A list of Bloom Filters used to store elements.
    """
    def __init__(self, initial_size: int, error_rate: float, hash_functions: list[HashFunction], growth_factor: float = 2.0, capacity_factor: float = 0.75) -> None:
        self.filters: List[BloomFilter] = []
        self.initial_size = initial_size
        self.error_rate = error_rate
        self.hash_functions = hash_functions
        self.growth_factor = growth_factor
        self.capacity_factor = capacity_factor
        self.current_filter_size = initial_size
        self._create_new_filter(initial_size, error_rate)
        
        
    def _create_new_filter(self, size: int, error_rate: float) -> None:
        new_filter = BloomFilter(size, self.hash_functions)
        self.filters.append(new_filter)
        
        print(f"Created new Bloom Filter with size {size}.\n")
        
    
    def add(self, item: str) -> None:
        last_filter = self.filters[-1]
        last_filter.add(item)
        
        if self._is_expansion_needed(last_filter):
            self.current_filter_size = int(self.current_filter_size * self.growth_factor)
            
            filter_size = self.current_filter_size
            new_error_rate = self.error_rate / len(self.filters)
            self._create_new_filter(filter_size, new_error_rate)
        
    def check(self, item: str) -> bool:
        return any(bloom.check(item) for bloom in self.filters)
    
    def _is_expansion_needed(self, bl_filter: BloomFilter) -> bool:
        num_hashes = len(bl_filter.hash_functions)
        optimal_items = int((bl_filter.size * math.log(2)) / num_hashes)
        return bl_filter.item_count >= (optimal_items * self.capacity_factor)
    
    def __str__(self) -> str:
        return f"ScalableBloomFilter with {len(self.filters)} filters, current size: {self.current_filter_size}"
    


In [196]:
def check_scale_filter():
    hash_funcs = get_random_hash_functions(3)
    print("Checking for items:")
    sbf = ScalableBloomFilter(initial_size=10,
                              error_rate=0.01,
                              hash_functions=hash_funcs)
    for i in range(1000):
        sbf.add(f"item_{i}")
        if i % 100 == 0:
            print(sbf)
    
    # Check for items
    print("\nChecking for items:")
    for i in range(1100):  # Check for 100 items that weren't added
        if not sbf.check(f"item_{i}"):
            print(f"item_{i} not found")
            
check_scale_filter()


Selected Hash Function 1: md5_hash
Selected Hash Function 2: djb2_hash
Selected Hash Function 3: fnv1a_hash


Checking for items:
Initialized Bloom Filter with size 10 and 3 hash functions.
Created new Bloom Filter with size 10.

ScalableBloomFilter with 1 filters, current size: 10
Initialized Bloom Filter with size 20 and 3 hash functions.
Created new Bloom Filter with size 20.

Initialized Bloom Filter with size 40 and 3 hash functions.
Created new Bloom Filter with size 40.

Initialized Bloom Filter with size 80 and 3 hash functions.
Created new Bloom Filter with size 80.

Initialized Bloom Filter with size 160 and 3 hash functions.
Created new Bloom Filter with size 160.

Initialized Bloom Filter with size 320 and 3 hash functions.
Created new Bloom Filter with size 320.

ScalableBloomFilter with 6 filters, current size: 320
Initialized Bloom Filter with size 640 and 3 hash functions.
Created new Bloom Filter with size 640.

ScalableBloomFilter with 7 filters, current size: 640
Ini

### Compressed Bloom Filter

A Compressed Bloom Filter (CBF) is a variant of the standard Bloom Filter designed to reduce the space required to store the filter. This is particularly useful in scenarios where memory is limited or when the Bloom Filter needs to be transmitted over a network.

#### Key Concepts:
1. **Compression**:
   - The bit array of the Bloom Filter is compressed using standard compression techniques (e.g., run-length encoding, Huffman coding, etc.) to reduce its size.

2. **Decompression**:
   - When an element needs to be added or checked, the compressed bit array is decompressed, the operation is performed, and then the bit array is recompressed.

3. **Space Efficiency**:
   - By compressing the bit array, the Compressed Bloom Filter achieves better space efficiency compared to a standard Bloom Filter, at the cost of additional computational overhead for compression and decompression.

#### Basic Operations:
1. **Add Operation**:
   - Decompress the bit array.
   - Apply the hash functions to the element to get multiple hash values.
   - Set the corresponding positions in the bit array to 1.
   - Recompress the bit array.

2. **Check Operation**:
   - Decompress the bit array.
   - Apply the hash functions to the element to get multiple hash values.
   - Check if all the corresponding positions in the bit array are set to 1.
   - Recompress the bit array.

3. **Compression and Decompression**:
   - Use efficient compression algorithms to minimize the size of the bit array while maintaining the ability to quickly decompress and perform operations.

#### Advantages:
- **Reduced Memory Usage**: The primary advantage of a Compressed Bloom Filter is its reduced memory footprint, making it suitable for memory-constrained environments.
- **Network Efficiency**: Compressed Bloom Filters are more efficient to transmit over a network due to their smaller size.

#### Disadvantages:
- **Computational Overhead**: The need to compress and decompress the bit array for each operation introduces additional computational overhead.
- **Latency**: The time taken for compression and decompression can increase the latency of add and check operations.

In [197]:
class CompressedBloomFilter:
    def __init__(self, size: int, hash_functions: List[HashFunction]):
        self.size = size
        self.bit_array = [False] * size
        self.hash_functions = hash_functions
        print(f"Initialized Compressed Bloom Filter with size {size} and {len(hash_functions)} hash functions.")
        
    def add(self, item: str) -> None:
        for i, hash_function in enumerate(self.hash_functions):
            index = hash_function(item, i) % self.size
            self.bit_array[index] = True
            
            
    def check(self, item: str) -> bool:
        for i, hash_function in enumerate(self.hash_functions):
            index = hash_function(item, i) % self.size
            if not self.bit_array[index]:
                return False
        return True
     
    def compress(self) -> bytes:
        
        compressed = []
        count = 1
        current = self.bit_array[0]
        
        for bit in self.bit_array[1:]:
            if bit == current:
                count += 1
            else:
                compressed.append((count, current))
                count = 1
                current = bit

        compressed.append((count, current))
        result = bytearray()
        for count, value in compressed:
            result.extend(count.to_bytes(2, byteorder='big'))
            result.append(1 if value else 0)

        return bytes(result)
    
    @classmethod
    def decompress(cls, compressed: bytes, size: int, hash_functions: List[HashFunction]) -> 'CompressedBloomFilter':
        """Decompress a compressed bit array and create a CompressedBloomFilter instance."""
        cmp_filter = cls(size, hash_functions)
        bit_array = []

        i = 0
        while i < len(compressed):
            count = int.from_bytes(compressed[i:i+2], byteorder='big')
            value = bool(compressed[i+2])
            bit_array.extend([value] * count)
            i += 3

        cmp_filter.bit_array = bit_array[:size]  # Ensure we don't exceed the specified size
        return cmp_filter

In [198]:
def check_cmp_filter(expected_elements: int, fp_rate: float):
    
    size = optimal_filter_size(expected_elements, fp_rate)
    hash_cnt = optimal_number_of_hashes(size, expected_elements)
    hash_funcs = get_random_hash_functions(hash_cnt)
    
    print(f"Optimal size: {size}")
    print(f"Optimal number of hash functions: {hash_cnt}")
    cmp_filter = CompressedBloomFilter(size=size,
                                       hash_functions=hash_funcs)
    
    for i in range(expected_elements):
        cmp_filter.add(f"element_{i}")
        
    compressed = cmp_filter.compress()
    print(f"Compressed size: {len(compressed)} bytes")
    print(f"Compression ratio: {len(compressed) / (size // 8):.2f}")
    
    # Decompress the filter
    decompressed_bf = CompressedBloomFilter.decompress(compressed, size, funcs)

    # Check for elements
    true_positives = sum(1 for i in range(expected_elements) if decompressed_bf.check(f"element_{i}"))
    false_positives = sum(1 for i in range(expected_elements, expected_elements*2) if decompressed_bf.check(f"element_{i}"))
    
    print(f"True positives: {true_positives}")
    print(f"False positives: {false_positives}")
    print(f"Actual false positive rate: {false_positives / expected_elements:.4f}")
    
    
    
check_cmp_filter(1000, 0.01)

Selected Hash Function 1: md5_hash


Optimal size: 9586
Optimal number of hash functions: 7
Initialized Compressed Bloom Filter with size 9586 and 1 hash functions.
Compressed size: 5217 bytes
Compression ratio: 4.35
Initialized Compressed Bloom Filter with size 9586 and 3 hash functions.
True positives: 5
False positives: 3
Actual false positive rate: 0.0030


In [199]:
import math


class SpatialBloomFilter:
    
    def __init__(self, size: int, hash_funcs: list[HashFunction]):
        self.size = size
        self.hash_funcs = hash_funcs
        self.bit_array = [0] * size
        
    
    def add(self, area_id: int, points: set[tuple[float, float]]) -> None:
        for point in points:
            for hash_func in self.hash_funcs:
                index = hash_func(str(point)) % self.size
                self.bit_array[index] = max(self.bit_array[index], area_id)
                
    def check(self, point: tuple[float, float]):
        max_area_id = 0
        for hash_func in self.hash_funcs:
            index = hash_func(str(point)) % self.size
            max_area_id = max(max_area_id, self.bit_array[index])
        return max_area_id
        
def discretize_area(center: tuple[float, float], radius: float, num_points: int) -> set[tuple[float, float]]:
    """
    Discretize a circular area into a set of points.
    this function generates random points within a circle centered at the given center coordinates with the specified radius.
    
    Args:
    center (Tuple[float, float]): The (x, y) coordinates of the circle's center.
    radius (float): The radius of the circle.
    num_points (int): The number of points to generate.
    
    Returns:
    Set[Tuple[float, float]]: A set of (x, y) coordinates representing the area.
    """
    points = set()
    for _ in range(num_points):
        angle = 2 * math.pi * random.random()
        r = radius * math.sqrt(random.random())
        x = center[0] + r * math.cos(angle)
        y = center[1] + r * math.sin(angle)
        points.add((round(x, 2), round(y, 2)))
    return points

In [200]:
def check_spatial_filter(fp_rate: float):
    area1 = discretize_area((0, 0), 5, 1000)
    area2 = discretize_area((10, 10), 3, 500)
    area3 = discretize_area((-5, 5), 2, 300)
    
    total_points = len(area1) + len(area2) + len(area3)
    size = optimal_filter_size(total_points, fp_rate)
    hash_cnt = optimal_number_of_hashes(size, total_points)
    
    print(f"Optimal size: {size} and optimal number of hash functions: {hash_cnt}")
    
    hash_funcs = get_random_hash_functions(hash_cnt)
    spatial_filter = SpatialBloomFilter(size, hash_funcs)
    
    spatial_filter.add(1, area1)
    spatial_filter.add(2, area2)
    spatial_filter.add(3, area3)
    
    # Query points
    print(f"Point (1, 1) belongs to area: {spatial_filter.check((1, 1))}")
    print(f"Point (11, 11) belongs to area: {spatial_filter.check((11, 11))}")
    print(f"Point (-4, 6) belongs to area: {spatial_filter.check((-4, 6))}")
    print(f"Point (20, 20) belongs to area: {spatial_filter.check((20, 20))}")
    

check_spatial_filter(0.01)

Optimal size: 17215 and optimal number of hash functions: 7
Selected Hash Function 1: fnv1a_hash


Point (1, 1) belongs to area: 0
Point (11, 11) belongs to area: 0
Point (-4, 6) belongs to area: 1
Point (20, 20) belongs to area: 0
