# Weaviate v1.26 code samples - Rangeable index

This notebook contains sample code that times the same queries with and without the rangeable index.

The [rangeable index](https://weaviate.io/developers/weaviate/concepts/prefiltering#indexrangefilters) makes it easier to work with numeric data.

Use the rangeable index for properties that are of type `int`, `number`, or `date`.

Additional information:
- [Weaviate documentation](https://weaviate.io/developers/weaviate)
- [Weaviate v1.26 release notes](https://weaviate.io/developers/weaviate/release-notes/release_1_26)

In [4]:
# CREATE A CLIENT (LOCALHOST)
#
# Connects to a Weaviate instance using the client's default local connection
# settings and stores the connection in `client` for the cells that follow.
# NOTE(review): no cell shown in this notebook closes the connection; consider
# calling client.close() once you are finished.

import weaviate

client = weaviate.connect_to_local()

# Uncomment to check connection
# client.is_ready()

In [9]:
# 1. GENERATE SOME SAMPLE DATA

import random

# Set the characteristics of the sample data
number_of_objects = 750000
locations = ["New York", "Seabright", "Red Bank", "Asbury Park", "Neptune"]
base_temperature = 32

# Seed the generator so that every run produces the same sample data
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Create a single sample object
def generate_object():
    """Return one random ``(location, temperature)`` sample.

    The location is drawn uniformly from ``locations``. The temperature is
    ``base_temperature`` plus a random offset between 0 and 55, rounded to
    two decimal places.
    """
    # random.choice adapts to however many locations are configured, so the
    # list can be edited without also updating a hard-coded index range.
    location = random.choice(locations)

    # Round the final sum (not just the offset) so the stored value carries
    # no floating-point noise beyond two decimals.
    temperature = round(base_temperature + random.uniform(0, 55), 2)

    return (location, temperature)

# Build the full list of sample objects. Each reading is stored under both
# "temperature" and "unindexed_temp" so the two properties hold identical values.
objects = [
    {"location": place, "temperature": reading, "unindexed_temp": reading}
    for place, reading in (generate_object() for _ in range(number_of_objects))
]


In [10]:
# DEFINE A COLLECTION THAT INCLUDES A RANGEABLE INDEX

import weaviate
from weaviate.classes.config import Property, DataType

client = weaviate.connect_to_local()

# Start from a clean slate: drop any copy left over from an earlier run
if client.collections.exists("WaterTemperatures"):
    client.collections.delete("WaterTemperatures")

# Describe the properties stored on every object
location_property = Property(name="location", data_type=DataType.TEXT)

# The rangeable index is enabled here
indexed_temperature_property = Property(
    name="temperature",
    data_type=DataType.NUMBER,
    skip_vectorization=True,
    index_range_filters=True,
)

# The rangeable index is not enabled here
unindexed_temperature_property = Property(
    name="unindexed_temp",
    data_type=DataType.NUMBER,
    skip_vectorization=True,
)

# Create a collection
client.collections.create(
    "WaterTemperatures",
    properties=[
        location_property,
        indexed_temperature_property,
        unindexed_temperature_property,
    ],
)

# Create a collection object to work with
water_temperatures = client.collections.get("WaterTemperatures")

# Uncomment to check that the property was created with a rangeable index
# import pprint as pp
# pp.pprint(water_temperatures.config.get().properties)


In [None]:
# 3. UPLOAD THE SAMPLE DATA

# Assumes the data is in a list called 'objects'. Run the GENERATE SOME SAMPLE
# DATA cell first to create the 'objects' list.

import weaviate

# Create a client connection
client = weaviate.connect_to_local()

# Create a collection object to work with
water_temperatures = client.collections.get("WaterTemperatures")

# Import the data
with water_temperatures.batch.dynamic() as batch:
    for obj in objects:
        batch.add_object(properties=obj)

# Objects that could not be imported are collected on the batch rather than
# raised, so check for them explicitly. Otherwise the timing experiments could
# silently run against a partial data set.
failed_objects = water_temperatures.batch.failed_objects
if failed_objects:
    print(f"Failed to import {len(failed_objects)} of {len(objects)} objects")
    print(f"First failed object: {failed_objects[0]}")

# Uncomment to print the number of imported objects
# count = water_temperatures.aggregate.over_all(total_count=True).total_count
# print(f"Imported: {count}")

# Uncomment to print the first object
# import pprint as pp
# response = water_temperatures.query.fetch_objects( limit=1 )
# for o in response.objects:
#     pp.pprint(o.properties)


In [None]:
# TIME SAMPLE QUERIES

# This section runs the same queries against range indexed data and data that
# doesn't have a range index. Each experiment repeats NUM_REPS times to
# improve reliability.
#
# As the data set grows, the timing differences are more pronounced.
#
# All reported times are average wall-clock durations in nanoseconds.

import time
import weaviate
from weaviate.classes.aggregate import GroupByAggregate
from weaviate.classes.query import Filter
from weaviate.classes.query import Metrics

# Create a collection object to work with
water_temperatures = client.collections.get("WaterTemperatures")

# Set the number of times to repeat the timing experiments.
NUM_REPS = 200

# Bounds (exclusive) of the temperature range that every query filters on
RANGE_LOW = 80.0
RANGE_HIGH = 85.0


def temperature_range_filter(prop):
    """Build a filter that keeps objects where RANGE_LOW < `prop` < RANGE_HIGH."""
    return (
        Filter.by_property(prop).greater_than(RANGE_LOW) &
        Filter.by_property(prop).less_than(RANGE_HIGH)
    )


def aggregate_by_location(prop):
    """Return min, max and median of `prop` per location for the filtered range."""
    return water_temperatures.aggregate.over_all(
        group_by=GroupByAggregate(prop="location"),
        filters=temperature_range_filter(prop),
        return_metrics=Metrics(prop).number(median=True, maximum=True, minimum=True),
    )


def fetch_in_range(prop):
    """Fetch objects whose `prop` value falls inside the filtered range."""
    return water_temperatures.query.fetch_objects(
        filters=temperature_range_filter(prop),
    )


def time_query_pair(indexed_query, unindexed_query, repetitions):
    """Time two zero-argument callables, alternating between them.

    The two queries are interleaved (indexed, unindexed, indexed, ...) so that
    both see similar conditions over the course of the run.

    Args:
        indexed_query: callable that runs the query on the range indexed property.
        unindexed_query: callable that runs the query on the unindexed property.
        repetitions: number of times to run each query; must be at least 1.

    Returns:
        A tuple ``(average_indexed_ns, average_unindexed_ns,
        last_indexed_response, last_unindexed_response)``. The averages are
        rounded to two decimal places.

    Raises:
        ValueError: if ``repetitions`` is less than 1.
    """
    if repetitions < 1:
        raise ValueError("repetitions must be at least 1")

    indexed_total = 0
    unindexed_total = 0
    last_indexed = None
    last_unindexed = None

    for _ in range(repetitions):
        # perf_counter_ns measures elapsed wall-clock time, including the time
        # spent waiting for the server to answer. process_time_ns would only
        # count CPU time used by this Python process, which excludes that wait.
        started = time.perf_counter_ns()
        last_indexed = indexed_query()
        indexed_total += time.perf_counter_ns() - started

        started = time.perf_counter_ns()
        last_unindexed = unindexed_query()
        unindexed_total += time.perf_counter_ns() - started

    return (
        round(indexed_total / repetitions, 2),
        round(unindexed_total / repetitions, 2),
        last_indexed,
        last_unindexed,
    )


#######################
# Run the experiments #
#######################

###
### Experiment one: Aggregate over the property
###
number_of_repetitions = NUM_REPS
(
    average_indexed_time,
    average_unindexed_time,
    indexed_response,
    unindexed_response,
) = time_query_pair(
    lambda: aggregate_by_location("temperature"),
    lambda: aggregate_by_location("unindexed_temp"),
    number_of_repetitions,
)

# Uncomment to print the last set of statistics
# for indexed_group in indexed_response.groups:
#     print(f"{indexed_group.grouped_by.value}: ", end='')
#     print(f" count: {indexed_group.properties['temperature'].count}", end='')
#     print(f" min: {round(indexed_group.properties['temperature'].minimum, 2)}", end='')
#     print(f" max: {round(indexed_group.properties['temperature'].maximum, 2)}", end='')
#     print(f" median: {round(indexed_group.properties['temperature'].median, 2)}")

# for unindexed_group in unindexed_response.groups:
#     print(f"{unindexed_group.grouped_by.value}: ", end='')
#     print(f" count: {unindexed_group.properties['unindexed_temp'].count}", end='')
#     print(f" min: {round(unindexed_group.properties['unindexed_temp'].minimum, 2)}", end='')
#     print(f" max: {round(unindexed_group.properties['unindexed_temp'].maximum, 2)}", end='')
#     print(f" median: {round(unindexed_group.properties['unindexed_temp'].median, 2)}")


# Print the timing statistics (average nanoseconds per query)
# Aggregate results
print("Aggregate results")
print(f"Indexed time:   {average_indexed_time}")
print(f"Unindexed time: {average_unindexed_time}")


###
### Experiment two: Query a range
###
number_of_repetitions = NUM_REPS
(
    average_indexed_time,
    average_unindexed_time,
    indexed_response,
    unindexed_response,
) = time_query_pair(
    lambda: fetch_in_range("temperature"),
    lambda: fetch_in_range("unindexed_temp"),
    number_of_repetitions,
)

# Print the timing statistics (average nanoseconds per query)
# Fetch results
print("Fetch object")
print(f"Indexed time:   {average_indexed_time}")
print(f"Unindexed time: {average_unindexed_time}")
