# BQ Compression – Load Data and compress vectors

## Get keys and urls

In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_KEY = os.getenv("WEAVIATE_KEY")

print(WEAVIATE_URL[:10])
print(WEAVIATE_KEY[:10])

cwzujlbgtk
L1FUbzRPaU


## Connect to Weaviate

In [2]:
import weaviate
from weaviate.classes.init import Auth

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_KEY),
)

client.is_ready()

True

## Create Collection with BQ configuration

[Docs: Binary Quantization (BQ)](https://weaviate.io/developers/weaviate/configuration/compression/bq-compression)

Note #1: Binary Quantization works from the first object added to the collection. No training required.

Note #2: Binary Quantization works both with HNSW and Flat index.

In [3]:
from weaviate.classes.config import Configure, VectorDistances

client.collections.delete("WikiQ")

# Create a collection here - with Cohere as a vectorizer
client.collections.create(
    name="WikiQ",

    vector_config=[
        Configure.Vectors.text2vec_weaviate(
            name="main_vector",

            model="Snowflake/snowflake-arctic-embed-l-v2.0",
            source_properties=['title', 'text'],

            # Configure BQ with flat vector index
            vector_index_config=Configure.VectorIndex.flat(
                distance_metric=VectorDistances.COSINE,
                vector_cache_max_objects=100_000,
                quantizer=Configure.VectorIndex.Quantizer.bq(
                    rescore_limit=200,
                    cache=True
                )
            ),

            # HSNW example
            # vector_index_config=Configure.VectorIndex.hsnw(
            #     quantizer=Configure.VectorIndex.Quantizer.bq(
            #         rescore_limit=200,
            #         cache=True
            #     )
            # ),
        )
    ],
)

<weaviate.collections.collection.sync.Collection at 0x118f923d0>

## The rest is the same

In [4]:
from data_loader import import_wiki_data
import_wiki_data(client, "WikiQ", 25000)

Importing 25000 data items
Loaded Dataset: 'parquet' - Config: 'default'


100%|█████████▉| 24999/25000 [00:21<00:00, 1182.17it/s]


Imported 25000 items
-----------------------------------


In [5]:
wikiQ = client.collections.get("WikiQ")
wikiQ.aggregate.over_all()

AggregateReturn(properties={}, total_count=22746)

## Clean up

In [None]:
# client.collections.delete("WikiQ")

## Close the client

In [6]:
client.close()