In [1]:
import dotenv

# Load environment variables (e.g. the Cohere API key read in a later cell) from a local .env file.
dotenv.load_dotenv()

True

In [2]:
import weaviate
import os

# Read the Cohere key up front and fail with a clear message if it is missing,
# rather than passing `None` through as a header value.
cohere_api_key = os.getenv("COHERE_API_KEY")
if not cohere_api_key:
    raise RuntimeError(
        "COHERE_API_KEY is not set; add it to your environment or .env file before connecting."
    )

# Connect to the local Weaviate instance, forwarding the Cohere key as a request header.
client = weaviate.connect_to_local(
    headers={
        "X-Cohere-API-Key": cohere_api_key,
    }
)

In [3]:
# Show the version string reported by the connected Weaviate server.
client.get_meta()["version"]

'1.31.0-rc.0'

In [4]:
# List the nodes in the cluster together with their status and version.
client.cluster.nodes()

[Node(git_hash='a6ede0c', name='node1', shards=None, stats=None, status='HEALTHY', version='1.31.0-rc.0'),
 Node(git_hash='a6ede0c', name='node2', shards=None, stats=None, status='HEALTHY', version='1.31.0-rc.0'),
 Node(git_hash='a6ede0c', name='node3', shards=None, stats=None, status='HEALTHY', version='1.31.0-rc.0')]

## Move shards

In [None]:
from weaviate.classes.config import Configure, DataType, Property

collection_name = "TempCollection"

# Drop any existing collection of the same name before recreating it.
client.collections.delete(collection_name)

# Two plain-text properties, both fed into a single Cohere-backed named vector.
text_properties = [
    Property(name=field_name, data_type=DataType.TEXT)
    for field_name in ("title", "body")
]
default_vector = Configure.NamedVectors.text2vec_cohere(
    name="default",
    source_properties=["title", "body"],
)

client.collections.create(
    collection_name,
    properties=text_properties,
    vectorizer_config=[default_vector],
    # Demo only: do NOT use a replication factor of 2 in production;
    # use an odd number of 3 or more.
    replication_config=Configure.replication(factor=2),
    # Arbitrarily high shard count (for this dataset size) to demonstrate sharding.
    sharding_config=Configure.sharding(desired_count=5),
)

/workspaces/weaviate_enablement_1_31/.venv/lib/python3.11/site-packages/weaviate/collections/classes/config.py:1950: PydanticDeprecatedSince211: Accessing the 'model_fields' attribute on the instance is deprecated. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  for cls_field in self.model_fields:


BatchObjectReturn(_all_responses=[UUID('e5290cad-3537-4f5d-9645-1f61f72d7fc1'), UUID('e0738641-0135-44b9-a385-aecba1b616f7'), UUID('9c8a19c5-a1e7-4c4b-bce7-0ab2ba0c9c3c'), UUID('19ed3ff6-cf83-481d-99b3-b741f80d107b'), UUID('955b18b7-87be-4a84-b528-1881d16bd61a'), UUID('34741b84-be2f-4078-a1a7-0c31cdd05836'), UUID('27249845-f99e-4fcf-be36-ae9d68b08a3d'), UUID('43406043-99fd-45c8-943b-34c85f5c087c'), UUID('ed6874ea-dfb1-4d42-8195-b7ef7e496d29'), UUID('4e23e469-abbd-40f6-b193-90ade446a9d8'), UUID('acda7b25-3a68-4ef5-8ca7-5291832d0360'), UUID('67f1d224-2d10-43f1-b672-5fb62b878d76')], elapsed_seconds=0.4034616947174072, errors={}, uuids={0: UUID('e5290cad-3537-4f5d-9645-1f61f72d7fc1'), 1: UUID('e0738641-0135-44b9-a385-aecba1b616f7'), 2: UUID('9c8a19c5-a1e7-4c4b-bce7-0ab2ba0c9c3c'), 3: UUID('19ed3ff6-cf83-481d-99b3-b741f80d107b'), 4: UUID('955b18b7-87be-4a84-b528-1881d16bd61a'), 5: UUID('34741b84-be2f-4078-a1a7-0c31cdd05836'), 6: UUID('27249845-f99e-4fcf-be36-ae9d68b08a3d'), 7: UUID('4340604

In [None]:
c = client.collections.get(collection_name)

# Twelve sample books; "title" and "body" match the collection's properties.
objects = [
    {"title": "Howl's Moving Castle", "body": "A fantasy novel by Diana Wynne Jones."},
    {"title": "The Hobbit", "body": "A fantasy novel by J.R.R. Tolkien."},
    {"title": "The Hitchhiker's Guide to the Galaxy", "body": "A science fiction novel by Douglas Adams."},
    {"title": "The Great Gatsby", "body": "A novel by F. Scott Fitzgerald."},
    {"title": "1984", "body": "A dystopian novel by George Orwell."},
    {"title": "To Kill a Mockingbird", "body": "A novel by Harper Lee."},
    {"title": "Pride and Prejudice", "body": "A novel by Jane Austen."},
    {"title": "The Catcher in the Rye", "body": "A novel by J.D. Salinger."},
    {"title": "The Lord of the Rings", "body": "A fantasy novel by J.R.R. Tolkien."},
    {"title": "Brave New World", "body": "A dystopian novel by Aldous Huxley."},
    {"title": "Fahrenheit 451", "body": "A dystopian novel by Ray Bradbury."},
    {"title": "The Picture of Dorian Gray", "body": "A novel by Oscar Wilde."},
]

# Keep the result instead of echoing it: the raw BatchObjectReturn repr is very
# long, and per-object failures are reported in `errors` rather than raised.
insert_result = c.data.insert_many(objects)

if insert_result.errors:
    print(f"{len(insert_result.errors)} of {len(objects)} objects failed to insert:")
    for index, error in insert_result.errors.items():
        print(f"  [{index}] {error}")
else:
    print(f"Inserted {len(insert_result.uuids)} objects in {insert_result.elapsed_seconds:.2f}s")

In [6]:
# Number of objects currently in the collection.
len(c)

12

![Shards - explained: 1](./assets/shards-1.png)

![Shards - explained: 2](./assets/shards-2.png)

![Shards - explained: 3](./assets/shards-3.png)

![Shards - explained: 4](./assets/shards-4.png)

In [9]:
# Walk every node and report the shards (with object counts) it holds for this collection.
verbose_nodes = client.cluster.nodes(collection=collection_name, output="verbose")
for node in verbose_nodes:
    node_shards = node.shards
    print(f"\nNode {node.name} has {len(node_shards)} shards")
    for shard in node_shards:
        print(f"Shard {shard.name} has {shard.object_count} objects from {collection_name}")


Node node1 has 3 shards
Shard 4QoGPeDZF0ij has 3 objects from TempCollection
Shard O36bikitjOSA has 3 objects from TempCollection
Shard DBg0mXKtcZZh has 2 objects from TempCollection

Node node2 has 4 shards
Shard wtEqaeXumWZ5 has 3 objects from TempCollection
Shard W5M7LdGEyzAq has 1 objects from TempCollection
Shard O36bikitjOSA has 3 objects from TempCollection
Shard DBg0mXKtcZZh has 2 objects from TempCollection

Node node3 has 3 shards
Shard 4QoGPeDZF0ij has 3 objects from TempCollection
Shard W5M7LdGEyzAq has 1 objects from TempCollection
Shard wtEqaeXumWZ5 has 3 objects from TempCollection


In [None]:
import requests
import json

# Base URL of the local Weaviate REST API, and how long to wait for a reply.
WEAVIATE_URL = "http://localhost:8080"
REQUEST_TIMEOUT_SECONDS = 30

# Shard move parameters
SOURCE_NODE = "node1"
DESTINATION_NODE = "node2"

# Shard names are generated by Weaviate, so look one up at run time instead of
# hard-coding it: pick a shard that the source node holds and the destination
# node does not hold yet.
shards_by_node = {
    node.name: {shard.name for shard in (node.shards or [])}
    for node in client.cluster.nodes(collection=collection_name, output="verbose")
}
movable_shards = sorted(
    shards_by_node.get(SOURCE_NODE, set()) - shards_by_node.get(DESTINATION_NODE, set())
)
if not movable_shards:
    raise RuntimeError(
        f"No shard of {collection_name} is on {SOURCE_NODE} without also being on {DESTINATION_NODE}"
    )
SHARD_ID = movable_shards[0]
print(f"Moving shard {SHARD_ID} from {SOURCE_NODE} to {DESTINATION_NODE}")

# Create the request payload
payload = {
    "sourceNodeName": SOURCE_NODE,
    "destinationNodeName": DESTINATION_NODE,
    "collectionId": collection_name,
    "shardId": SHARD_ID,
    "transferType": "MOVE"  # Use "MOVE" to relocate the shard, or "COPY" to replicate it
}

# Set up the headers
headers = {
    "Content-Type": "application/json"
}

# Reset the operation ID so a failed request cannot leave a stale value from an
# earlier run for the status cell to pick up.
operation_id = None

# Make the API request
response = requests.post(
    f"{WEAVIATE_URL}/v1/replication/replicate",
    headers=headers,
    data=json.dumps(payload),
    timeout=REQUEST_TIMEOUT_SECONDS,
)

# Check the response
if response.status_code == 200:
    operation_id = response.json().get("id")
    print(f"Shard move operation started successfully. Operation ID: {operation_id}")

    # Optional: You could use the operation ID to poll for status
    # status_url = f"{WEAVIATE_URL}/v1/replication/replicate/{operation_id}"
else:
    print(f"Error: {response.status_code}")
    print(response.text)

In [None]:
import requests

# Ask the replication API for the state of the operation whose ID is in `operation_id`.
status_url = f"http://localhost:8080/v1/replication/replicate/{operation_id}"
headers = {"Content-Type": "application/json"}
response = requests.get(status_url, headers=headers)

# Anything other than HTTP 200 is reported along with the raw response body.
if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
else:
    print("Shard move operation status:")
    print(response.json())

In [None]:
# Print the per-node shard layout again so it can be compared with the earlier listing.
for cluster_node in client.cluster.nodes(collection=collection_name, output="verbose"):
    shard_total = len(cluster_node.shards)
    print(f"\nNode {cluster_node.name} has {shard_total} shards")
    for held_shard in cluster_node.shards:
        print(f"Shard {held_shard.name} has {held_shard.object_count} objects from {collection_name}")

## MUVERA

Note - check that this is the branch being used:

`uv pip install git+https://github.com/weaviate/weaviate-python-client.git@dev/1.31`

In [6]:
from weaviate.classes.config import Configure, DataType, Property

collection_name = "TempCollection"

# Drop any existing collection of the same name before recreating it.
client.collections.delete(collection_name)

# HNSW index configured for multi-vector embeddings with MUVERA encoding.
muvera_index = Configure.VectorIndex.hnsw(
    multi_vector=Configure.VectorIndex.MultiVector.multi_vector(
        encoding=Configure.VectorIndex.MultiVector.Encoding.muvera()
    )
)

# No vectorizer module: vectors for "custom_mv" are supplied by the caller at import time.
custom_mv_vector = Configure.NamedVectors.none(
    name="custom_mv",
    vector_index_config=muvera_index,
)

client.collections.create(
    collection_name,
    properties=[
        Property(name=field_name, data_type=DataType.TEXT)
        for field_name in ("title", "body")
    ],
    vectorizer_config=[custom_mv_vector],
    replication_config=Configure.replication(factor=3),
)

<weaviate.collections.collection.sync.Collection at 0xffff79854750>

In [8]:
# Three sample books as (title, body) pairs, expanded into property dicts.
book_rows = [
    ("Howl's Moving Castle", "A fantasy novel by Diana Wynne Jones."),
    ("The Hobbit", "A fantasy novel by J.R.R. Tolkien."),
    ("The Hitchhiker's Guide to the Galaxy", "A science fiction novel by Douglas Adams."),
]
objects = [{"title": title, "body": body} for title, body in book_rows]

# One multi-vector per book: a 5-dimensional token vector repeated 3, 4 and 5 times,
# so each object carries a different number of token vectors.
token_specs = [
    ([0.1, 0.2, 0.3, 0.4, 0.5], 3),
    ([0.6, 0.7, 0.8, 0.9, 1.0], 4),
    ([1.1, 1.2, 1.3, 1.4, 1.5], 5),
]
vectors = [[token_vector] * repeats for token_vector, repeats in token_specs]

# Display the first multi-vector as a sanity check.
vectors[0]

[[0.1, 0.2, 0.3, 0.4, 0.5],
 [0.1, 0.2, 0.3, 0.4, 0.5],
 [0.1, 0.2, 0.3, 0.4, 0.5]]

In [10]:
c = client.collections.get(collection_name)

# Each object needs exactly one multi-vector; catch a mismatch before sending anything.
if len(objects) != len(vectors):
    raise ValueError(
        f"Expected one vector per object, got {len(objects)} objects and {len(vectors)} vectors"
    )

# Import each object together with its multi-vector under the "custom_mv" named vector.
with c.batch.fixed_size(200) as batch:
    for obj, vector in zip(objects, vectors):
        batch.add_object(
            properties=obj,
            vector={"custom_mv": vector}
        )

# Batch imports do not raise on per-object failures; surface them explicitly.
failed_objects = c.batch.failed_objects
if failed_objects:
    print(f"{len(failed_objects)} object(s) failed to import. First failure: {failed_objects[0]}")

In [11]:
# Number of objects currently in the collection.
len(c)

3

## Vectorizer changes

Note - check that this is the branch being used:

`uv pip install git+https://github.com/weaviate/weaviate-python-client.git@1.31/support-adding-vectors`

In [None]:
from weaviate.classes.config import Configure, DataType, Property

collection_name = "TempCollection"

# Drop any existing collection of the same name before recreating it.
client.collections.delete(collection_name)

# Two Cohere-backed named vectors: one over title + body, one over the title alone.
# (Alternative, not used here: a single unnamed vectorizer via
# Configure.Vectorizer.text2vec_cohere().)
default_vector = Configure.NamedVectors.text2vec_cohere(
    name="default",
    source_properties=["title", "body"],
)
title_vector = Configure.NamedVectors.text2vec_cohere(
    name="new_title",
    source_properties=["title"],
)

client.collections.create(
    collection_name,
    properties=[
        Property(name=field_name, data_type=DataType.TEXT)
        for field_name in ("title", "body")
    ],
    vectorizer_config=[default_vector, title_vector],
)

In [None]:
c = client.collections.get(collection_name)

# Fetch the collection's configuration and show the keys of its vector config
# (presumably the named-vector names; verify against the client version in use).
# The `vectorizer_config` attribute is an alternative worth inspecting here.
collection_config = c.config.get()
cc = collection_config.vector_config.keys()

print(cc)

In [None]:
# Register an additional Cohere-backed named vector, built from the "body" property only,
# on the already-created collection.
body_only_vector = Configure.NamedVectors.text2vec_cohere(
    name="body_only",
    source_properties=["body"],
)
c.config.add_vector(vector_config=body_only_vector)