Skip to content

BUG: Broken references silently failing using client.batch (v4) #1008

@glesperance

Description

@glesperance

It is great that references can be added to objects created within the same batch.

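However, references that cannot be created (for example, a reference whose target UUID does not exist) should be listed in client.batch.failed_references. Currently they fail silently: the reference is not stored, and client.batch.failed_references stays empty.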

Minimal script to reproduce:

import weaviate
import weaviate.classes as wvc
import weaviate.util
from tqdm import tqdm
from weaviate.util import generate_uuid5

# Phase 1 - schema setup.
# Prints the client library version, then recreates two collections, "A" and
# "B", each with one TEXT property and one reference property that targets the
# other collection (A.b_ref -> B, B.a_ref -> A).
print("Weaviate version:", weaviate.__version__)

with weaviate.connect_to_local() as client:
  # Server metadata is printed so the report records the server version too.
  print(client.get_meta())

  # Drop any collections left over from an earlier run before creating them
  # again, so every run starts from empty collections.
  client.collections.delete(["A", "B"])
  client.collections.create("A")
  client.collections.create("B")

  a_collection = client.collections.get("A")
  b_collection = client.collections.get("B")

  # The references are added after both collections exist because each one
  # names the other as its target collection.
  a_collection.config.add_property(wvc.config.Property(name="a_name", data_type=wvc.config.DataType.TEXT))
  a_collection.config.add_reference(wvc.config.ReferenceProperty(name="b_ref", target_collection="B"))

  b_collection.config.add_property(wvc.config.Property(name="b_name", data_type=wvc.config.DataType.TEXT))
  b_collection.config.add_reference(wvc.config.ReferenceProperty(name="a_ref", target_collection="A"))


  # Baseline aggregate for both collections, printed before any insert.
  print({
    "A": a_collection.aggregate.over_all(),
    "B": b_collection.aggregate.over_all()
  })
  print("-" * 80)


# NOTE(review): this call runs after the `with` block above has already
# exited, so it is presumably redundant (the context manager should have
# closed the client) - confirm against the client documentation.
client.close()

# Phase 2 - batch insert (the core of the reproduction).
# For each i in range(5000) this queues one object in "A", one object in "B",
# one reference B -> A whose target is queued in the same batch, and one
# reference A -> `inexistent_uuid`, a UUID for which no object is ever queued.
# The report's expectation is that the second kind of reference shows up in
# client.batch.failed_references after the batch finishes.
with  weaviate.connect_to_local() as client:
  with client.batch.dynamic() as batch:
      for i in tqdm(range(5000)):
        # One UUID per (i, namespace string) pair.
        # NOTE(review): presumably generate_uuid5 is deterministic, so re-runs
        # produce the same IDs - confirm in weaviate.util.
        a_uuid = generate_uuid5(str(i), "A")
        b_uuid = generate_uuid5(str(i), "B")
        # Never passed to add_object below, so no object with this UUID is
        # created by this script.
        inexistent_uuid = generate_uuid5(str(i), "inexistent")

        batch.add_object(properties={"a_name": "test"}, collection="A", uuid=a_uuid)
        batch.add_object(properties={"b_name": "test"}, collection="B", uuid=b_uuid)

        # Valid reference: both endpoints are queued in this same batch.
        batch.add_reference(from_uuid=b_uuid, from_collection="B" ,from_property="a_ref", to=a_uuid)
        # Broken reference: the target UUID does not correspond to any object
        # created here; this is the case under test.
        batch.add_reference(from_uuid=a_uuid, from_collection="A" ,from_property="b_ref", to=inexistent_uuid)


  # The failure lists are read after the batch context has exited. At most the
  # first 10 entries of each list are printed.
  print(">>> Done inserting objects")
  if client.batch.failed_objects:
    print("Failed objects:", client.batch.failed_objects[:10])
  else:
    print("No failed objects")
    
  if client.batch.failed_references:
    print("Failed references:", client.batch.failed_references[:10])
  else:
    print("No failed references")

# Phase 3 - verify what was actually stored.
# Iterates every object in "B" and asserts it carries exactly one entry in
# `references` when "a_ref" is requested, then iterates every object in "A"
# and asserts it carries none when "b_ref" is requested. Both loops passing
# means the valid references were stored and the broken ones were not.
with weaviate.connect_to_local() as client:
  b_collection = client.collections.get("B")
  for b_obj in b_collection.iterator(return_references=[wvc.query.QueryReference(link_on="a_ref")]):
    assert len(b_obj.references) == 1
  print(">>> All B objects have a reference to an A object")

  a_collection = client.collections.get("A")
  for a_obj in a_collection.iterator(return_references=[wvc.query.QueryReference(link_on="b_ref")]):
    # Zero references here is the evidence that the A -> inexistent_uuid
    # references were dropped rather than stored.
    assert len(a_obj.references) == 0
  print(">>> All A objects have no references")

# Phase 4 - final state.
# Opens a fresh connection and prints the aggregate result for both
# collections after the insert, for comparison with the baseline printed
# during setup.
with weaviate.connect_to_local() as client:
  a_collection = client.collections.get("A")
  b_collection = client.collections.get("B")
  print("-" * 80)
  print({
    "A": a_collection.aggregate.over_all(),
    "B": b_collection.aggregate.over_all()
  })

Output:

Weaviate version: 4.5.5
{'hostname': 'http://127.0.0.1:8080/', 'modules': {}, 'version': '1.23.7'}
{'A': AggregateReturn(properties={}, total_count=0), 'B': AggregateReturn(properties={}, total_count=0)}
--------------------------------------------------------------------------------
100%|██████████| 5000/5000 [00:01<00:00, 2547.51it/s]
>>> Done inserting objects
No failed objects
No failed references
>>> All B objects have a reference to an A object
>>> All A objects have no references
--------------------------------------------------------------------------------
{'A': AggregateReturn(properties={}, total_count=5000), 'B': AggregateReturn(properties={}, total_count=5000)}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions