In [1]:
import os

In [2]:
import random
import time
from typing import List



In [4]:

from pymilvus import (
    connections,
    utility,
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
)

In [5]:


HOST = "192.168.16.138"
PORT = 19530
DIM = 128
COLLECTION_NAME = f"demo_crud_{int(time.time())}"


def random_vectors(n: int, dim: int) -> List[List[float]]:
    return [[random.random() for _ in range(dim)] for _ in range(n)]



In [6]:

def ensure_disconnected():
    try:
        connections.disconnect(alias="default")
    except Exception:
        pass



print(f"Connecting to Milvus at {HOST}:{PORT} ...")
ensure_disconnected()
connections.connect(alias="default", host=HOST, port=str(PORT))


Connecting to Milvus at 192.168.16.138:19530 ...


In [7]:

# Clean up any old collection with the same name
if utility.has_collection(COLLECTION_NAME):
    print(f"Dropping existing collection: {COLLECTION_NAME}")
    utility.drop_collection(COLLECTION_NAME)

# 1) Create collection
print(f"Creating collection: {COLLECTION_NAME}")
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema(name="vec", dtype=DataType.FLOAT_VECTOR, dim=DIM),
]
schema = CollectionSchema(fields, description="CRUD demo collection")
col = Collection(name=COLLECTION_NAME, schema=schema)


Creating collection: demo_crud_1762844056


In [9]:
# 2) Create index for ANN
index_params = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 64},
}
print("Creating index ...")
col.create_index(field_name="vec", index_params=index_params)

Creating index ...


Status(code=0, message=)

In [10]:
# 3) Insert some entities
N = 20
ids = list(range(1, N + 1))
vecs = random_vectors(N, DIM)
print(f"Inserting {N} vectors ...")
mr = col.insert([ids, vecs])
# Wait for persistence
col.flush()
print(f"Inserted entities primary keys: {ids[:5]} ... total {N}")

Inserting 20 vectors ...
Inserted entities primary keys: [1, 2, 3, 4, 5] ... total 20


In [11]:
# 4) Load for search
print("Loading collection into memory ...")
col.load()

Loading collection into memory ...


In [12]:
# 5) Search using the first vector
print("Searching top-3 neighbors of first inserted vector ...")
search_params = {"metric_type": "L2", "params": {"nprobe": 8}}
results = col.search(data=[vecs[0]], anns_field="vec", param=search_params, limit=3, output_fields=["id"])
for hit in results[0]:
    print(f"  hit id={hit.entity.get('id')} distance={hit.distance:.6f}")

Searching top-3 neighbors of first inserted vector ...
  hit id=1 distance=0.000000
  hit id=2 distance=15.812815
  hit id=12 distance=16.219112


In [13]:
# 6) Query by primary key
sample_ids = ids[:3]
expr = f"id in {sample_ids}"
print(f"Querying by expr: {expr}")
out = col.query(expr=expr, output_fields=["id"])
print(f"  query returned {len(out)} rows: {out}")


Querying by expr: id in [1, 2, 3]
  query returned 3 rows: data: ["{'id': 1}", "{'id': 2}", "{'id': 3}"], extra_info: {}


In [14]:
# 7) Update: delete and re-insert one id
id_to_update = ids[5]
print(f"Updating entity id={id_to_update} (delete + insert new vector)")
col.delete(expr=f"id in [{id_to_update}]")
col.flush()
new_vec = random_vectors(1, DIM)[0]
col.insert([[id_to_update], [new_vec]])
col.flush()

out2 = col.query(expr=f"id in [{id_to_update}]", output_fields=["id"])
print(f"  after update, query id={id_to_update} returned: {out2}")


Updating entity id=6 (delete + insert new vector)
  after update, query id=6 returned: data: ["{'id': 6}"], extra_info: {}


In [15]:
# 8) Delete a few ids
to_delete = ids[-3:]
print(f"Deleting last 3 entities: {to_delete}")
col.delete(expr=f"id in {to_delete}")
col.flush()

Deleting last 3 entities: [18, 19, 20]


In [16]:
# 9) Summary
print(f"num_entities: {col.num_entities}")

print("Demo completed successfully.")


num_entities: 38
Demo completed successfully.
