In [52]:
# Milvus 컬렉션 조회 및 Parquet 파일 저장 예시 (Jupyter Notebook)

from pymilvus import (
    connections,
    utility,
    Collection,
)
import numpy as np
import pandas as pd

# 1) Milvus 서버 연결
connections.connect(
    alias="default",
    host="localhost",
    port="19530"
)

# 2) 모든 컬렉션 이름 리스트 확인
print("=== Milvus에 존재하는 컬렉션 목록 ===")
all_collections = utility.list_collections()
for idx, name in enumerate(all_collections, start=1):
    print(f"{idx}. {name}")

# # 3) 컬렉션 선택 인터랙티브 위젯 사용 (Jupyter Notebook 전용)
# from ipywidgets import widgets, Layout
# from IPython.display import display

# # 드롭다운 위젯 생성
# collection_dropdown = widgets.Dropdown(
#     options=all_collections,
#     description='컬렉션:',
#     layout=Layout(width='50%')
# )
# display(collection_dropdown)

=== Milvus에 존재하는 컬렉션 목록 ===
1. images


In [47]:
# Milvus 컬렉션 조회 및 동적 데이터 삽입 예시 (Jupyter Notebook)

from pymilvus import (
    connections,
    utility,
    Collection,
    FieldSchema,
    DataType,
    MilvusClient
)

import numpy as np

client = MilvusClient(host='localhost', port=3333)

In [9]:
client.list_collections()

['images', 'test_collection']

In [12]:
col_name= "test_collection"

In [22]:
index_params = client.prepare_index_params()

# 3) IndexParams에 인덱스 설정 추가
index_params.add_index(
    field_name="embedding",    # 벡터 필드명
    index_type="IVF_FLAT",     # 인덱스 타입
    metric_type="COSINE",      # 거리(metric) 유형
    params={"nlist": 128}      # IVF_FLAT 전용 파라미터
)

# 4) 실제 인덱스 생성 호출
client.create_index(
    collection_name=col_name,
    index_params=index_params,
    sync=True                  # 동기 모드로 완전히 빌드될 때까지 대기
)

In [23]:
client.load_collection(
    collection_name=col_name,
    load_fields=["id", "filename",'embedding'], # Load only the specified fields
    skip_load_dynamic_field=True # Skip loading the dynamic field
)

In [51]:
# # 컬렉션 만들기
# collection_name = "test_collection"
# if not utility.has_collection(collection_name):
#     fields = [
#         FieldSchema(
#             name="id",
#             dtype=DataType.INT64,
#             is_primary=True,
#             auto_id=False,
#             description="기본 키",
#         ),
#         FieldSchema(
#             name="filename",
#             dtype=DataType.VARCHAR,
#             max_length=256,
#             description="파일 이름",
#         ),
#         FieldSchema(
#             name="embedding",
#             dtype=DataType.FLOAT_VECTOR,
#             dim=128,
#             description="임베딩 벡터",
#         ),
#     ]
#     schema = CollectionSchema(fields, description="테스트용 컬렉션 스키마")
#     test_col = Collection(
#         name=collection_name,
#         schema=schema,
#         using="default",
#         shards_num=2,
#     )
#     print(f"컬렉션 '{collection_name}' 생성됨.")
# else:
#     print(f"컬렉션 '{collection_name}' 이미 존재합니다.")

컬렉션 'test_collection' 생성됨.


In [54]:
# if utility.has_collection(collection_name):
#     utility.drop_collection(collection_name)
#     print(f"컬렉션 '{collection_name}' 이 성공적으로 삭제되었습니다.")
# else:
#     print(f"삭제할 컬렉션 '{collection_name}' 가 없습니다.")

In [24]:
data = [
    {
        "id":            200,                     # INT64
        "filename":      "img_100.png",           # VARCHAR
        "embedding":     np.random.rand(128).tolist(),  # FLOAT_VECTOR
    }
]

# 4) insert 호출
res = client.insert(
    collection_name=col_name,
    data=data
)

res

{'insert_count': 1, 'ids': [200], 'cost': 0}

In [25]:
res = client.get_load_state(
    collection_name=col_name
)

print(res)

{'state': <LoadState: Loaded>}


In [34]:
from pymilvus import Collection

col = Collection("test_collection")

# ① 컬렉션에 실제 저장된 엔티티 수
print(col.num_entities)    # int

# ② 스키마(필드 정보) 확인
print(col.schema)

# ③ 간단한 쿼리로 실제 내용을 살짝 들여다보기
#    예: id가 0 이상인 첫 5개 행 가져오기
results = col.query(
    expr="id>0",
    output_fields=["id","filename",'embedding'],
    limit=5
)
print(results)   # [{'id': 1, 'filename': 'img1.jpg'}, …]

25
{'auto_id': False, 'description': '테스트용 컬렉션 스키마', 'fields': [{'name': 'id', 'description': '기본 키', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'filename', 'description': '파일 이름', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 256}}, {'name': 'embedding', 'description': '임베딩 벡터', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 128}}], 'enable_dynamic_field': False}
data: ["{'id': 1, 'filename': 'filename_0', 'embedding': [0.30807522, 0.9442458, 0.31456813, 0.0033547555, 0.060718384, 0.23237507, 0.0041235783, 0.372246, 0.6687562, 0.1803126, 0.8560904, 0.82708395, 0.5066612, 0.24645224, 0.099753484, 0.8612459, 0.33135414, 0.88554686, 0.9805263, 0.16511753, 0.8476271, 0.2792158, 0.9924026, 0.5698289, 0.70872283, 0.15187828, 0.52347124, 0.27031213, 0.1825831, 0.5171643, 0.46563652, 0.6355188, 0.15775536, 0.5220429, 0.9270214, 0.9079603, 0.6851829, 0.8034762, 0.4167268, 0.5455673, 0.91903895, 0.15417121, 0.46069437, 0.855242, 0.77325106, 

In [35]:
data = [
    {
        "id":            1,                     # INT64
        "filename":      "img_300.png",           # VARCHAR
        "embedding":     np.random.rand(128).tolist(),  # FLOAT_VECTOR
    }
]

# 4) insert 호출
res = client.upsert(
    collection_name=col_name,
    data=data
)

res

{'upsert_count': 1, 'cost': 0}

In [37]:
results = col.query(
    expr="id==1",
    output_fields=["id","filename",'embedding'],
    limit=5
)
print(results)

data: ["{'id': 1, 'filename': 'img_300.png', 'embedding': [0.825905, 0.79972094, 0.91773266, 0.5064248, 0.40132236, 0.5383646, 0.10380822, 0.08179004, 0.20847358, 0.75837123, 0.4368953, 0.6386708, 0.43984187, 0.9555287, 0.029556133, 0.26269972, 0.90382165, 0.9704297, 0.18944234, 0.70308393, 0.8107239, 0.20228821, 0.53722167, 0.9172774, 0.84055734, 0.5689862, 0.04404725, 0.9457807, 0.15538996, 0.879912, 0.41422054, 0.9535921, 0.74553686, 0.31296408, 0.3578892, 0.4419021, 0.47498137, 0.029958632, 0.2688308, 0.21585464, 0.6741697, 0.34661758, 0.61295885, 0.67650217, 0.032911442, 0.32876682, 0.78722125, 0.8835573, 0.06497636, 0.33290797, 0.8102243, 0.48820272, 0.09302919, 0.104013935, 0.9014799, 0.90882736, 0.63667953, 0.36317152, 0.9302369, 0.19153634, 0.7307174, 0.9396004, 0.85466534, 0.98857147, 0.45665783, 0.6573568, 0.76245296, 0.66109306, 0.5511565, 0.3303178, 0.7052476, 0.43945882, 0.5879226, 0.558517, 0.7669795, 0.36165977, 0.07194951, 0.34389937, 0.5619882, 0.8566826, 0.56626326, 

In [44]:
res = client.delete(
    collection_name=col_name,
    ids=[1,2,3,100,200]
)

print(res)

{'delete_count': 5}


In [45]:
col.query(
    expr="id>0",
    output_fields=["id","filename",'embedding'],
    limit=5
)

data: []

In [50]:
help(client.drop_collection)

Help on method drop_collection in module pymilvus.milvus_client.milvus_client:

drop_collection(collection_name: str, timeout: Optional[float] = None, **kwargs) method of pymilvus.milvus_client.milvus_client.MilvusClient instance
    Delete the collection stored in this object



In [51]:
client.drop_collection(collection_name=col_name)

In [53]:
from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

# 1) 스키마 레벨에서 auto_id=True 켜기
schema = client.create_schema(
    auto_id=True,               # Milvus가 id를 자동으로 생성
    enable_dynamic_field=True,
)

# 2) 자동 PK 필드 추가 (이 필드는 JSON에서 보내지 않습니다)
schema.add_field(
    field_name="id",            # PK 이름
    datatype=DataType.INT64,
    is_primary=True,
    auto_id=True                # ← 필수!
)

# 3) 나머지 필드 추가
schema.add_field(
    field_name="embedding",
    datatype=DataType.FLOAT_VECTOR,
    dim=4                        # 예시: 차원 4
)
schema.add_field(
    field_name="filename",
    datatype=DataType.VARCHAR,
    max_length=512
)

# 4) 컬렉션 생성
client.create_collection(
    collection_name="test",
    schema=schema,
    index_params={
        "index_type": "IVF_FLAT",
        "metric_type": "COSINE",
        "params": {"nlist": 128}
    }
)

ParamError: <ParamError: (code=1, message=wrong type of argument [index_params], expected type: [IndexParams], got type: [dict])>