# Connection

### persistent client - local database

In [86]:
import chromadb
# Persistent (on-disk) client: data is stored under ./chroma_storage, relative to the working directory.
client = chromadb.PersistentClient(path="./chroma_storage")

In [87]:
# heartbeat() returns a nanosecond heartbeat; useful for making sure the client remains connected.
client.heartbeat()
# client.reset()  # Empties and completely resets the database. ⚠️ This is destructive and not reversible.

1734346193362968000

In [88]:
# Display the Settings object the client is running with (persist directory, server options, allow_reset, ...).
client.get_settings()

Settings(environment='', chroma_api_impl='chromadb.api.segment.SegmentAPI', chroma_server_nofile=None, chroma_server_thread_pool_size=40, tenant_id='default', topic_namespace='default', chroma_server_host=None, chroma_server_headers=None, chroma_server_http_port=None, chroma_server_ssl_enabled=False, chroma_server_ssl_verify=None, chroma_server_api_default_path=<APIVersion.V2: '/api/v2'>, chroma_server_cors_allow_origins=[], is_persistent=True, persist_directory='./chroma_storage', chroma_memory_limit_bytes=0, chroma_segment_cache_policy=None, allow_reset=False, chroma_auth_token_transport_header=None, chroma_client_auth_provider=None, chroma_client_auth_credentials=None, chroma_server_auth_ignore_paths={'APIVersion.V2': ['GET'], 'APIVersion.V2/heartbeat': ['GET'], 'APIVersion.V2/version': ['GET'], 'APIVersion.V1': ['GET'], 'APIVersion.V1/heartbeat': ['GET'], 'APIVersion.V1/version': ['GET']}, chroma_overwrite_singleton_tenant_database_access_from_auth=False, chroma_server_authn_provid

### HttpClient : client-server mode

http client

async HTTP client

Using the Python HTTP-only client

# Collection

### Creating, inspecting, and deleting Collections

In [89]:
import os

from chromadb.utils import embedding_functions as ef

# When no embedding_function is given, the default is Sentence Transformers all-MiniLM-L6-v2:
# default_ef = ef.DefaultEmbeddingFunction()
# sentence_transformer_ef = ef.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")

# Prefer the OPENAI_API_KEY environment variable so the notebook does not depend on one
# machine's directory layout; fall back to the original local key file when it is not set.
API_KEY_FILE = '/Users/jaesolshin/Documents/GitHub/pg_test/API_KEY'

api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    # The context manager closes the file handle (the bare open().read() left it open).
    with open(API_KEY_FILE, 'r') as key_file:
        api_key = key_file.read()
# Key files usually end with a newline; strip it so it is not sent as part of the key.
api_key = api_key.strip()

# Embedding function that embeds documents with OpenAI's text-embedding-3-small model.
openai_ef = ef.OpenAIEmbeddingFunction(
    api_key=api_key,
    model_name="text-embedding-3-small",
)


In [90]:
# Create a collection whose documents are embedded with the OpenAI embedding function,
# then display the collections known to the client.
collection = client.create_collection(
    embedding_function=openai_ef,
    name="my_collection",
)
client.list_collections()


[Collection(name=manual), Collection(name=my_collection)]

In [91]:
# Remove the collection again and display the remaining collections.
client.delete_collection(name="my_collection")
client.list_collections()

[Collection(name=manual)]

In [92]:
# get_collection fetches an existing collection by name and raises an exception if it is not found:
# collection = client.get_collection(name="test")
# get_or_create_collection fetches the collection by name and creates it if it does not exist.
collection = client.get_or_create_collection(name="test")

In [93]:
# Display the collection's model: id, name, HNSW configuration, metadata, tenant and database.
collection.get_model()

Collection(id=UUID('0b768187-48ee-4853-8ae1-43a4bea6a573'), name='test', configuration_json={'hnsw_configuration': {'space': 'l2', 'ef_construction': 100, 'ef_search': 10, 'num_threads': 14, 'M': 16, 'resize_factor': 1.2, 'batch_size': 100, 'sync_threshold': 1000, '_type': 'HNSWConfigurationInternal'}, '_type': 'CollectionConfigurationInternal'}, metadata=None, dimension=None, tenant='default_tenant', database='default_database', version=0, log_position=0)

In [94]:
# peek() returns a list of the first 10 items in the collection (empty at this point).
collection.peek()

{'ids': [],
 'embeddings': array([], dtype=float64),
 'documents': [],
 'uris': None,
 'data': None,
 'metadatas': [],
 'included': [<IncludeEnum.embeddings: 'embeddings'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [95]:
# count() returns the number of items in the collection.
collection.count()

0

In [96]:
# Rename the collection: 'test' -> 'mytest'.
collection.modify(name="mytest")

In [97]:
# Display the collections; the renamed collection should now appear as 'mytest'.
client.list_collections()

[Collection(name=manual), Collection(name=mytest)]

In [98]:
# Clean up: remove the renamed collection.
client.delete_collection(name="mytest")

### Changing the distance function

In [99]:
# The distance function can be chosen through the metadata passed to create_collection.
# get_or_create=True keeps this cell re-runnable: the persistent store still holds a "test"
# collection after an earlier run of the notebook, and create_collection would otherwise
# raise because the name is already taken.
collection = client.create_collection(
    name="test",
    metadata={"hnsw:space": "cosine"},  # "l2" is the default
    embedding_function=openai_ef,
    get_or_create=True,
)

In [None]:
# HNSW index settings that can be supplied through the collection metadata:
# "hnsw:space" - distance metric: "l2", "ip" or "cosine". Default: "l2".
# "hnsw:construction_ef" - number of candidate neighbours considered while building the index. Positive integer. Default: 100.
# "hnsw:M" - maximum number of connections (neighbour nodes) each graph node may have. 2-100. Default: 16.
# "hnsw:search_ef" - number of neighbours considered during a search. Positive integer. Default: 10. Can be changed after creation.
# "hnsw:num_threads" - number of threads used by the HNSW algorithm. Positive integer. Default: number of CPU cores. Can be changed after creation.
# "hnsw:resize_factor" - growth factor applied when the index's internal data structure is enlarged. Positive number. Default: 1.2. Can be changed after creation.
# "hnsw:batch_size" - amount of data handled at once when adding to or searching the index. Positive, < sync_threshold. Default: 100. Can be changed after creation.
# "hnsw:sync_threshold" - how often index changes are synced to disk. Positive. Default: 1000. Can be changed after creation.

# The previous cell already created "test"; get_or_create=True stops this call from raising
# because the name is taken.
# NOTE(review): when "test" already exists, the metadata given here may not be applied to it.
# Delete the collection first if these settings must take effect; confirm for your Chroma version.
collection = client.create_collection(
    name="test",
    metadata={
        "hnsw:space": "cosine",  # "l2" is the default
        "hnsw:construction_ef": 100,
        "hnsw:M": 16,
        "hnsw:search_ef": 10,
        "hnsw:num_threads": 4,
        "hnsw:resize_factor": 1.2,
        "hnsw:batch_size": 100,
        "hnsw:sync_threshold": 1000,
    },
    embedding_function=openai_ef,
    get_or_create=True,
)

# configure_collection.py line 117: edit the values of the metadata dictionary

In [100]:
# Display the collections; "test" should now be listed.
client.list_collections()

[Collection(name=manual), Collection(name=test)]

In [101]:
# Display the collection's model; its metadata should carry the 'hnsw:space' value chosen at creation.
collection.get_model()

Collection(id=UUID('a2323777-b7cf-4dc2-a56e-704f290c397b'), name='test', configuration_json={'hnsw_configuration': {'space': 'l2', 'ef_construction': 100, 'ef_search': 10, 'num_threads': 14, 'M': 16, 'resize_factor': 1.2, 'batch_size': 100, 'sync_threshold': 1000, '_type': 'HNSWConfigurationInternal'}, '_type': 'CollectionConfigurationInternal'}, metadata={'hnsw:space': 'cosine'}, dimension=None, tenant='default_tenant', database='default_database', version=0, log_position=0)

# Insert

### Adding data to a Collection

#### use embedding function

In [102]:
# ids       : document IDs
# documents : the text of the documents to embed
# metadatas : metadata attached to each document
# When no `embeddings` argument is given, the documents are embedded automatically with the
# embedding function defined when the collection was created.
collection.add(
    ids=["id1", "id2", "id3"],
    documents=["lorem ipsum...", "doc2", "doc3"],
    metadatas=[
        {"chapter": "3", "verse": "16"},
        {"chapter": "3", "verse": "5"},
        {"chapter": "29", "verse": "11"},
    ],
)

In [103]:
# Keep the peek() result so a later cell can inspect the stored embeddings.
result = collection.peek()

In [104]:
# Display the first items: ids, embeddings, documents and metadatas of the three added records.
collection.peek()

{'ids': ['id1', 'id2', 'id3'],
 'embeddings': array([[ 0.04966333,  0.03326806, -0.01821254, ...,  0.00498756,
          0.03791074, -0.01797377],
        [ 0.02054628,  0.02163102,  0.01030505, ..., -0.01190823,
         -0.00529609, -0.00301694],
        [ 0.01418145,  0.04145983,  0.03606211, ..., -0.00342532,
          0.00275474,  0.01248431]]),
 'documents': ['lorem ipsum...', 'doc2', 'doc3'],
 'uris': None,
 'data': None,
 'metadatas': [{'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'}],
 'included': [<IncludeEnum.embeddings: 'embeddings'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [105]:
# Number of items now stored in the collection.
collection.count()

3

In [106]:
# Length of the first stored embedding vector, i.e. the dimensionality of the embeddings.
len(result['embeddings'][0])

1536

In [107]:
# Records can be inserted without any metadata.
collection.add(
    ids=["id4", "id5", "id6"],
    documents=["doc4", "doc5", "doc6"],
)

In [108]:
# Display the items again; the records added without metadata show None as their metadata.
collection.peek()

{'ids': ['id1', 'id2', 'id3', 'id4', 'id5', 'id6'],
 'embeddings': array([[ 0.04966333,  0.03326806, -0.01821254, ...,  0.00498756,
          0.03791074, -0.01797377],
        [ 0.02054628,  0.02163102,  0.01030505, ..., -0.01190823,
         -0.00529609, -0.00301694],
        [ 0.01418145,  0.04145983,  0.03606211, ..., -0.00342532,
          0.00275474,  0.01248431],
        [ 0.01773529,  0.02958861,  0.02347423, ..., -0.02935619,
          0.01712742,  0.01691289],
        [ 0.00792155,  0.05334781,  0.00661152, ..., -0.01848947,
          0.01628125, -0.01284624],
        [ 0.01972202,  0.03384865,  0.05426145, ...,  0.01204563,
          0.0128573 , -0.00704173]]),
 'documents': ['lorem ipsum...', 'doc2', 'doc3', 'doc4', 'doc5', 'doc6'],
 'uris': None,
 'data': None,
 'metadatas': [{'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'},
  None,
  None,
  None],
 'included': [<IncludeEnum.embeddings: 'embeddings'>,
  <IncludeEnum.docu

#### manually insert

In [109]:
# Drop a leftover "manual" collection from an earlier run so the next cell can create it fresh.
# On a brand-new store the collection does not exist yet, and an unconditional
# delete_collection would raise, so check first.
# getattr covers both return shapes of list_collections(): Collection objects (as in this
# notebook's outputs) or plain name strings.
existing_names = [getattr(c, "name", c) for c in client.list_collections()]
if "manual" in existing_names:
    client.delete_collection(name="manual")

In [110]:
# Possible "hnsw:space" values are "l2", "ip" and "cosine"; no metadata is passed here,
# so no distance function is selected explicitly for this collection.
collection_manual = client.create_collection(name="manual")

In [111]:
# Display the collections; "manual" should be listed.
client.list_collections()

[Collection(name=test), Collection(name=manual)]

In [112]:
# Data can also be added manually by supplying the embedding vectors yourself.
collection_manual.add(
    ids=["id1", "id2", "id3"],
    embeddings=[
        [1.1, 2.3, 3.2],
        [4.5, 6.9, 4.4],
        [1.1, 2.3, 3.2],
    ],
    documents=["doc1", "doc2", "doc3"],
    metadatas=[
        {"chapter": "3", "verse": "16"},
        {"chapter": "3", "verse": "5"},
        {"chapter": "29", "verse": "11"},
    ],
)


In [113]:
# Display the stored records, including the manually supplied 3-dimensional embeddings.
collection_manual.peek()

{'ids': ['id1', 'id2', 'id3'],
 'embeddings': array([[1.10000002, 2.29999995, 3.20000005],
        [4.5       , 6.9000001 , 4.4000001 ],
        [1.10000002, 2.29999995, 3.20000005]]),
 'documents': ['doc1', 'doc2', 'doc3'],
 'uris': None,
 'data': None,
 'metadatas': [{'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'}],
 'included': [<IncludeEnum.embeddings: 'embeddings'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [114]:
# Add records without documents:
# only the vectors are stored in chromadb, and the id -> document mapping is managed elsewhere.
collection_manual.add(
    ids=["id4", "id5", "id6"],
    embeddings=[
        [1.1, 2.3, 3.2],
        [4.5, 6.9, 4.4],
        [1.1, 2.3, 3.2],
    ],
    metadatas=[
        {"chapter": "3", "verse": "16"},
        {"chapter": "3", "verse": "5"},
        {"chapter": "29", "verse": "11"},
    ],
)

In [115]:
# 'id4', 'id5', 'id6' were added without documents, so their documents show as None.
collection_manual.peek()

{'ids': ['id1', 'id2', 'id3', 'id4', 'id5', 'id6'],
 'embeddings': array([[1.10000002, 2.29999995, 3.20000005],
        [4.5       , 6.9000001 , 4.4000001 ],
        [1.10000002, 2.29999995, 3.20000005],
        [1.10000002, 2.29999995, 3.20000005],
        [4.5       , 6.9000001 , 4.4000001 ],
        [1.10000002, 2.29999995, 3.20000005]]),
 'documents': ['doc1', 'doc2', 'doc3', None, None, None],
 'uris': None,
 'data': None,
 'metadatas': [{'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'},
  {'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'}],
 'included': [<IncludeEnum.embeddings: 'embeddings'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [116]:
# Number of items now stored in the "manual" collection.
collection_manual.count()

6

#### Update

In [125]:
# Fetch the records that are about to be updated, by id.
collection_manual.get(ids=["id1", "id2", "id3"])

{'ids': ['id1', 'id2', 'id3'],
 'embeddings': None,
 'documents': ['doc1', 'doc2', 'doc3'],
 'uris': None,
 'data': None,
 'metadatas': [{'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'}],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [126]:
# Update the existing records id1-id3; only the document texts differ from what was stored.
collection_manual.update(
    ids=["id1", "id2", "id3"],
    documents=["doc_A", "doc_B", "doc_C"],
    embeddings=[
        [1.1, 2.3, 3.2],
        [4.5, 6.9, 4.4],
        [1.1, 2.3, 3.2],
    ],
    metadatas=[
        {"chapter": "3", "verse": "16"},
        {"chapter": "3", "verse": "5"},
        {"chapter": "29", "verse": "11"},
    ],
)

In [127]:
# Display the records; id1-id3 should now carry the updated document texts.
collection_manual.peek()

{'ids': ['id1', 'id2', 'id3', 'id4', 'id5', 'id6'],
 'embeddings': array([[1.10000002, 2.29999995, 3.20000005],
        [4.5       , 6.9000001 , 4.4000001 ],
        [1.10000002, 2.29999995, 3.20000005],
        [1.10000002, 2.29999995, 3.20000005],
        [4.5       , 6.9000001 , 4.4000001 ],
        [1.10000002, 2.29999995, 3.20000005]]),
 'documents': ['doc_A', 'doc_B', 'doc_C', None, None, None],
 'uris': None,
 'data': None,
 'metadatas': [{'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'},
  {'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'}],
 'included': [<IncludeEnum.embeddings: 'embeddings'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [143]:
# upsert = update or insert: "id1" already exists and is updated, "id7" and "id8" are new and inserted.
collection_manual.upsert(
    ids=["id1", "id7", "id8"],
    documents=["doc_AA", "doc_B", "doc_C"],
    embeddings=[
        [1.5, -2.0, -3.0],
        [3.0, 0.0, 0.1],
        [2.1, 4.3, 1.2],
    ],
    metadatas=[
        {"chapter": "3", "verse": "16"},
        {"chapter": "8", "verse": "1"},
        {"chapter": "8", "verse": "2"},
    ],
)

In [140]:
# Display the records after the upsert: id1 is changed, id7 and id8 are appended.
collection_manual.peek()

{'ids': ['id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7', 'id8'],
 'embeddings': array([[ 1.5       , -2.        , -3.        ],
        [ 4.5       ,  6.9000001 ,  4.4000001 ],
        [ 1.10000002,  2.29999995,  3.20000005],
        [ 1.10000002,  2.29999995,  3.20000005],
        [ 4.5       ,  6.9000001 ,  4.4000001 ],
        [ 1.10000002,  2.29999995,  3.20000005],
        [ 3.        ,  0.        ,  0.1       ],
        [ 2.0999999 ,  4.30000019,  1.20000005]]),
 'documents': ['doc_AA', 'doc_B', 'doc_C', None, None, None, 'doc_B', 'doc_C'],
 'uris': None,
 'data': None,
 'metadatas': [{'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'},
  {'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'},
  {'chapter': '8', 'verse': '1'},
  {'chapter': '8', 'verse': '2'}],
 'included': [<IncludeEnum.embeddings: 'embeddings'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'met

#### delete

In [144]:
# Delete by id, additionally restricted by a metadata filter on chapter "8".
collection_manual.delete(
    where={"chapter": "8"},
    ids=["id7", "id8"],
)

In [147]:
# Display the records after the delete; id7 and id8 should be gone.
collection_manual.peek()

{'ids': ['id1', 'id2', 'id3', 'id4', 'id5', 'id6'],
 'embeddings': array([[ 1.5       , -2.        , -3.        ],
        [ 4.5       ,  6.9000001 ,  4.4000001 ],
        [ 1.10000002,  2.29999995,  3.20000005],
        [ 1.10000002,  2.29999995,  3.20000005],
        [ 4.5       ,  6.9000001 ,  4.4000001 ],
        [ 1.10000002,  2.29999995,  3.20000005]]),
 'documents': ['doc_AA', 'doc_B', 'doc_C', None, None, None],
 'uris': None,
 'data': None,
 'metadatas': [{'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'},
  {'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'}],
 'included': [<IncludeEnum.embeddings: 'embeddings'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

# Query

In [128]:
# Nearest-neighbour query with a raw embedding; n_results is not given, so the default
# (10, per the warning in the output) is requested and capped to the number of stored items.
# NOTE(review): this cell's execution count (128) is lower than the upsert cell's (143), and the
# saved output still shows id1 as 'doc_A'. A top-to-bottom run will show id1 as 'doc_AA' with
# a different distance.
collection_manual.query(
    query_embeddings=[[11.1, 12.1, 13.1]]
)

Number of requested results 10 is greater than number of elements in index 6, updating n_results = 6


{'ids': [['id2', 'id5', 'id6', 'id1', 'id3', 'id4']],
 'embeddings': None,
 'documents': [['doc_B', None, None, 'doc_A', 'doc_C', None]],
 'uris': None,
 'data': None,
 'metadatas': [[{'chapter': '3', 'verse': '5'},
   {'chapter': '3', 'verse': '5'},
   {'chapter': '29', 'verse': '11'},
   {'chapter': '3', 'verse': '16'},
   {'chapter': '29', 'verse': '11'},
   {'chapter': '3', 'verse': '16'}]],
 'distances': [[146.2900129890445,
   146.2900129890445,
   294.05002217292827,
   294.05002217292827,
   294.05002217292827,
   294.05002217292827]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [129]:
# Same query, limited to the 3 nearest records whose metadata has chapter == "3".
collection_manual.query(
    where={"chapter": "3"},
    n_results=3,
    query_embeddings=[[11.1, 12.1, 13.1]],
)


{'ids': [['id2', 'id5', 'id1']],
 'embeddings': None,
 'documents': [['doc_B', None, 'doc_A']],
 'uris': None,
 'data': None,
 'metadatas': [[{'chapter': '3', 'verse': '5'},
   {'chapter': '3', 'verse': '5'},
   {'chapter': '3', 'verse': '16'}]],
 'distances': [[146.2900129890445, 146.2900129890445, 294.05002217292827]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [130]:
# Combine a metadata filter with a document-content filter: chapter == "3" and text containing "doc".
collection_manual.query(
    where_document={"$contains": "doc"},
    where={"chapter": "3"},
    n_results=3,
    query_embeddings=[[11.1, 12.1, 13.1]],
)


{'ids': [['id2', 'id1']],
 'embeddings': None,
 'documents': [['doc_B', 'doc_A']],
 'uris': None,
 'data': None,
 'metadatas': [[{'chapter': '3', 'verse': '5'},
   {'chapter': '3', 'verse': '16'}]],
 'distances': [[146.2900129890445, 294.05002217292827]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [131]:
# get() accepts the same metadata filter: of the requested ids, keep only those with verse == '5'.
collection_manual.get(
    where={"verse": "5"},
    ids=["id1", "id2"],
)

{'ids': ['id2'],
 'embeddings': None,
 'documents': ['doc_B'],
 'uris': None,
 'data': None,
 'metadatas': [{'chapter': '3', 'verse': '5'}],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

## where filter

#### Filtering by metadata

In [132]:
# Plain equality filter, written without a filter operator.
collection_manual.query(
    where={"chapter": "3"},
    query_embeddings=[[0, 0, 0]],
)


Number of requested results 10 is greater than number of elements in index 6, updating n_results = 6


{'ids': [['id4', 'id1', 'id2', 'id5']],
 'embeddings': None,
 'documents': [[None, 'doc_A', 'doc_B', None]],
 'uris': None,
 'data': None,
 'metadatas': [[{'chapter': '3', 'verse': '16'},
   {'chapter': '3', 'verse': '16'},
   {'chapter': '3', 'verse': '5'},
   {'chapter': '3', 'verse': '5'}]],
 'distances': [[16.740000138282785,
   16.740000138282785,
   87.22000215530397,
   87.22000215530397]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [133]:
# Comparison operators usable inside `where`:
#   $eq  - equal to (string, int, float)
#   $ne  - not equal to (string, int, float)
#   $gt  - greater than (int, float)
#   $gte - greater than or equal to (int, float)
#   $lt  - less than (int, float)
#   $lte - less than or equal to (int, float)

# Example: records whose chapter is not "3".
collection_manual.query(
    where={
        "chapter": {"$ne": "3"},
    },
    query_embeddings=[[0, 0, 0]],
)

Number of requested results 10 is greater than number of elements in index 6, updating n_results = 6


{'ids': [['id6', 'id3']],
 'embeddings': None,
 'documents': [[None, 'doc_C']],
 'uris': None,
 'data': None,
 'metadatas': [[{'chapter': '29', 'verse': '11'},
   {'chapter': '29', 'verse': '11'}]],
 'distances': [[16.740000138282785, 16.740000138282785]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

#### Filtering by document contents

In [134]:
# $contains: keep only records whose document text contains "doc".
collection_manual.query(
    where_document={"$contains": "doc"},
    query_embeddings=[[0, 0, 0]],
)

Number of requested results 10 is greater than number of elements in index 6, updating n_results = 6


{'ids': [['id3', 'id1', 'id2']],
 'embeddings': None,
 'documents': [['doc_C', 'doc_A', 'doc_B']],
 'uris': None,
 'data': None,
 'metadatas': [[{'chapter': '29', 'verse': '11'},
   {'chapter': '3', 'verse': '16'},
   {'chapter': '3', 'verse': '5'}]],
 'distances': [[16.740000138282785, 16.740000138282785, 87.22000215530397]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [135]:
# $not_contains: keep only records whose document text does not contain "doc".
collection_manual.query(
    where_document={"$not_contains": "doc"},
    query_embeddings=[[0, 0, 0]],
)

Number of requested results 10 is greater than number of elements in index 6, updating n_results = 6


{'ids': [['id6', 'id4', 'id5']],
 'embeddings': None,
 'documents': [[None, None, None]],
 'uris': None,
 'data': None,
 'metadatas': [[{'chapter': '29', 'verse': '11'},
   {'chapter': '3', 'verse': '16'},
   {'chapter': '3', 'verse': '5'}]],
 'distances': [[16.740000138282785, 16.740000138282785, 87.22000215530397]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

#### logical operators

In [136]:
# Logical operators $and / $or combine several conditions.
# Example: chapter == "3" AND verse == "5".
collection_manual.query(
    query_embeddings=[[0, 0, 0]],
    where={
        "$and": [
            {"chapter": {"$eq": "3"}},
            {"verse": {"$eq": "5"}},
        ]
    },
)

Number of requested results 10 is greater than number of elements in index 6, updating n_results = 6


{'ids': [['id2', 'id5']],
 'embeddings': None,
 'documents': [['doc_B', None]],
 'uris': None,
 'data': None,
 'metadatas': [[{'chapter': '3', 'verse': '5'},
   {'chapter': '3', 'verse': '5'}]],
 'distances': [[87.22000215530397, 87.22000215530397]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [137]:
# Inclusion operators: $in and $nin.
# Example: chapter is one of '1', '2', '3'.
collection_manual.query(
    where={"chapter": {"$in": ["1", "2", "3"]}},
    query_embeddings=[[0, 0, 0]],
)


Number of requested results 10 is greater than number of elements in index 6, updating n_results = 6


{'ids': [['id4', 'id1', 'id2', 'id5']],
 'embeddings': None,
 'documents': [[None, 'doc_A', 'doc_B', None]],
 'uris': None,
 'data': None,
 'metadatas': [[{'chapter': '3', 'verse': '16'},
   {'chapter': '3', 'verse': '16'},
   {'chapter': '3', 'verse': '5'},
   {'chapter': '3', 'verse': '5'}]],
 'distances': [[16.740000138282785,
   16.740000138282785,
   87.22000215530397,
   87.22000215530397]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [138]:
# Display the current contents of the "manual" collection for reference.
collection_manual.peek()

{'ids': ['id1', 'id2', 'id3', 'id4', 'id5', 'id6'],
 'embeddings': array([[1.10000002, 2.29999995, 3.20000005],
        [4.5       , 6.9000001 , 4.4000001 ],
        [1.10000002, 2.29999995, 3.20000005],
        [1.10000002, 2.29999995, 3.20000005],
        [4.5       , 6.9000001 , 4.4000001 ],
        [1.10000002, 2.29999995, 3.20000005]]),
 'documents': ['doc_A', 'doc_B', 'doc_C', None, None, None],
 'uris': None,
 'data': None,
 'metadatas': [{'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'},
  {'chapter': '3', 'verse': '16'},
  {'chapter': '3', 'verse': '5'},
  {'chapter': '29', 'verse': '11'}],
 'included': [<IncludeEnum.embeddings: 'embeddings'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

# Example

In [157]:
# 데이터 임베딩 저장을 위한 콜렉션 생성
# Create the collection that stores the embedded example data.
# get_or_create=True keeps this cell re-runnable: the persistent store still holds "medical"
# after an earlier run, and create_collection would otherwise raise because the name is taken.
# NOTE(review): no embedding_function is passed, so Chroma's default embedding function is used
# for the Korean documents. Confirm that is intended (openai_ef is defined earlier in this notebook).
collection_name = "medical"
collection = client.create_collection(
    name=collection_name,
    metadata={
        'hnsw:space': 'cosine',        # cosine distance
        'hnsw:construction_ef': 200,   # candidate neighbours considered while building the index
        'hnsw:M': 32,                  # graph connectivity
        'hnsw:search_ef': 50,          # neighbours considered during a search
        'hnsw:resize_factor': 1.5,     # growth factor when the index is enlarged
    },
    get_or_create=True,
)

In [158]:
# Display the collections; "medical" should be listed.
client.list_collections()

[Collection(name=medical), Collection(name=test), Collection(name=manual)]

In [159]:
# 데이터 추가
# Example data: five questions, one category per question, and one id per question.
ids = ["doc1", "doc2", "doc3", "doc4", "doc5"]
documents = [
    "재초진은 뭔가요?",
    "재초진은 어떤 기록을 써야 하나요?",
    "외래 진료의 기준은 무엇인가요?",
    "외래 진료의 수납은 어떻게 이루어지나요?",
    "마약성 진통제의 관리는 어떻게 이루어지나요?",
]
metadatas = [
    {"category": "진료"},
    {"category": "진료"},
    {"category": "외래진료"},
    {"category": "외래진료"},
    {"category": "약품"},
]

# Embed the documents and store them in the collection.
collection.add(ids=ids, documents=documents, metadatas=metadatas)

In [160]:
# 쿼리 예제
# Query example: fetch the 2 documents closest to the query text.
query_text = "외래 진료는 어떻게 이루어지나요?"

results = collection.query(
    query_texts=[query_text],
    n_results=2
)

# Print the results: results["documents"] holds one list of matches per query text.
for result in results["documents"]:
    print("검색 결과:", result)


검색 결과: ['외래 진료의 수납은 어떻게 이루어지나요?', '재초진은 어떤 기록을 써야 하나요?']


In [162]:
# 쿼리 예제
# Same query, restricted with a metadata filter on the category.
query_text = "외래 진료는 어떻게 이루어지나요?"

results = collection.query(
    query_texts=[query_text],
    n_results=2,
    where={"category": "외래진료"}  # filter by category
)

# Print the results: results["documents"] holds one list of matches per query text.
for result in results["documents"]:
    print("검색 결과:", result)

검색 결과: ['외래 진료의 수납은 어떻게 이루어지나요?', '외래 진료의 기준은 무엇인가요?']
