# Starting Milvus Lite

In [1]:
! pip install milvus pymilvus sentence-transformer



In [2]:
from milvus import default_server

In [3]:
default_server.start()

In [4]:
from pymilvus import connections

# connect to milvus just like any other server
connections.connect(
  host="127.0.0.1", 
  port=default_server.listen_port)

# Preparing a Milvus Collection

In [5]:
from pymilvus import FieldSchema, CollectionSchema, DataType

# number of dimensions in your embedding model, in the case of 
# sentence-transformers/all-MiniLM-L12-v2, that's 384
DIMENSION = 384

# id and embedding are required to define
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=DIMENSION)
]
# "enable_dynamic_field" lets us insert data with any metadata fields
schema = CollectionSchema(fields=fields, enable_dynamic_field=True)

In [6]:
from pymilvus import Collection

# define the collection name and pass the schema
collection = Collection(name="example_name", schema=schema)

In [7]:
index_params = {
    "index_type": "IVF_FLAT", # one of 11 Milvus indexes, IVF is the most intuitive
    "metric_type": "L2", # L2, Cosine, or IP
    "params": {"nlist": 4}, # how many "centroids" do you want for IVF?
}

In [8]:
# pass the field to index on and the parameters to index with
collection.create_index(field_name="embedding", index_params=index_params)
# load the collection into memory
collection.load()

# Adding Data to Milvus

In [9]:
from sentence_transformers import SentenceTransformer

# a popular 384 dimension vector embedding model
transformer = SentenceTransformer('all-MiniLM-L12-v2')

In [10]:
# available at 
with open("./Seattle.txt", "r") as f:
    x = f.read() # read the entire file in as a string

# split on the number of sentences for simplicity
sentences = x.split(".")

In [11]:
# hold embeddings and sentences together
milvus_input = []
for sentence in sentences:
    entry = {}
    vector_embedding = transformer.encode(sentence)
    entry["embedding"] = vector_embedding
    entry["sentence"] = sentence
    milvus_input.append(entry)

In [12]:
# milvus expects a list of dicts 
collection.insert(milvus_input)

(insert count: 568, delete count: 0, upsert count: 0, timestamp: 447595179724767234, success count: 568, err count: 0)

In [13]:
collection.flush()

# Reading Data from Milvus

In [15]:
query = "the tallest point in Seattle"
q_embedding = transformer.encode(query)

In [16]:
res = collection.search(
    data=[q_embedding],  # Embeded search value
    anns_field="embedding",  # Search across embeddings
    param={"metric_type": "L2",
            "params": {"nprobe": 2}},
    limit = 3,  # Limit to top_k results per search
    output_fields=["sentence"]  # Include title field in result
)

In [17]:
for i, hits in enumerate(res):
    for hit in hits:
        print(hit.entity.get("sentence"))
        print(hit.entity.id)

 The highest point within city limits is at High Point in West Seattle, which is roughly located near 35th Ave SW and SW Myrtle St
447594821656779660
2 km2) Discovery Park (the largest park in the city) in Magnolia, along the shores of Myrtle Edwards Park on the Downtown waterfront, along the shoreline of Lake Washington at Seward Park, along Alki Beach in West Seattle, or along the Burke-Gilman Trail
447594821656779901
 Seattle
447594821656780045


# Deleting Data from Milvus

In [18]:
expr = "id == 447594821656779660"

In [19]:
collection.delete(expr)

(insert count: 0, delete count: 1, upsert count: 0, timestamp: 447595296299417601, success count: 0, err count: 0)

In [20]:
res_after_del = collection.search(
    data=[q_embedding],  # Embeded search value
    anns_field="embedding",  # Search across embeddings
    param={"metric_type": "L2",
            "params": {"nprobe": 2}},
    limit = 3,  # Limit to top_k results per search
    output_fields=["sentence"]  # Include title field in result
)

In [21]:
for i, hits in enumerate(res_after_del):
    for hit in hits:
        print(hit.entity.get("sentence"))
        print(hit.entity.id)
        

2 km2) Discovery Park (the largest park in the city) in Magnolia, along the shores of Myrtle Edwards Park on the Downtown waterfront, along the shoreline of Lake Washington at Seward Park, along Alki Beach in West Seattle, or along the Burke-Gilman Trail
447594821656779901
 Seattle
447594821656780045
Seattle is situated on an isthmus between Puget Sound (an inlet of the Pacific Ocean) and Lake Washington
447594821656779533


# Utilities

In [22]:
from pymilvus import utility

In [23]:
utility.has_collection("collection_name")

False

In [24]:
utility.list_collections()

['example_name']

In [25]:
utility.drop_collection('example_name')

In [26]:
utility.list_collections()

[]

In [27]:
default_server.stop()
default_server.cleanup()