# Starting Milvus Lite

In [1]:
! pip install milvus pymilvus


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
from milvus import default_server

In [3]:
# start the Milvus Lite server; the connection cell below reads the port
# to connect to from default_server.listen_port
default_server.start()

# Preparing a Milvus Collection

In [4]:
from pymilvus import connections

# Milvus Lite is reached exactly like any other Milvus server: point the
# pymilvus connection manager at localhost and at the port the server
# started above is listening on.
milvus_port = default_server.listen_port
connections.connect(host="127.0.0.1", port=milvus_port)

In [5]:
from pymilvus import FieldSchema, CollectionSchema, DataType

# Width of the embedding vectors. It has to match the embedding model:
# sentence-transformers/all-MiniLM-L12-v2 produces 384-dimensional vectors.
DIMENSION = 384

# Only two fields are declared explicitly: an INT64 primary key with
# auto_id=True and the float vector itself. The schema is created with
# enable_dynamic_field=True (presumably so that extra, undeclared fields can
# accompany each entity at insert time -- confirm against the Milvus docs).
id_field = FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True)
embedding_field = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=DIMENSION)
fields = [id_field, embedding_field]
schema = CollectionSchema(fields=fields, enable_dynamic_field=True)

In [6]:
from pymilvus import Collection

# bind a collection called "example_name" to the schema defined above
collection_name = "example_name"
collection = Collection(name=collection_name, schema=schema)

In [7]:
# Settings for the index built on the embedding field:
#   index_type  -- IVF_FLAT; IVF is the most intuitive of the Milvus indexes
#   metric_type -- distance used to compare vectors (L2 here)
#   params      -- nlist: how many "centroids" IVF should use
index_params = dict(
    index_type="IVF_FLAT",
    metric_type="L2",
    params=dict(nlist=4),
)

In [8]:
# build the index on the vector field with the settings defined above
collection.create_index(
    field_name="embedding",
    index_params=index_params,
)
# then load the collection into memory
collection.load()

# Adding Data to Milvus

In [9]:
from sentence_transformers import SentenceTransformer

# all-MiniLM-L12-v2 is a popular embedding model with 384-dimensional output,
# the same width as the DIMENSION constant used for the collection schema
model_name = "all-MiniLM-L12-v2"
transformer = SentenceTransformer(model_name)

In [16]:
# Read the entire file in as one string. The encoding is passed explicitly so
# the text decodes the same way on every platform instead of depending on the
# locale default (assumes Seattle.txt is UTF-8 -- confirm).
with open("./Seattle.txt", "r", encoding="utf-8") as f:
    x = f.read()

# Naive sentence segmentation: cut on every period, trim the surrounding
# whitespace/newlines, and drop fragments that end up empty (e.g. the piece
# after the final period) so later steps never receive blank strings.
sentences = [piece.strip() for piece in x.split(".") if piece.strip()]