<a href="https://colab.research.google.com/github/vsuhas9/LLM-Notebooks/blob/main/Exploring_Milvus/Exploring_Milvus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step - 1: Installing Packages



In [None]:
# Install the Milvus Python client plus its optional `model` extra.
# NOTE(review): the `model` extra is presumably what provides `pymilvus.model`
# (imported in Step 2) — confirm against the pymilvus installation docs.
!pip install pymilvus pymilvus[model]

# Step - 2: Import Packages

In [None]:
from pymilvus import MilvusClient
from pymilvus import model
import json


# Step - 3: Define process items

In [None]:
# Names of the database and collection used throughout the notebook
db_name = "test_db"
collection_name = "test_collection"

# Texts to embed and store, one entry per document
documents = [
    "Hello how are you doing",
    "Hello World",
]

# Metadata strings, paired with `documents` by position
documents_metadata = [
    "hello text-1 metadata",
    "hello text-2 metadata",
]

# Text used to search the stored documents
query = "Hello"

# Step - 4: Creating a Milvus In-Memory DB

In [None]:
# Open a MilvusClient on the local path "milvus.db"; every later step talks to
# Milvus through this `client` object.
# NOTE(review): when given a local file path, pymilvus is expected to run
# Milvus Lite and persist the data in that file (not purely in memory) —
# confirm against the pymilvus MilvusClient documentation.
client = MilvusClient("milvus.db")

# Step - 5: Create Database

In [None]:
# Try to create a dedicated database for this demo.
# This is best effort: if creation fails (the notebook attributes this to
# running in Colab), execution continues with the default database.
try:
    client.create_database(db_name)
except Exception as exc:
    # `except Exception` (not a bare `except:`) lets KeyboardInterrupt and
    # SystemExit propagate, so the cell can still be interrupted.
    print("Running In colab, will be using default db")
    # Show the underlying error so unrelated failures are not silently hidden.
    print(f"create_database failed: {exc!r}")

# Step - 6: Create Collection

In [None]:
# Create the collection that will hold the document embeddings.
client.create_collection(
    collection_name=collection_name,
    # Vector dimension of the collection.
    # NOTE(review): presumably matches the size of the dense embeddings
    # produced in Step 7 — confirm against the embedding model.
    dimension=1024,
    # No ids are supplied when inserting in Step 8, so ids are auto-generated.
    auto_id=True,
)

# Step - 7: Convert Documents to Vectors for Storage

In [None]:
# Instantiate the BGE-M3 embedding function
bge_m3 = model.hybrid.BGEM3EmbeddingFunction(
    model_name="BAAI/bge-m3",  # name of the model to load
    device="cpu",  # device to run on, e.g. "cpu" or "cuda:0"
    use_fp16=False,  # whether to use fp16; keep False when device is "cpu"
)

# Embed every document; the result is indexed by "dense" in Step 8
vectors = bge_m3.encode_documents(documents)

# Step - 8: Store Documents

In [None]:
# Build one row per document: its dense embedding, the raw text, and the
# metadata string found at the same position in `documents_metadata`.
data = [
    {
        "vector": vectors["dense"][idx],
        "text": text,
        "metadata": documents_metadata[idx],
    }
    for idx, text in enumerate(documents)
]

# Write all rows to the collection in a single call
client.insert(collection_name, data)

# Step - 9: Retrieve Document based on Query

## Step - 9A: Convert query to vector

In [None]:
# Embed the query text with the query-side encoder (`encode_queries`) rather
# than the document-side one, then keep only the dense vectors, which are
# what the search in Step 9B receives as `data`.
query_vector = bge_m3.encode_queries([query])["dense"]

## Step - 9B: Query the DB

In [None]:
# Run a vector search in the collection using the embedded query
retrieved_data = client.search(
    collection_name=collection_name,  # collection to search in
    data=query_vector,  # embedding(s) of the query text
    limit=2,  # number of entities to return
    output_fields=["text", "metadata"],  # fields returned with each hit
)

# Round-trip the search result through JSON to get plain lists/dicts, then
# take the hits for the first (and only) query vector.
# json.loads replaces eval here: eval() on JSON text raises NameError on
# true/false/null, mis-decodes escaped non-BMP characters, and would execute
# arbitrary expressions embedded in the text.
results = json.loads(json.dumps(retrieved_data))[0]

# Print the stored text and metadata of every hit
for doc in results:
    current_doc = doc["entity"]
    print("content: ", current_doc["text"])
    print("metadata: ", current_doc["metadata"])
    print("-" * 50)
    print()


# Step - 10: Delete the Collection and Database

In [None]:
# Remove the demo collection created in Step 6
client.drop_collection(collection_name=collection_name)

In [None]:
# Try to drop the demo database.
# This is best effort: if the drop fails (the notebook attributes this to
# running in Colab), the error is reported and the notebook carries on.
try:
    client.drop_database(db_name)
except Exception as exc:
    # `except Exception` (not a bare `except:`) lets KeyboardInterrupt and
    # SystemExit propagate, so the cell can still be interrupted.
    print("Running In colab")
    # Show the underlying error so unrelated failures are not silently hidden.
    print(f"drop_database failed: {exc!r}")