In [None]:
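# Quote the ">=" specifiers: unquoted, the shell treats ">" as an output redirect.
# %pip install "bentoml>=1.2.2" sentence-transformers==2.2.2 sentencepiece==0.1.99 torch transformers==4.37.1 "pymilvus>=2.3" octoai-sdk python-dotenv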

Steps:
1. Spin up Bento Sentence Transformers Server. [Instructions here.](https://github.com/bentoml/BentoSentenceTransformers)
2. Embed data via Bento and store in Milvus via [Milvus Docker](https://milvus.io/docs/install_standalone-docker-compose.md)
3. Get LLM from [OctoAI](https://octoai.cloud)
4. Do RAG

Note: BentoSentenceTransformers is already cloned into this repo.

# Step 1: Spin up Bento Sentence Transformers Server. [Instructions here.](https://github.com/bentoml/BentoSentenceTransformers)

In [None]:
import bentoml

# Synchronous HTTP client for the Bento Sentence Transformers service from
# Step 1, which is expected to be listening on localhost:3000.
bento_client = bentoml.SyncHTTPClient("http://localhost:3000")

In [None]:
def get_embeddings(texts: list, batch_size: int = 25) -> list:
    """Embed ``texts`` with the Bento sentence-transformers service.

    The texts are sent to ``bento_client.encode`` in consecutive batches of at
    most ``batch_size`` items, so no single request grows without bound.

    Args:
        texts: Strings to embed.
        batch_size: Maximum number of texts per request (default 25).

    Returns:
        A flat list holding one embedding per input text, in input order.
        An empty input yields an empty list without contacting the service.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        # extend() flattens each batch result, so the return value has the
        # same shape no matter how many requests were needed.
        embeddings.extend(bento_client.encode(sentences=batch))
    return embeddings

# Step 2: Embed data via Bento and store in Milvus via [Milvus Docker](https://milvus.io/docs/install_standalone-docker-compose.md)

In [None]:
from pymilvus import connections, utility

In [None]:
# Name of the Milvus collection used throughout this notebook.
COLLECTION_NAME = "bmo_test"
# Connect pymilvus to the Milvus instance expected on localhost:19530.
connections.connect(host="localhost", port=19530)

In [None]:
from pymilvus import FieldSchema, CollectionSchema, DataType

# Vector width declared for the "embedding" field.
# NOTE(review): presumably the output size of the sentence-transformers model
# served in Step 1 — confirm against the deployed model.
DIMENSION = 384

# Only two fixed columns are declared: an auto-generated INT64 primary key
# and the float vector itself.
id_field = FieldSchema(
    name="id",
    dtype=DataType.INT64,
    is_primary=True,
    auto_id=True,
)
embedding_field = FieldSchema(
    name="embedding",
    dtype=DataType.FLOAT_VECTOR,
    dim=DIMENSION,
)
fields = [id_field, embedding_field]

# "enable_dynamic_field" lets rows carry extra metadata keys that are not
# declared in the schema.
schema = CollectionSchema(fields=fields, enable_dynamic_field=True)

In [None]:
from pymilvus import Collection

# Build the Collection handle named COLLECTION_NAME with the schema defined above.
# NOTE(review): presumably this creates the collection on the server when it
# does not exist yet — confirm against the pymilvus docs.
collection = Collection(name=COLLECTION_NAME, schema=schema)

In [None]:
# Settings for the vector index that is built on the "embedding" field.
index_params = dict(
    index_type="HNSW",  # graph-based index; Milvus offers several index types
    metric_type="IP",  # inner product
    params=dict(
        M=8,  # higher M = more memory, better search quality
        efConstruction=64,  # higher efConstruction = slower build, better search
    ),
)

In [None]:
# Build the index on the "embedding" vector field using the settings in
# index_params.
collection.create_index(field_name="embedding", index_params=index_params)
# Load the collection into memory; this runs after the index has been created.
collection.load()

In [None]:
# naively chunk on newlines
def chunk_text(filename: str, encoding: str = "utf-8") -> list:
    """Read a text file and naively chunk it on newlines.

    Args:
        filename: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The file content split on newline characters. Blank lines are kept
        as empty strings; no filtering happens here.
    """
    with open(filename, "r", encoding=encoding) as f:
        text = f.read()
    return text.split("\n")

In [None]:
import os

In [None]:
# Keep only regular, non-hidden files: a stray ".DS_Store" or an
# ".ipynb_checkpoints" directory would otherwise be treated as a city and
# break the file read that follows.
# Sorted because os.listdir() returns entries in arbitrary order.
cities = sorted(
    name
    for name in os.listdir("data")
    if not name.startswith(".") and os.path.isfile(os.path.join("data", name))
)

In [None]:
# Build one {"city_name", "chunks"} record per data file, keeping only the
# chunks that are longer than 7 characters.
city_chunks = [
    {
        "city_name": city.split(".")[0],
        "chunks": [piece for piece in chunk_text(f"data/{city}") if len(piece) > 7],
    }
    for city in cities
]

In [None]:
# Flatten every city's chunks into rows: the vector plus its "sentence" text
# and "city" name as metadata.
entries = []
for city_dict in city_chunks:
    chunks = city_dict["chunks"]
    city_name = city_dict["city_name"]
    vectors = get_embeddings(chunks)  # expected to be index-aligned with chunks
    entries.extend(
        {
            "embedding": vector,
            "sentence": chunks[idx],  # really a multi-sentence chunk, despite the key name
            "city": city_name,
        }
        for idx, vector in enumerate(vectors)
    )

In [None]:
# Insert all prepared rows. "sentence" and "city" are not declared in the
# schema; they rely on the dynamic field enabled when the schema was built.
collection.insert(entries)

In [None]:
# Flush the collection after the insert.
# NOTE(review): presumably this persists the pending rows so they become
# searchable — confirm against the pymilvus docs.
collection.flush()

# Step 3: Get LLM from [OctoAI](https://octoai.cloud)

In [None]:
from dotenv import load_dotenv
# NOTE(review): presumably loads variables from a local .env file into the
# process environment — confirm where the .env file is expected to live.
load_dotenv()

In [None]:
# Re-export the key stored under OCTOAI_API_TOKEN as OCTOAI_TOKEN.
# os.environ only accepts strings, so a missing variable is reported with an
# actionable message instead of the bare TypeError that assigning None raises.
# An OCTOAI_TOKEN that is already set is left untouched when the source
# variable is absent.
_octoai_api_token = os.getenv("OCTOAI_API_TOKEN")
if _octoai_api_token:
    os.environ["OCTOAI_TOKEN"] = _octoai_api_token
elif not os.environ.get("OCTOAI_TOKEN"):
    raise RuntimeError(
        "OctoAI token not found: set OCTOAI_API_TOKEN (for example in your .env file) "
        "or OCTOAI_TOKEN before running this cell."
    )

In [None]:
from octoai.client import Client

# No token is passed explicitly; the client is expected to pick it up from the
# OCTOAI_TOKEN environment variable set in the previous cell — TODO confirm.
octo_client = Client()

# Step 4: Do RAG

In [None]:
def dorag(question: str, context: str):
    """Ask the chat model to answer ``question`` using only ``context``.

    Args:
        question: The user's question, sent as the user message.
        context: Retrieved text that is embedded in the system prompt.

    Returns:
        Whatever ``model_dump()`` returns for the completion object.
    """
    system_message = {
        "role": "system",
        "content": f"You are a helpful assistant. The user has a question. Answer the user question based only on the context: {context}",
    }
    user_message = {"role": "user", "content": f"{question}"}

    response = octo_client.chat.completions.create(
        messages=[system_message, user_message],
        model="nous-hermes-2-mixtral-8x7b-dpo",
        max_tokens=512,
        presence_penalty=0,
        temperature=0.1,
        top_p=0.9,
    )
    return response.model_dump()

In [None]:
def ask_a_question(question, top_k: int = 5):
    """Answer ``question`` with retrieval-augmented generation.

    The question is embedded, the ``top_k`` closest chunks are fetched from
    the collection, and their text is joined into the context given to
    ``dorag``.

    Args:
        question: The question to answer.
        top_k: Number of chunks to retrieve as context (default 5).

    Returns:
        Whatever ``dorag`` returns for the question and the retrieved context.
    """
    embeddings = get_embeddings([question])
    res = collection.search(
        data=embeddings,  # the single query embedding, as a list of vectors
        anns_field="embedding",  # search across the embedding field
        # Milvus documents ef as having to be at least the number of requested
        # results, so it grows with top_k and never drops below 16.
        param={"metric_type": "IP", "params": {"ef": max(16, top_k)}},
        limit=top_k,  # number of hits to return
        output_fields=["sentence"],  # only the chunk text is needed for the prompt
    )
    sentences = []
    for hits in res:
        for hit in hits:
            sentence = hit.entity.get("sentence")
            # Skip hits without a "sentence" value so join() never sees None.
            if sentence is not None:
                sentences.append(sentence)
    context = ". ".join(sentences)
    return dorag(question, context)

In [None]:
# End-to-end check: ask one question and print only the message text of the
# first choice in the returned completion.
print(ask_a_question("What state is Cambridge in?")["choices"][0]["message"]["content"])

In [None]:
# cleanup
# if utility.has_collection(COLLECTION_NAME):
#     utility.drop_collection(COLLECTION_NAME)