This version uses Milvus through Docker Compose, so you must have Docker installed to run this notebook. Milvus is started with `docker compose up -d`, as shown in the cell below.

In [None]:
# Uncomment on the first run to install the dependencies and start Milvus:
# %pip install -qU pymilvus langchain sentence-transformers tiktoken octoai-sdk openai python-dotenv
# !docker compose up -d

In [1]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Mirror OCTOAI_API_TOKEN into OCTOAI_TOKEN. Fail early with a readable message:
# assigning None to os.environ would otherwise raise an opaque TypeError.
octoai_api_token = os.getenv("OCTOAI_API_TOKEN")
if not octoai_api_token:
    raise RuntimeError(
        "OCTOAI_API_TOKEN is not set; export it or add it to your .env file."
    )
os.environ["OCTOAI_TOKEN"] = octoai_api_token

In [2]:
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
# OctoAI-hosted Mixtral instruct model used as the generator in the chain.
# NOTE: the recorded cell output shows this LangChain version moving `model`
# into `model_kwargs`; confirm that is the intended way to select the model.
llm = OctoAIEndpoint(
    model="mixtral-8x7b-instruct-fp16",
    max_tokens=200,
    presence_penalty=0,
    temperature=0.1,
    top_p=0.9,
)

                model was transferred to model_kwargs.
                Please confirm that model is what you intended.


In [3]:
from langchain_community.embeddings import OctoAIEmbeddings
from langchain_community.vectorstores import Milvus

In [4]:
# Embedding client pointed at the OctoAI embeddings endpoint; it is handed to the
# Milvus vector store further down as its embedding function.
embeddings = OctoAIEmbeddings(endpoint_url="https://text.octoai.run/v1/embeddings")

In [5]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
import os

In [6]:
# Keep only regular files (skips sub-directories such as notebook checkpoint
# folders, which cannot be opened as text) and sort the names, because
# os.listdir returns entries in arbitrary order.
files = sorted(
    name
    for name in os.listdir("../data")
    if os.path.isfile(os.path.join("../data", name))
)

In [7]:
# Show the discovered file names as a quick sanity check of the data directory.
files

['1.txt']

In [17]:
# Collects one Document per text chunk; populated by the chunking loop below.
file_texts = []

In [18]:
# Start from an empty list so that re-running this cell rebuilds the chunks
# instead of appending a second copy of every Document.
file_texts = []

# The splitter does not depend on the file being processed, so build it once.
# Chunks are sized in tiktoken tokens: 128 per chunk with a 64-token overlap.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=128, chunk_overlap=64,
)

for file in files:
    # Read as UTF-8 explicitly rather than relying on the platform default encoding.
    with open(f"../data/{file}", encoding="utf-8") as f:
        file_text = f.read()
    print(file_text)

    # splitext only strips the final extension ("a.b.txt" -> "a.b"), whereas
    # split(".")[0] would truncate the title at the first dot ("a").
    doc_title = os.path.splitext(file)[0]

    texts = text_splitter.split_text(file_text)
    for i, chunked_text in enumerate(texts):
        file_texts.append(
            Document(
                page_content=chunked_text,
                metadata={"doc_title": doc_title, "chunk_num": i},
            )
        )

2022-11-12 N Swimming for an hour
2023-01-05 N Running in the park
2022-10-30 N Yoga and meditation
2023-03-18 Y Cycling along the river
2022-08-17 Y Hiking in the mountains
2023-06-20 Y Playing tennis with friends
2022-12-03 Y Strength training at the gym
2023-02-10 N Dancing salsa in a class
2022-09-28 N Pilates session at home
2023-05-15 N Walking in the neighborhood
2022-11-12 N Swimming for an hour



In [21]:
# Explicit switch instead of commenting code in and out:
#   True  -> first run: embed every chunk in `file_texts` and insert it into Milvus.
#   False -> the collection already holds the data you need; just connect to it.
INGEST_DOCUMENTS = False

# Connection settings shared by both paths (Milvus started via Docker Compose
# and reachable on localhost).
MILVUS_CONNECTION_ARGS = {"host": "localhost", "port": 19530}
COLLECTION_NAME = "tasks"

if INGEST_DOCUMENTS:
    vector_store = Milvus.from_documents(
        file_texts,
        embedding=embeddings,
        connection_args=MILVUS_CONNECTION_ARGS,
        collection_name=COLLECTION_NAME,
    )
else:
    vector_store = Milvus(
        embedding_function=embeddings,
        connection_args=MILVUS_CONNECTION_ARGS,
        collection_name=COLLECTION_NAME,
    )

In [22]:
# Wrap the vector store as a retriever (no arguments, so the library defaults
# apply) so it can be plugged into the chain.
retriever = vector_store.as_retriever()

In [23]:
from langchain.prompts import ChatPromptTemplate
# Question-answering prompt with two placeholders: {question} and {context}.
# Both are supplied by the chain when it is invoked.
template="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

In [24]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
def format_docs(docs):
    """Join the text of the retrieved documents into one plain-text context string.

    Without this step the prompt's {context} slot receives the list of Document
    objects itself, which is rendered as a Python repr (including metadata)
    rather than as clean text for the model to read.

    Args:
        docs: Iterable of retrieved documents exposing a `page_content` attribute.

    Returns:
        The documents' `page_content` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Retrieval-augmented chain: the input question is passed through unchanged and is
# also used to fetch context; prompt -> LLM -> plain string output.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [27]:
# Run the whole chain (retrieve -> prompt -> LLM -> string) for a single question.
response = chain.invoke("can you give me a summary for what did I do")

In [28]:
# Display the model's answer as the cell output.
response

' Based on the context, you have been active in various physical activities. In 2022, you went swimming, did yoga and meditation, hiking, strength training at the gym, and swimming again. In 2023, you have run in the park, cycled along the river, played tennis with friends, and did not go for a pilates session at home or walk in the neighborhood. You also tried salsa dancing but it was in a class.'