In [1]:
# One-time environment setup, kept commented out so "Run All" does not reinstall
# packages or restart containers. Uncomment and run once on a fresh machine.
# The docker-compose line starts the services from the local compose file in
# detached mode (presumably the Milvus server this notebook connects to — confirm).
# ! pip install --upgrade --quiet pymilvus langchain langchain-community langchainhub langchain-openai unstructured octoai-sdk sentence-transformers
# ! docker-compose up -d

In [1]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
# Load the documents found under ../city_data and split them into chunks of
# up to 1024 characters, with 128 characters of overlap between neighbours.
loader = DirectoryLoader("../city_data")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=128,
)
docs = loader.load_and_split(text_splitter=text_splitter)

In [3]:
from dotenv import load_dotenv
import os

# Read variables from a local .env file (if one exists) into os.environ.
load_dotenv()

# Fail fast with an actionable message when the token is absent. The previous
# `os.environ[...] = os.getenv(...)` round-trip was a no-op when the variable
# was set and raised an opaque "str expected, not NoneType" TypeError when it
# was not, because environment values must be strings.
if "OCTOAI_API_TOKEN" not in os.environ:
    raise RuntimeError(
        "OCTOAI_API_TOKEN is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [8]:
from pymilvus import connections, utility
# Open the default pymilvus connection to the local server, then start from a
# clean slate by printing and dropping each collection the server reports.
# NOTE(review): this removes EVERY collection on the server, not only the one
# this notebook creates — confirm that is intended before pointing it at a
# shared Milvus instance.
connections.connect(host="localhost", port=19530)

for existing_name in utility.list_collections():
    print(existing_name)
    utility.drop_collection(existing_name)

In [9]:
from langchain_community.vectorstores import Milvus
from langchain_community.embeddings import HuggingFaceEmbeddings

# Build the embedding model with its default settings, then embed the document
# chunks and store them in the "cities" collection of the local Milvus server.
embeddings = HuggingFaceEmbeddings()

db = Milvus.from_documents(
    docs,
    embeddings,
    collection_name="cities",
    connection_args={"host": "127.0.0.1", "port": 19530},
)



TypeError: VectorStore.from_documents() missing 1 required positional argument: 'embedding'

In [5]:
# Expose the vector store through the retriever interface (default settings)
# so it can be wrapped as an agent tool in the next cell.
retriever = db.as_retriever()

In [6]:
from langchain.tools.retriever import create_retriever_tool

# Wrap the retriever as a named tool; the description is the text the agent
# sees when deciding whether to call it.
tool = create_retriever_tool(
    retriever,
    name="search_cities",
    description="Searches and returns excerpts from Wikipedia entries of many cities.",
)

# The agent and its executor both take the tools as a list.
tools = [tool]

In [7]:
from langchain import hub

# Fetch the "hwchase17/openai-tools-agent" prompt from the LangChain hub.
prompt = hub.pull("hwchase17/openai-tools-agent")
# Display the prompt's message templates as the cell output for inspection.
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [8]:
from langchain_openai import ChatOpenAI

# Chat model that drives the agent, created with temperature=0.
# NOTE(review): the notebook loads OCTOAI_API_TOKEN earlier, but this client is
# ChatOpenAI — presumably it needs its own OpenAI credentials; confirm which
# provider is intended.
llm = ChatOpenAI(temperature=0)

In [9]:
from langchain.agents import AgentExecutor, create_openai_tools_agent

# Combine the model, the tool list and the hub prompt into a tool-calling agent.
agent = create_openai_tools_agent(
    llm=llm,
    tools=tools,
    prompt=prompt,
)

# The executor runs the agent and is given the same tool list to call.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
)

In [10]:
# Ask the agent a single question; the prompt's only required variable is "input".
result = agent_executor.invoke({"input": "What is the size of San Francisco?"})

In [11]:
# Show only the agent's final answer string from the result mapping.
result["output"]

'The size of San Francisco is approximately 46.9 square miles (121 square kilometers). It is the fourth most populous city in California with 808,437 residents and the 17th most populous city in the United States as of 2022.'