In [7]:
import os
import openai
from pathlib import Path
from dotenv import load_dotenv, find_dotenv

env_path = Path.home().joinpath('.config/py.env')
_ = load_dotenv(find_dotenv(env_path))

openai.api_key = os.environ['OPENAI_API_KEY']

os.environ["http_proxy"] = "http://127.0.0.1:7890"
os.environ["https_proxy"] = "http://127.0.0.1:7890"

In [2]:
MILVUS_HOST = "localhost"
MILVUS_PORT = "19530"

In [3]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Milvus
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter

loader = WebBaseLoader([
    "https://milvus.io/docs/overview.md",
])

docs = loader.load()

text_splitter = CharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
docs = text_splitter.split_documents(docs)

Created a chunk of size 1743, which is longer than the specified 1024
Created a chunk of size 1278, which is longer than the specified 1024


In [4]:
embeddings = OpenAIEmbeddings()

vector_store = Milvus.from_documents(
    docs,
    embedding=embeddings,
    connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}
)

In [5]:
query = "What is milvus?"
docs = vector_store.similarity_search(query)

print(docs)

[Document(page_content='Milvus workflow.', metadata={'source': 'https://milvus.io/docs/overview.md', 'title': 'Introduction Milvus documentation', 'description': 'Milvus is an open-source vector database designed specifically for AI application development, embeddings similarity search, and MLOps v2.3.x.', 'language': 'en'}), Document(page_content='Introduction Milvus documentationDocsTutorialsToolsBlogCommunityStars0Join SlackTry Managed Milvus FREESearchHomev2.3.x\u200bAbout MilvusWhat is MilvusMilvus AdoptersMilvus RoadmapMilvus LimitsReleasesEnhancement ProposalsBootcampGet StartedUser GuideAdministration GuideIntegrationsBenchmarksToolsReferenceExample ApplicationsFAQsAPI referenceIntroduction\nThis page aims to give you an overview of Milvus by answering several questions. After reading this page, you will learn what Milvus is and how it works, as well as the key concepts, why use Milvus, supported indexes and metrics, example applications, the architecture, and relevant tools.\n

In [6]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="map_reduce", return_intermediate_steps=True)
query = "What is Milvus?"
chain({"input_documents": docs, "question": query}, return_only_outputs=True)


{'intermediate_steps': [' No relevant text.',
  ' What is Milvus? Milvus was created in 2019 with a singular goal: store, index, and manage massive embedding vectors generated by deep neural networks and other machine learning (ML) models. As a database specifically designed to handle queries over input vectors, it is capable of indexing vectors on a trillion scale. Unlike existing relational databases which mainly deal with structured data following a pre-defined pattern, Milvus is designed from the bottom-up to handle embedding vectors converted from unstructured data.',
  ' Milvus is supported by rich APIs and tools to facilitate DevOps. Milvus has client libraries wrapped on top of the Milvus API that can be used to insert, delete, and query data programmatically from application code. The Milvus ecosystem provides helpful tools including: Milvus CLI, Attu, a graphical management system for Milvus, MilvusDM (Milvus Data Migration), an open-source tool designed specifically for impo