In [None]:
! pip install llama-index python-dotenv openai

In [1]:
import os
from dotenv import load_dotenv
import openai

# Load variables from a .env file into the process environment.
load_dotenv()

# Connection settings for the vector store (passed to MilvusVectorStore further down).
# A missing variable yields None here; nothing is validated at this point.
zilliz_uri = os.environ.get("ZILLIZ_URI")
zilliz_token = os.environ.get("ZILLIZ_TOKEN")

# Hand the OpenAI key to the openai module.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [2]:
from llama_index.vector_stores import MilvusVectorStore
from llama_index import VectorStoreIndex, ServiceContext

In [3]:
from llama_index.embeddings import HuggingFaceEmbedding

# Embedding model that is handed to the ServiceContext below.
# NOTE(review): presumably this must be the same model that produced the vectors
# already stored in the collection — confirm against the ingestion code.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L12-v2",
)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Vector store handle for the "tds_articles" collection, using the URI and token
# read from the environment earlier in the notebook.
vdb = MilvusVectorStore(
    uri=zilliz_uri,
    token=zilliz_token,
    collection_name="tds_articles",
    similarity_metric="L2",
    # presumably the collection field that holds each chunk's text — verify against the schema
    text_key="paragraph",
)

In [5]:
# Only the embedding model is overridden; every other ServiceContext setting
# is left to from_defaults.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
)

# Build the index directly on top of the vector store created above,
# rather than from documents in this notebook.
vector_index = VectorStoreIndex.from_vector_store(
    vector_store=vdb,
    service_context=service_context,
)

In [6]:
from llama_index.query_engine import CitationQueryEngine

In [7]:
# Query engine over the index; only the index is passed, so all other
# from_args options keep their defaults.
query_engine = CitationQueryEngine.from_args(vector_index)

In [9]:
# Run a sample question through the citation query engine and keep the
# returned object in `res` for inspection in a later cell.
res = query_engine.query("What is a large language model?")

In [10]:
from pprint import pprint

In [11]:
# Pretty-print the whole response object: the answer text (with its [n] citation
# markers) followed by the source_nodes and their scores.
pprint(res)

Response(response='A large language model refers to a type of model that has '
                  'been developed to have significant capabilities in '
                  'understanding and generating human language. These models '
                  'have achieved notable milestones in their development [1].',
         source_nodes=[NodeWithScore(node=TextNode(id_='40e156b7-57ba-4026-a9a7-7a7153d309ea', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='7e3207d34fa042975d8321bf9c2b8ab275ff63968ed46f977eba9cac20cd721b', text='Source 1:\nThe following papers introduce some milestones on the journey toward large language models.\n', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=24.350723266601562),
                       NodeWithScore(node=TextNode(id_='90f54c36-e2cb-4c2f-aca6-627d4d411b96', embedding=None, metadata={}, 