In [None]:
! pip install -U milvus pymilvus llama-index accelerate

In [10]:
from llama_index.llms import HuggingFaceLLM

# LLM wrapper around the "Writer/palmyra-small" Hugging Face checkpoint.
# The same hub id is used for both the tokenizer and the model weights.
palmyra_small = HuggingFaceLLM(
    model_name="Writer/palmyra-small",
    tokenizer_name="Writer/palmyra-small",
    # The tokenizer max_length is set to the same value as the context window.
    context_window=1024,
    tokenizer_kwargs=dict(max_length=1024),
    max_new_tokens=256,
    # Sampling is switched off (do_sample=False).
    generate_kwargs=dict(temperature=0.0, do_sample=False),
    device_map="auto",
    # When running on CUDA, memory usage can be reduced by importing torch
    # and passing:
    # model_kwargs={"torch_dtype": torch.float16}
)

In [2]:
from llama_index.query_engine import CitationQueryEngine
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    ServiceContext,
)
from llama_index.vector_stores import MilvusVectorStore

from milvus import default_server

In [3]:
from llama_index.embeddings import HuggingFaceEmbedding

# Embedding model; it is handed to the ServiceContext further down as
# `embed_model`.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L12-v2",
)

  return self.fget.__get__(instance, owner)()


In [4]:
# Start the `default_server` imported from the `milvus` package. Its
# `listen_port` is read later when the MilvusVectorStore is created.
# NOTE(review): no matching stop call is visible in this part of the
# notebook -- confirm the server is shut down at the end.
default_server.start()

In [5]:
# Vector store that talks to the local Milvus server on the port it reports.
vector_store = MilvusVectorStore(
    host="127.0.0.1",
    port=default_server.listen_port,
    collection_name="palmyra_small_test",
    # presumably the output size of `embed_model` -- TODO confirm
    dim=384,
)

In [11]:
# Bundle the local LLM and embedding model, with a chunk size of 512.
service_context = ServiceContext.from_defaults(
    llm=palmyra_small,
    embed_model=embed_model,
    chunk_size=512,
)

# Storage context that uses the Milvus-backed vector store.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [7]:
# Read the source documents from the local ./data/ directory.
documents = SimpleDirectoryReader(input_dir="./data/").load_data()

In [12]:
# Build the vector index from the loaded documents, using the service and
# storage contexts configured above.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
)

In [13]:
# Query engine over the index; retrieves the top 3 most similar chunks.
query_engine = CitationQueryEngine.from_args(
    index,
    # Controls how granular the citation sources are (the default is 512).
    citation_chunk_size=512,
    similarity_top_k=3,
)

In [15]:
import pprint

# Ask the citation query engine a question and pretty-print the full
# Response object it returns.
response = query_engine.query("Tell me about the city of Seattle.")
pprint.pprint(response)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Response(response='\n'
                  'Seattle is a city of the future.\n'
                  '\n'
                  '------\n'
                  'file_path: data/Seattle.txt\n'
                  '\n'
                  'Source 1:\n'
                  '== Further reading ==\n'
                  'Klingle, Matthew (2007). Emerald City: An Environmental '
                  'History of Seattle. New Haven: Yale University Press. ISBN '
                  '978-0-300-11641-0.\n'
                  'MacGibbon, Elma (1904). "Seattle, the city of destiny" '
                  "(DJVU). Leaves of knowledge. Washington State Library's "
                  'Classics in Washington History collection. Shaw & Borden. '
                  'OCLC 61326250.\n'
                  'Pierce, J. Kingston (2003). Eccentric Seattle: Pillars and '
                  'Pariahs Who Made the City Not Such a Boring Place After '
                  'All. Pullman, Washington: Washington State University '
                  'Pre