In [None]:
! pip install llama-index nltk milvus pymilvus openai python-dotenv requests

In [3]:
from pathlib import Path
import requests

# Download the plain-text extract of one Wikipedia article and save it under
# halloween_data/ as "<title>.txt".
wiki_page = "The Nightmare Before Christmas"
http_response = requests.get(
    'https://en.wikipedia.org/w/api.php',
    params={
        'action': 'query',
        'format': 'json',
        'titles': wiki_page,
        'prop': 'extracts',
        'explaintext': True,
    },
    timeout=30,  # do not let a stalled connection hang the kernel forever
)
# Fail here on an HTTP error instead of on a confusing JSON/KeyError further down.
http_response.raise_for_status()
response = http_response.json()

# Results are keyed by page id; a single title was requested, so take the first entry.
page = next(iter(response['query']['pages'].values()))
if 'extract' not in page:
    raise ValueError(f"Wikipedia returned no text extract for {wiki_page!r}")
wiki_text = page['extract']

data_path = Path('halloween_data')
data_path.mkdir(exist_ok=True)

# Explicit UTF-8: the article contains non-ASCII characters, which the
# platform's default encoding is not guaranteed to be able to write.
with open(data_path / f"{wiki_page}.txt", 'w', encoding='utf-8') as fp:
    fp.write(wiki_text)

In [1]:
import os
from dotenv import load_dotenv
import openai
# Load variables from a .env file into the process environment, then read the
# credentials used by the rest of the notebook (each is None when unset).
load_dotenv()

openai.api_key = os.environ.get("OPENAI_API_KEY")

ZILLIZ_URI = os.environ.get("ZILLIZ_CLUSTER_01_URI")
ZILLIZ_TOKEN = os.environ.get("ZILLIZ_CLUSTER_01_TOKEN")

In [2]:
from llama_index.vector_stores import MilvusVectorStore
# Vector store backed by the "halloween" Milvus collection, reached through the
# ZILLIZ_* URI and token read from the environment.
# `dim` must equal the vector size produced by the embedding model used for indexing.
vdb = MilvusVectorStore(
    uri=ZILLIZ_URI,
    token=ZILLIZ_TOKEN,
    collection_name="halloween",
    dim=384,
)

In [3]:
from llama_index import ServiceContext
from llama_index.embeddings import HuggingFaceEmbedding

# Sentence-embedding model referenced by its Hugging Face name
# (the model files are downloaded the first time this runs).
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L12-v2",
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/573 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [4]:
from llama_index import SimpleDirectoryReader, VectorStoreIndex, StorageContext
# Embed with `embed_model` and persist the vectors in `vdb`.
service_context = ServiceContext.from_defaults(embed_model=embed_model)
storage_context = StorageContext.from_defaults(vector_store=vdb)

# Load every file found in ./halloween_data/ and build the vector index over them.
documents = SimpleDirectoryReader("./halloween_data/").load_data()
vector_index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
)

In [5]:
from llama_index.query_engine import CitationQueryEngine
# Query engine over the vector index whose answers cite their source chunks.
query_engine = CitationQueryEngine.from_args(
    vector_index,
    # citation_chunk_size sets how granular the cited sources are (512 is the default)
    citation_chunk_size=512,
    similarity_top_k=3,
)

In [6]:
# Ask the citation query engine a first question.
# Note: this rebinds `response`, which earlier held the Wikipedia API JSON.
response = query_engine.query("Who is the main character?")

In [7]:
# Show the query result as this cell's output.
response



In [8]:
# Print the text of every source node attached to the response, one after another.
for cited in response.source_nodes:
    chunk_text = cited.node.get_text()
    print(chunk_text)

Source 1:
=== Filming ===
Selick and his team of animators began production in July 1991 in San Francisco, California, with a crew of over 120 workers, utilizing 20 sound stages for filming. Joe Ranft was hired from Disney as a storyboard supervisor, while Eric Leighton was hired to supervise animation. At the peak of production, 20 individual stages were simultaneously being used for filming. In total, there were 109,440 frames taken for the film. The work of Ray Harryhausen, Ladislas Starevich, Edward Gorey, Étienne Delessert, Gahan Wilson, Charles Addams, Jan Lenica, Francis Bacon, and Wassily Kandinsky influenced the filmmakers. Selick described the production design as akin to a pop-up book. In addition, Selick stated, "When we reach Halloween Town, it's entirely German Expressionism. When Jack enters Christmas Town, it's an outrageous Dr. Seuss-esque setpiece. Finally, when Jack is delivering presents in the 'Real World', everything is plain, simple and perfectly aligned." Vincen

In [21]:
# Citation engine with fine-grained sources: citation_chunk_size of 64
# instead of the default 512.
query_engine_64 = CitationQueryEngine.from_args(
    vector_index,
    citation_chunk_size=64,
    similarity_top_k=3,
)

In [11]:
# Citation engine with coarse sources: citation_chunk_size of 512,
# which is also the default value.
query_engine_512 = CitationQueryEngine.from_args(
    vector_index,
    citation_chunk_size=512,
    similarity_top_k=3,
)

In [22]:
# Put the identical question to both engines so only the citation chunk size differs.
main_char_question = "Who is the main character?"
res_64_main_char = query_engine_64.query(main_char_question)
res_512_main_char = query_engine_512.query(main_char_question)

In [23]:
# Second comparison: a broader question, again sent unchanged to both engines.
plot_question = "What is the plot?"
res_64_plot = query_engine_64.query(plot_question)
res_512_plot = query_engine_512.query(plot_question)

In [27]:
from pprint import pprint

In [29]:
# Main-character answer from the citation_chunk_size=64 engine.
pprint(res_64_main_char.response)

'The main character of the film is Jack Skellington [23].'


In [30]:
# Main-character answer from the citation_chunk_size=512 engine.
pprint(res_512_main_char.response)

'The main character of the film is Jack Skellington [6].'


In [31]:
# Plot answer from the citation_chunk_size=64 engine (pprint wraps the long string).
pprint(res_64_plot.response)

('The plot of "The Nightmare Before Christmas" revolves around Jack '
 'Skellington, the Pumpkin King of Halloween Town, who becomes bored with the '
 'same routine of Halloween and discovers Christmas Town. Intrigued by the '
 'concept of Christmas, Jack decides to take over Christmas and assigns the '
 'residents of Halloween Town various Christmas-themed tasks. However, their '
 "efforts lead to chaos and Jack's plan goes awry. Eventually, Jack realizes "
 'his mistake and works to fix the mess he created. The film ends with Jack '
 'and Sally declaring their love for each other and sharing a kiss [23-38].')


In [32]:
# Plot answer from the citation_chunk_size=512 engine (pprint wraps the long string).
pprint(res_512_plot.response)

('The plot of "The Nightmare Before Christmas" revolves around Jack '
 'Skellington, the Pumpkin King of Halloween Town, who becomes tired of the '
 'same routine of Halloween and discovers Christmas Town. Intrigued by the '
 'concept of Christmas, Jack decides that Halloween Town will take over '
 'Christmas this year. He assigns the residents various Christmas-themed '
 "tasks, but his efforts lead to disastrous consequences. Jack's love "
 'interest, Sally, warns him about the potential disaster, but he dismisses '
 'chaos he has caused. With the help of Santa Claus, Jack saves Christmas and '
 'learns the true meaning of the holiday. The film ends with Jack and Sally '
 'declaring their love for each other [6][7][8][9].')
