In [19]:
# !pip install llama-index
# !pip install llama-index-embeddings-llamafile
# !pip install llama-index-llms-llamafile
# !pip install llama-index-readers-web


In [6]:
# ./TinyLlama-1.1B-Chat-v1.0.F16.llamafile --server --nobrowser --embedding --port 8080

In [16]:
# Configure LlamaIndex
from llama_index.core import Settings
from llama_index.embeddings.llamafile import LlamafileEmbedding
from llama_index.llms.llamafile import Llamafile
from llama_index.core.node_parser import SentenceSplitter

# The embedding model and the LLM are both pointed at the same local endpoint
# (port 8080, matching the commented-out llamafile launch command above), so the
# address is written once and shared.
llamafile_url = "http://localhost:8080"

Settings.embed_model = LlamafileEmbedding(base_url=llamafile_url)
Settings.llm = Llamafile(base_url=llamafile_url, temperature=0, seed=0)

# Chunk documents on sentence boundaries and keep each chunk small (256 tokens,
# 5 tokens of overlap). Retrieved chunks get pasted into the LLM prompt during RAG,
# and the model's context window is only 2048 tokens, so small chunks leave room
# for the system prompt and the user's question.
sentence_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=5)
Settings.transformations = [sentence_splitter]

In [20]:
# Gather the source documents: local files first, then one Wikipedia page.
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.web import SimpleWebPageReader

# Read the files found in ./data from disk.
local_doc_reader = SimpleDirectoryReader(input_dir='./data')
docs = local_doc_reader.load_data(show_progress=True)

# Fetch the web pages (HTML converted to text) and append them to the same list.
urls = ['https://en.wikipedia.org/wiki/Push-up']
web_reader = SimpleWebPageReader(html_to_text=True)
docs += web_reader.load_data(urls)

# Build the index, or reuse the copy persisted by an earlier run.
#
# Generating embeddings through the local llamafile server is by far the slowest
# step in this notebook, so it is only done when nothing has been persisted yet.
# Previously the index was written to ./storage but never read back, which meant
# every "Restart & Run All" paid the full embedding cost again.
#
# NOTE: the persisted index is NOT refreshed automatically. Delete ./storage to
# force a rebuild after changing the documents, the chunking settings, or the
# embedding model.
import os

from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage

PERSIST_DIR = "./storage"

if not os.path.exists(PERSIST_DIR):
    # First run: embed every chunk, then save the result for next time.
    index = VectorStoreIndex.from_documents(
        docs,
        show_progress=True,
    )
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Later runs: rebuild the index object from the files persisted on disk.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

Loading files: 100%|██████████| 2/2 [00:00<00:00, 483.94file/s]
  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|██████████| 3/3 [00:00<00:00, 18.99it/s]
Generating embeddings: 100%|██████████| 706/706 [01:51<00:00,  6.34it/s]


In [21]:
# Wrap the vector index in a query engine, relying on as_query_engine()'s defaults
# (no arguments are passed here).
query_engine = index.as_query_engine()


In [26]:
# Natural-language question to send to the query engine; it targets the Push-up
# Wikipedia page that was loaded into the index earlier.
question = "what are some of the variations of push-ups?"

In [27]:
# Run the question through the query engine, then print the response as plain text.
response = query_engine.query(question)
print(response)

Sure, here are some variations of push-ups:

1. Hanuman push-up: This is a variation of the basic push-up that involves
holding the push-up position with the hands on the ground and the feet
together. The arms are extended upwards, and the body is in a straight
line from head to heels.

2. Judo push-up: This is a variation of the push-up that involves holding the
push-up position with the hands on the ground and the feet together. The
body is in a straight line from head to heels, and the arms are extended
upwards.

3. Dive-bomber push-up: This is a variation of the push-up that involves
holding the push-up position with the hands on the ground and the feet
together. The body is in a straight line from head to heels, and the arms
are extended upwards and downwards.

4. Cat stretch: This is a variation of the push-up that involves holding the
push-up position with the hands on the ground and the feet together. The
body is in a straight line from head to heels, and the arms are extended
