In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.huggingface import HuggingFaceLLM

In [3]:
# Read the contents of the local ./Data/ folder into llama-index documents.
data_reader = SimpleDirectoryReader("./Data/")
documents = data_reader.load_data()

In [4]:
# Dump the loaded Document objects (full repr, including metadata and text) for inspection.
print(documents)

[Document(id_='a879d22b-a207-44eb-b2f2-af74f2a508dc', embedding=None, metadata={'file_path': 'c:\\Users\\vikas\\OneDrive\\Desktop\\Generative-AI-Sunny-Sir\\RAG App using LLAMAINDEX & MistralAI\\Data\\MLDOC.txt', 'file_name': 'MLDOC.txt', 'file_type': 'text/plain', 'file_size': 22273, 'creation_date': '2025-02-12', 'last_modified_date': '2025-02-07'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='What is machine learning?\r\nMachine learning is a branch of artificial intelligence (AI) and computer science which\r\nfocuses on the use of data and algorithms to imitate the way that humans learn,\r\ngradually improving its accuracy.\r\nIBM ha

In [5]:
# Set up prompts for the Mistral-7B-Instruct model loaded in the LLM cell.
# The previous <|SYSTEM|>/<|USER|>/<|ASSISTANT|> markers belong to StableLM's
# chat format and are not special tokens for Mistral.
from llama_index.core import PromptTemplate

# Plain-text instructions passed to the LLM as its system prompt.
# Mistral-Instruct v0.1 has no dedicated system token, so no marker is used.
system_prompt = (
    "You are a Q&A assistant. Your goal is to answer questions as \n"
    "accurately as possible based on the instructions and context provided.\n"
)

# This wraps the default prompts that are internal to llama-index in Mistral's
# [INST] ... [/INST] instruction markers.
query_wrapper_prompt = PromptTemplate("[INST] {query_str} [/INST]")

In [6]:
import torch

# Load Mistral-7B-Instruct through llama-index's Hugging Face wrapper, using
# the system prompt and query wrapper defined in the prompt cell.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    # Greedy decoding. `temperature` only takes effect when sampling is
    # enabled, so it is omitted here instead of being silently ignored.
    generate_kwargs={"do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
    model_name="mistralai/Mistral-7B-Instruct-v0.1",
    device_map="auto",
    # Stop on Mistral's end-of-sequence token (</s>, id 2). The earlier ids
    # (50277-50279) were StableLM tokens outside Mistral's vocabulary.
    stopping_ids=[2],
    tokenizer_kwargs={"max_length": 4096},
)

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]Error while downloading from https://cdn-lfs.hf.co/repos/ea/00/ea00943d992c7851ad9f4f4bd094a0397fb5087e0f7cba4ef003018963ea07e3/a464228d9c9427bf035cfd5a2e18bf0494d7231bfacc478ec5fc9f57a612d051?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00001-of-00002.safetensors%3B+filename%3D%22model-00001-of-00002.safetensors%22%3B&Expires=1739344648&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczOTM0NDY0OH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy9lYS8wMC9lYTAwOTQzZDk5MmM3ODUxYWQ5ZjRmNGJkMDk0YTAzOTdmYjUwODdlMGY3Y2JhNGVmMDAzMDE4OTYzZWEwN2UzL2E0NjQyMjhkOWM5NDI3YmYwMzVjZmQ1YTJlMThiZjA0OTRkNzIzMWJmYWNjNDc4ZWM1ZmM5ZjU3YTYxMmQwNTE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=aKarGnICo1EK2Kf4IuS9ad6K2VEtxBoitGZeO9IlPb8j0UFMDHywJgxLVmHbvHKd-9ih3b34ifSSDD6VXEEVs3cQsM-quzme7aSnnCFYq%7EmhcUua5D6AFAGUhHNNpzg-X0HOLaPamjon5vHrJI1kDpHYpUVffwJJbb%7EtLm-LAH3YFnUZ

KeyboardInterrupt: 

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embedding model used to vectorise the documents and queries.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

In [None]:
from llama_index.core import VectorStoreIndex, ServiceContext

service_context = ServiceContext.from_defaults(
    chunk_size = 1024,
    llm = llm,
    embed_model = embed_model
)

In [None]:
index = VectorStoreIndex.from_documents(documents,service_context=service_context)

In [None]:
# Create a query engine over the index using as_query_engine()'s default arguments.
query_engine = index.as_query_engine()

In [None]:
# Send a question through the query engine and show the response as the cell output.
response = query_engine.query("What is RAG?")
response

In [None]:
# Print a closing marker as the notebook's final cell.
print("The End")