# 🤖 LLAMA 3.1 RAG implementation 📃
#### But before that you need some API keys; all of them are necessary for LLaMA 3.1 with RAG to work

#### Get your API keys from here:

*   LLAMA_PARSER_KEY : https://docs.cloud.llamaindex.ai/llamaparse/getting_started/get_an_api_key

*   HF_TOKEN : https://huggingface.co/docs/hub/security-tokens

* TELEGRAM_TKN : https://core.telegram.org/bots/tutorial

In [None]:
# Credentials used by the cells below. Replace each placeholder with your own
# value and keep real tokens out of version control.
TELEGRAM_TKN = 'PASTE-YOUR-TOKEN-HERE'  # Telegram bot token, passed to Updater in the bot cell
LLAMA_PARSER_KEY = 'PASTE-YOUR-KEY-HERE'  # LlamaParse API key, passed to LlamaParse(api_key=...)
HF_TOKEN = 'PASTE-YOUR-KEY-HERE'  # Hugging Face access token, used when loading the Llama tokenizer and model

In [None]:
# Install the Hugging Face and LlamaIndex packages this notebook imports.
# The llama-index packages are pinned to exact versions; transformers,
# bitsandbytes and accelerate are upgraded (-U) to whatever is newest.
!pip install transformers -U
!pip install llama-index==0.10.67.post1 -q
!pip install llama-index-llms-huggingface==0.2.8 -q
!pip install llama-index-embeddings-huggingface==0.2.3 -q
!pip install llama-index-embeddings-huggingface-api==0.1.1 -q
!pip install -U bitsandbytes accelerate -q

## tokenizer and stopping id setup

In [None]:
from transformers import AutoTokenizer

# Load the Llama 3.1 tokenizer with the Hugging Face access token.
# The keyword has to be lowercase `token`: an upper-case `TOKEN` is not the
# access-token argument, so the download would not be authenticated.
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    token=HF_TOKEN,
)

# Token ids at which generation must stop: the tokenizer's end-of-sequence id
# and the id of the "<|eot_id|>" end-of-turn marker.
stopping_ids = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

## actual model llama3.1

In [None]:
# generate_kwargs parameters are taken from https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct

import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import BitsAndBytesConfig

# Serve the same Llama 3.1 checkpoint that the tokenizer cell loads, so the
# stopping ids computed there belong to the model that generates the text.
LLAMA_MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Quantize the weights to 4 bits (NF4 with double quantization, float16
# compute) so loading the 8B model does not crash the runtime.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

llm = HuggingFaceLLM(
    model_name=LLAMA_MODEL_NAME,
    model_kwargs={
        "token": HF_TOKEN,  # Hugging Face access token from the first cell
        "quantization_config": quantization_config,
    },
    # Sampling settings for generation.
    generate_kwargs={
        "do_sample": True,
        "temperature": 0.6,
        "top_p": 0.9,
    },
    tokenizer_name=LLAMA_MODEL_NAME,
    stopping_ids=stopping_ids,
)

# TESTING 🚀🧪🥼

In [None]:
from llama_index.core.llms import ChatMessage

# Keep the smoke test short: allow at most 10 newly generated tokens.
llm.max_new_tokens = 10

messages = [
    ChatMessage(role="system", content="You are an AI agent named XGENAI which has the feature of RAG(Retrieval Augumented Generation) and you are designed to provide users with accurate and effective solutions. Respond to the user with clear and helpful guidance."),
    ChatMessage(role="user", content=input("Type a prompt : ")),
]
response = llm.chat(messages)

# Print only the assistant's text. Reading it from the message object avoids
# slicing a hard-coded 11 characters ("assistant: ") off the stringified
# response, which silently breaks if the string format ever changes.
print(response.message.content)

### downloading remote data

In [None]:
!wget 'https://www.bseindia.com/xml-data/corpfiling/AttachHis/24a0940c-fa5a-4e36-a404-9dab3637ea0c.pdf' 'doc'

# ⭐ RAG with LlamaIndex's RAG ecosystem 🌿

### ⏳ Basic version: faster, but lower-quality document parsing

In [None]:
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# The embedding model is otherwise only created in a later cell. Build it here
# when it does not exist yet, so this cell also works when the notebook is run
# from top to bottom; an already-loaded model is reused as is.
if "embed_model" not in globals():
    embed_model = HuggingFaceEmbedding(
        model_name="google-bert/bert-base-multilingual-cased"
    )

# Make the embedding model and the LLM the defaults for LlamaIndex.
Settings.embed_model = embed_model
Settings.llm = llm

# Load the document with the default readers (no LlamaParse involved).
documents = SimpleDirectoryReader(
    input_files=["ch6-gita.pdf"]
).load_data()

# Build the vector index over the loaded documents.
index = VectorStoreIndex.from_documents(
    documents,
)

# Answer each query from the 3 most similar retrieved results.
query_engine = index.as_query_engine(similarity_top_k=3)

### ⚡💡 LlamaParse (better parsing quality) — limit: 1k pages/day

In [None]:
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader, Settings, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Patch the running event loop so it can be re-entered inside the notebook.
import nest_asyncio as n
n.apply()

# The embedding model is otherwise only created in a later cell. Build it here
# when it does not exist yet, so this cell also works when the notebook is run
# from top to bottom; an already-loaded model is reused as is.
if "embed_model" not in globals():
    embed_model = HuggingFaceEmbedding(
        model_name="google-bert/bert-base-multilingual-cased"
    )

Settings.embed_model = embed_model
Settings.llm = llm

# set up parser
# The key is the LLAMA_PARSER_KEY constant from the first cell; no variable
# named LLAMA_CLOUD_API_KEY is defined anywhere in this notebook.
parser = LlamaParse(
    api_key=LLAMA_PARSER_KEY,
    result_type="text"  # "markdown" and "text" are available
)

# using SimpleDirectoryReader to parse our file: route .pdf files through LlamaParse
file_extractor = {".pdf": parser}
documents = SimpleDirectoryReader(input_files=['PATH-TO-YOUR-DOCUMENT.PDF'], file_extractor=file_extractor).load_data()


In [None]:
# Inspect what the reader returned for the parsed file.
print(documents)

In [None]:
# Index the parsed documents, then expose the index as a query engine that
# streams its answer and retrieves the 3 most similar results per query.
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=3,
)

In [None]:
# Upper bound on how many new tokens the model may generate for the answer.
llm.max_new_tokens = 100

# Read one question from the user, run it through the query engine and print
# the answer as it streams in.
question = input()
response = query_engine.query(question)
response.print_response_stream()

In [None]:
# Show the index object built in the cell above (debugging aid).
print(index)

## streamlit app for GUI (FUTURE PERSPECTIVE)

In [None]:
import streamlit as st

# Placeholder for chat history and uploaded documents
# NOTE(review): main() below keeps its state in st.session_state and rebinds
# `uploaded_files` as a local, so these module-level lists appear to be unused.
chat_history = []
uploaded_files = []

def main():
    """Render the XGenAI Streamlit page: header, sidebar, documents and chat.

    State is kept in ``st.session_state`` under two keys, both created up
    front so every section below can rely on them:

    * ``chat_history`` -- list of "User: ..." / "Bot: ..." strings, always
      appended in pairs (user message first, then the placeholder reply).
    * ``uploaded_files`` -- files kept after the user clicks "Process".

    Returns:
        None. All output goes through Streamlit calls.
    """
    st.set_page_config(
        page_icon="🚀",
        page_title='XGenAI Bot💖'
    )

    st.write("# XGenAI backed by LLaMA 3.1 🚀 ")
    st.write("Chat with Documents 📖 & equipped with 🧠 Memory")

    if 'chat_history' not in st.session_state:
        st.session_state['chat_history'] = []
    if 'uploaded_files' not in st.session_state:
        st.session_state['uploaded_files'] = []

    # Record the new message before anything that displays the history is
    # drawn, so the sidebar and the transcript both include it on this run
    # instead of lagging one message behind.
    user_input = st.chat_input("Say something")
    if user_input:
        st.session_state['chat_history'].append(f"User: {user_input}")
        st.session_state['chat_history'].append(f"Bot: This is a placeholder response to '{user_input}'")

    # Sidebar for chat history
    with st.sidebar:
        st.header("Chat History 🗂️")
        chat_expander = st.expander("Show/Hide Chat History")
        with chat_expander:
            # Test for an empty list rather than a missing key: the key always
            # exists, so a key check would never reach the fallback text.
            if st.session_state['chat_history']:
                for chat in st.session_state['chat_history']:
                    st.write(chat)
            else:
                st.write("No chat history available.")

    # Sidebar for document uploads
    with st.sidebar:
        st.header("Your Documents 📃")
        uploaded_files = st.file_uploader("Upload files and click on 'Process' 🔥", accept_multiple_files=True)

        if st.button("Process"):
            with st.spinner("Processing..."):
                # all the features of rag (from llamaparser)

                if uploaded_files:
                    stored_files = st.session_state['uploaded_files']
                    known_names = {doc.name for doc in stored_files}
                    # Skip files already stored so that clicking "Process"
                    # again does not list the same document twice.
                    for doc in uploaded_files:
                        if doc.name not in known_names:
                            stored_files.append(doc)
                            known_names.add(doc.name)

    # Display uploaded documents
    if st.session_state['uploaded_files']:
        st.subheader("Uploaded Documents")
        for doc in st.session_state['uploaded_files']:
            st.write(doc.name)

    # Display chat history. Entries are appended in user/bot pairs, so even
    # positions are user messages and odd positions are bot replies.
    for i, message in enumerate(st.session_state['chat_history']):
        with st.chat_message("user" if i % 2 == 0 else "assistant"):
            st.write(message)



# Render the Streamlit page when this code runs as the main module.
if __name__ == "__main__":
    main()


# Google BERT multilingual model as the embedding model, for better handling of Hindi words

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Multilingual BERT checkpoint; the RAG cells assign this object to
# Settings.embed_model.
embed_model = HuggingFaceEmbedding(
    model_name="google-bert/bert-base-multilingual-cased",
)

# Integration with Telegram 🤖 : https://t.me/x_genai_bot

In [None]:
# Pinned to 13.15; the bot code below imports Updater and Filters from telegram.ext.
!pip install python-telegram-bot==13.15 -q

#### set the token limit to respond

In [None]:
# Upper bound on newly generated tokens per reply; applies to every later
# call on this `llm` object, including the Telegram handlers.
llm.max_new_tokens = 100

#### telegram bot code

In [None]:
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackContext
from llama_index.core import SimpleDirectoryReader, Settings, VectorStoreIndex
from llama_index.core.llms import ChatMessage
from llama_parse import LlamaParse
from telegram import Update
from functools import wraps
# import nest_asyncio as n
# n.apply()
import random
import time

# TELEGRAM_TKN is defined in 1st CELL
# Query engine for the most recently parsed document. It stays None until
# parse_document() builds one; as a single module-level global it is shared
# by every chat that talks to the bot.
query_engine = None

# Function to show that the bot is typing...
def send_action(action):
    """Build a decorator that sends a chat action before running a handler.

    Args:
        action: Chat action passed to ``context.bot.send_chat_action``
            (the handlers in this file use ``'typing'``).

    Returns:
        A decorator for ``(update, context, ...)`` handlers. The wrapped
        handler first sends ``action`` to the chat of
        ``update.effective_message`` and then returns whatever the original
        handler returns.
    """
    def wrap_handler(handler):
        @wraps(handler)
        def notifying_handler(update, context, *args, **kwargs):
            target_chat = update.effective_message.chat_id
            context.bot.send_chat_action(chat_id=target_chat, action=action)
            return handler(update, context, *args, **kwargs)

        return notifying_handler

    return wrap_handler


# Telegram-based functions
@send_action('typing')
def start(update: Update, context: CallbackContext) -> None:
    """Handle /start by replying with one randomly chosen greeting."""
    greetings = [
        "Hey there! XgenAI at your service, what's going on?",
        "Oh Hi! It's XgenAI here. What can I do for you today?",
        "XgenAI here! Ready for some fun? What can we discuss today?",
        "Hi there, you've reached XgenAI. How can I assist you?",
        "Namaste, XgenAI here! What would you like to talk about?",
    ]
    update.message.reply_text(random.choice(greetings))


@send_action('typing')
def about(update: Update, context: CallbackContext) -> None:
    """Handle /about by replying with a short description of the bot."""
    description = "Hi, this is XGENAI you can ask me about anything. You can pass your document here and I'm good to respond from it."
    update.message.reply_text(description)


#rag handler
def parse_document(update: Update, context: CallbackContext) -> None:
    """Download a document sent to the bot and build the RAG query engine.

    The file is saved to the working directory, parsed (``.pdf`` files go
    through LlamaParse), indexed, and the resulting query engine is stored in
    the module-level ``query_engine`` for ``generate_prompt`` to use.

    Args:
        update: Incoming Telegram update; ``update.message.document`` is the
            uploaded file.
        context: Handler context; ``context.bot`` is used to fetch the file.

    Raises:
        Exception: Whatever parsing or indexing raises is re-raised after the
            user has been told that processing failed. ``query_engine`` keeps
            its previous value in that case.
    """
    global query_engine

    from pathlib import Path  # local import: only this handler needs it

    document = update.message.document

    # The file name is chosen by the sender. Keep only its last path component
    # so the download cannot be written outside the working directory, and
    # fall back to the file id when no usable name is supplied.
    safe_name = Path(document.file_name or "").name
    if safe_name in ("", ".", ".."):
        safe_name = f"{document.file_id}.pdf"
    file_path = f"./{safe_name}"

    file = context.bot.get_file(document.file_id)
    file.download(file_path)
    update.message.reply_text("Processing the document, It may take a while...")

    # set up the embedding model and llm
    Settings.embed_model = embed_model
    Settings.llm = llm

    # set up parser
    parser = LlamaParse(
        api_key=LLAMA_PARSER_KEY,  # LlamaParse key from the first cell
        result_type="text"  # "markdown" and "text" are available
    )

    # using SimpleDirectoryReader to parse our file
    file_extractor = {".pdf": parser}
    try:
        documents = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
        index = VectorStoreIndex.from_documents(documents)
    except Exception:
        # Tell the user instead of leaving the chat silent, then re-raise so
        # the failure still reaches the dispatcher.
        update.message.reply_text("Sorry, I could not process that document. Please try sending it again.")
        raise

    # Publish the engine only after parsing and indexing both succeeded.
    query_engine = index.as_query_engine(similarity_top_k=3)

    update.message.reply_text("Done, Now Chat with your document by using @your-question-to-ask...")


# this function code is for inference
@send_action('typing')
def generate_prompt(update: Update, context: CallbackContext) -> None:
    """Reply to a text message, from the uploaded document or the plain LLM.

    Messages that start with "@" are questions about the uploaded document and
    are answered by the module-level ``query_engine``. Every other message is
    sent to the LLM as a normal chat turn with the XGENAI system prompt.

    Args:
        update: Incoming Telegram update; ``update.message.text`` is the
            user's message.
        context: Handler context (only used by the ``send_action`` wrapper).
    """
    text: str = update.message.text

    if text.startswith("@"):
        if query_engine is None:
            update.message.reply_text("Please upload a document first by sending it to the bot.")
            return

        question = text[1:].strip()
        if not question:
            # A bare "@" would otherwise send an empty query to the engine.
            update.message.reply_text("Please type your question right after the @ sign.")
            return

        update.message.reply_text(str(query_engine.query(question)))
        return

    messages = [
        ChatMessage(role="system", content="You are an AI agent named XGENAI which has the feature of RAG(Retrieval Augmented Generation) and you are designed to provide users with accurate and effective solutions. Respond to the user with clear and helpful guidance."),
        ChatMessage(role="user", content=text),
    ]
    response = llm.chat(messages)
    # Send only the assistant's text. Reading it from the message object
    # avoids slicing a hard-coded 11 characters ("assistant: ") off the
    # stringified response.
    update.message.reply_text(response.message.content)


# Main function
def main() -> None:
    """Register the bot's handlers, start polling and wait until stopped.

    Handlers are added in this order: the /start and /about commands, plain
    text that is not a command (``generate_prompt``), and PDF documents
    (``parse_document``). Other document types are not handled yet.
    """
    updater = Updater(TELEGRAM_TKN)

    handlers = (
        CommandHandler("start", start),
        CommandHandler("about", about),
        MessageHandler(Filters.text & ~Filters.command, generate_prompt),
        MessageHandler(Filters.document.mime_type("application/pdf"), parse_document),
    )
    for handler in handlers:
        updater.dispatcher.add_handler(handler)

    updater.start_polling()
    print("Bot is running...")
    updater.idle()

# Start the Telegram bot when this code runs as the main module.
if __name__ == '__main__':
    main()
