# Retrieval Augmented Generation

- Install GPT4All to run LLMs locally: https://gpt4all.io/index.html

- Within GPT4All, set up `Llama 3 instruct` and `SBERT` for your RAG application.

- Set a folder as a database, and populate it with files of your choosing (.pdf or .txt).

- If you want to use the publication database of DIT, you can use papers published since 2022, as the model is less likely to have seen them during training and therefore has to rely on the retrieved context rather than on its learned weights. (https://www.th-deg.de/publication-database)

- Deactivate the database, ask the same questions again, and compare the quality of the answers with and without the retrieved information.

- Explain how Llama 3 is able to run on your local machine.


### Advanced:
- Instead of using the GPT4All models, write Python code that retrieves the relevant context with SBERT, then sends that context together with a prompt to the DIT API for LLMs, so that the model generates a text that answers the prompt using the retrieved context (DIT API: http://vnesim.th-deg.de:8080/).

For example: you retrieve 3 paragraphs from your indexed local database, and then you send the model "(insert the 3 paragraphs). Taking this text as context, (insert the question)".


In [None]:
import json
import os
import sys
import boto3
import streamlit as st
from langchain.embeddings import BedrockEmbeddings
from langchain.llms.bedrock import Bedrock
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# AWS region the Bedrock runtime client connects to.
AWS_REGION = 'us-east-1'

## Initialize Bedrock Clients
# One runtime client is shared by the embedding model and the LLM wrapper.
bedrock = boto3.client(
    service_name="bedrock-runtime",
    region_name=AWS_REGION,
)
# Embedding model used both when building and when loading the FAISS index.
bedrock_embeddings = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v1",
    client=bedrock,
)

## Data ingestion function
def data_ingestion(data_dir="data", chunk_size=10000, chunk_overlap=1000):
    """Load the PDFs found in *data_dir* and split them into chunks.

    Args:
        data_dir: Directory handed to ``PyPDFDirectoryLoader``. Defaults to
            ``"data"``.
        chunk_size: ``chunk_size`` passed to the text splitter
            (default 10000).
        chunk_overlap: ``chunk_overlap`` passed to the text splitter
            (default 1000).

    Returns:
        The chunked documents returned by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    documents = PyPDFDirectoryLoader(data_dir).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

## Vector Embedding and vector store creation
def get_vector_store(docs, index_dir="faiss_index"):
    """Embed *docs* with the Bedrock embeddings and save a FAISS index.

    Args:
        docs: Chunked documents to embed, e.g. the output of
            ``data_ingestion``.
        index_dir: Directory the index is written to. Defaults to
            ``"faiss_index"``, the location ``main`` loads the index from.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    os.makedirs(index_dir, exist_ok=True)
    vectorstore_faiss = FAISS.from_documents(docs, bedrock_embeddings)
    vectorstore_faiss.save_local(index_dir)

## Function to get Llama3 LLM
def get_llama3_llm():
    """Build the LangChain ``Bedrock`` wrapper for the Llama 3 model.

    Returns:
        A ``Bedrock`` LLM that uses the module-level ``bedrock`` client and
        is configured with ``max_gen_len`` set to 512.
    """
    generation_settings = {'max_gen_len': 512}
    return Bedrock(
        model_id="meta.llama3-8b-instruct-v1:0",
        client=bedrock,
        model_kwargs=generation_settings,
    )

## Prompt template
# Instruction text for the model; the {context} and {question} placeholders
# are the two input variables declared on PROMPT below.
# NOTE(review): the "Human:" / "Assistant:" turn markers look like they were
# written for a different model family than Llama 3 - confirm the expected
# chat format for the model in use.
# NOTE(review): the instruction asks for a "concise" answer and for at least
# 250 words in the same sentence - confirm which one is intended.
prompt_template = """
Human: Use the following pieces of context to provide a concise answer to the question at the end but use at least summarize with 250 words with detailed explanations. If you don't know the answer, just say that you don't know, don't try to make up an answer.
<context>
{context}
</context>
Question: {question}
Assistant:
"""

# Template object handed to the retrieval chain in get_response_llm.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

## Function to get response from LLM
def get_response_llm(llm, vectorstore_faiss, query):
    """Answer *query* using documents retrieved from the vector store.

    Args:
        llm: Language model passed to the ``RetrievalQA`` chain.
        vectorstore_faiss: Vector store whose retriever supplies the context.
        query: The question to answer.

    Returns:
        The ``'result'`` entry of the chain's output.
    """
    # Similarity search limited to the 3 best-matching chunks.
    top_k_retriever = vectorstore_faiss.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3},
    )
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=top_k_retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )
    # Source documents are requested from the chain, but only the generated
    # answer is handed back to the caller.
    response = chain({"query": query})
    return response['result']

## Streamlit main function
def main():
    """Render the Streamlit page and handle its two actions.

    The sidebar button rebuilds the FAISS index from the PDF folder; the
    main-page button answers the typed question using the saved index.
    User-facing messages are shown instead of failing when there is nothing
    to index, no question was entered, or no index has been built yet.
    """
    st.set_page_config("Chat PDF")
    st.header("Chat with PDF using AWS Bedrock")

    user_question = st.text_input("Ask a Question from the PDF Files")

    with st.sidebar:
        st.title("Update Or Create Vector Store:")
        if st.button("Vectors Update"):
            with st.spinner("Processing...."):
                docs = data_ingestion()
                if not docs:
                    # Nothing was loaded, so there is nothing to embed; tell
                    # the user instead of failing inside the index build.
                    st.error(
                        "No PDF content found to index. "
                        "Add PDF files to the data folder and try again."
                    )
                else:
                    get_vector_store(docs)
                    st.success("Done")

    if st.button("Llama Output"):
        if not user_question.strip():
            # Do not send a blank query to the embedding model / LLM.
            st.warning("Please enter a question first.")
        elif not os.path.exists("faiss_index/index.faiss"):
            st.error("FAISS index file not found. Please update vectors first.")
        else:
            with st.spinner("Processing....."):
                # SECURITY NOTE: allow_dangerous_deserialization=True lets
                # load_local deserialize the stored index files. Keep this
                # only while "faiss_index" is written exclusively by this
                # app; never point it at an index from an untrusted source.
                faiss_index = FAISS.load_local(
                    "faiss_index",
                    bedrock_embeddings,
                    allow_dangerous_deserialization=True,
                )
                llm = get_llama3_llm()
                st.write(get_response_llm(llm, faiss_index, user_question))
                st.success("Done")

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()