### Similarity search and MMR search

In [4]:
# Import required libraries
# Standard library
import json
import os
import shutil
import time
import warnings

# Data handling
import numpy as np
import pandas as pd

#for creating embeddings
import chromadb
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

#Semantic Search functionality
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai.chat_models import ChatOpenAI

#QA functionality
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate

#Chat functionality
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import create_history_aware_retriever
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import MessagesPlaceholder

## Warning control
warnings.filterwarnings("ignore")

In [5]:
# Location of the on-disk Chroma database. Defined once so the raw chromadb
# client and the LangChain wrapper always point at the same directory.
CHROMA_DIR = 'assets/bible/chromadb'

# Embedding model handed to the vector store for embedding incoming queries.
embeddings_model = OpenAIEmbeddings()

# Open the persisted database and wrap it in LangChain's Chroma vector store.
client = chromadb.PersistentClient(path=CHROMA_DIR)
vectorstore = Chroma(
    client=client,
    embedding_function=embeddings_model,
    persist_directory=CHROMA_DIR,
)

def _print_search_results(question, docs):
    """Print the question followed by every retrieved verse and its citation.

    Args:
        question: The query string that was searched for.
        docs: Iterable of retrieved documents; each must expose ``page_content``
            and a ``metadata`` mapping with ``book``, ``chapter`` and ``verse``.
    """
    print(f"Question: {question}\n")
    for position, doc in enumerate(docs, start=1):
        meta = doc.metadata
        print(f"Response {position}:")
        print(f"Text: {doc.page_content}")
        print(f"Book: {meta['book']}, Chapter: {meta['chapter']}, Verse: {meta['verse']}\n")


#query function (similarity search)
def ask_bible_sr(question, k=3):
    """Run a plain similarity search against ``vectorstore`` and print the hits.

    Args:
        question: Natural-language query.
        k: Number of verses to retrieve.

    Returns:
        None. Results are printed only, so calling this as the last expression
        of a cell does not add an ``Out[]`` entry.
    """
    docs = vectorstore.similarity_search(question, k=k)
    _print_search_results(question, docs)


#query function (max marginal relevance search)
def ask_bible_mmr(question, k=3, fetch_k=20):
    """Run a max-marginal-relevance search against ``vectorstore`` and print the hits.

    Args:
        question: Natural-language query.
        k: Number of verses to return.
        fetch_k: Size of the candidate pool passed to the MMR search. The
            default of 20 is meant to mirror the library default, so existing
            calls that omit it should behave as before.

    Returns:
        None. Results are printed only.
    """
    docs = vectorstore.max_marginal_relevance_search(question, k=k, fetch_k=fetch_k)
    _print_search_results(question, docs)

In [6]:
# Peek at the metadata of one stored verse. Only a single row is needed, so ask
# the store for one row instead of loading the whole collection.
vectorstore.get(limit=1)['metadatas'][0]

{'book': 'Genesis', 'chapter': 1, 'verse': 1}

In [20]:
# chromadb is already imported in the notebook's import cell; report the
# installed version for reproducibility.
print(chromadb.__version__)

0.4.0


In [7]:
# Demo: plain similarity search, printing the top 3 verses for the question.
ask_bible_sr("Who is adam?", k=3)

Question: Who is adam?

Response 1:
Text: For Adam was first formed, then Eve.
Book: 1 Timothy, Chapter: 2, Verse: 13

Response 2:
Text: Adam, Sheth, Enosh,
Book: 1 Chronicles, Chapter: 1, Verse: 1

Response 3:
Text: This [is] the book of the generations of Adam. In the day that God created man, in the likeness of God made he him;
Book: Genesis, Chapter: 5, Verse: 1



In [8]:
# Demo: the same question through max-marginal-relevance search, for comparison
# with the similarity-search output.
ask_bible_mmr("Who is adam?", k=3)

Question: Who is adam?

Response 1:
Text: For Adam was first formed, then Eve.
Book: 1 Timothy, Chapter: 2, Verse: 13

Response 2:
Text: Adam, Sheth, Enosh,
Book: 1 Chronicles, Chapter: 1, Verse: 1

Response 3:
Text: And the LORD God called unto Adam, and said unto him, Where [art] thou?
Book: Genesis, Chapter: 3, Verse: 9



In [9]:
#show example metadata
# Only a handful of rows are displayed, so fetch just those through the public
# `vectorstore.get` API rather than loading the whole collection via the
# private `_collection` attribute.
N_EXAMPLES = 3
documents = vectorstore.get(limit=N_EXAMPLES)
print("Metadata examples:", documents["metadatas"])  # Print the fetched metadata entries

# OR to see everything in a more readable format:
for doc, metadata in zip(documents["documents"], documents["metadatas"]):
    print("\nDocument:", doc[:100], "...")  # First 100 chars of document
    print("Metadata:", metadata)

Metadata examples: [{'book': 'Genesis', 'chapter': 1, 'verse': 1}, {'book': 'Genesis', 'chapter': 1, 'verse': 2}, {'book': 'Genesis', 'chapter': 1, 'verse': 3}]

Document: In the beginning God created the heaven and the earth. ...
Metadata: {'book': 'Genesis', 'chapter': 1, 'verse': 1}

Document: And the earth was without form, and void; and darkness [was] upon the face of the deep. And the Spir ...
Metadata: {'book': 'Genesis', 'chapter': 1, 'verse': 2}

Document: And God said, Let there be light: and there was light. ...
Metadata: {'book': 'Genesis', 'chapter': 1, 'verse': 3}


### Q&A Search

In [10]:
# One (name, description, type) row per metadata field described to the
# query constructor.
_FIELD_SPECS = (
    ("book", "The book of the Bible", "string"),
    ("chapter", "The chapter from the book of the Bible", "integer"),
    ("verse", "The verse from the chapter of the book of the Bible", "integer"),
)

# Expand the table above into the AttributeInfo objects, preserving its order.
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_desc, type=field_type)
    for field_name, field_desc, field_type in _FIELD_SPECS
]

In [11]:
# Define QA function and prompt template
def QA(question, k = 5, fetch_k = 30):
    """Answer a question with a retrieval chain over ``vectorstore``.

    Documents are retrieved with MMR search, stuffed into a single prompt and
    passed to the chat model.

    Args:
        question: The user's question; passed to the chain as ``input``.
        k: Number of documents the retriever returns.
        fetch_k: Size of the candidate pool used by the MMR retriever.

    Returns:
        Whatever ``rag_chain.invoke`` returns. In the run recorded in this
        notebook that is a dict with the keys ``input``, ``context`` and
        ``answer``.
    """
    # A single chat model instance is enough. The unused self-query retriever
    # that used to be built here was dead code, so it has been dropped.
    llm = ChatOpenAI(model='gpt-4o-mini', temperature=0)

    #define prompt template
    template = """Use the following pieces of context to answer the question at the end. \n
    If you don't know the answer, just say that you don't know, don't try to make up an answer. \n
    Use five sentences maximum. Keep the answer as concise as possible.  \n
    <context>
    {context}
    </context>

    Question: {input}
    Helpful Answer:"""
    retrieval_qa_chat_prompt = PromptTemplate.from_template(template)

    # The chain that formats the retrieved documents into the prompt.
    combine_docs_chain = create_stuff_documents_chain(llm, retrieval_qa_chat_prompt)

    # The retriever that actually feeds the chain: MMR over the vector store.
    mmr_retriever = vectorstore.as_retriever(
        search_type='mmr',
        search_kwargs={'k': k, 'fetch_k': fetch_k},
    )
    rag_chain = create_retrieval_chain(mmr_retriever, combine_docs_chain)

    return rag_chain.invoke({"input": question})

# Demo: answer one question with QA, retrieving 3 documents from an MMR
# candidate pool of 30; the returned dict is displayed as the cell output.
question = "Who was jesus christ?"
QA(question, k=3, fetch_k= 30)


{'input': 'Who was jesus christ?',
 'context': [Document(page_content='The beginning of the gospel of Jesus Christ, the Son of God;', metadata={'book': 'Mark', 'chapter': 1, 'verse': 1}),
  Document(page_content='He answered and said, Who is he, Lord, that I might believe on him?', metadata={'book': 'John', 'chapter': 9, 'verse': 36}),
  Document(page_content='Is Christ divided? was Paul crucified for you? or were ye baptized in the name of Paul?', metadata={'book': '1 Corinthians', 'chapter': 1, 'verse': 13})],
 'answer': 'Jesus Christ is referred to as the Son of God and is central to the Christian faith. He is believed to be the Messiah and the one who brings salvation to humanity. The gospel accounts describe his teachings, crucifixion, and resurrection. In the context provided, he is distinguished from other figures like Paul, emphasizing his unique role in Christianity.'}

### Q&A functionality (type 2: chat-style prompt with an MMR retriever)

In [12]:
# Non-conversational RAG chain built from a chat-style (system + human) prompt.
# ChatPromptTemplate comes from the notebook's import cell.

# Retrieval settings: documents returned, and size of the MMR candidate pool.
RETRIEVER_K = 10
RETRIEVER_FETCH_K = 30

#define llm
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0)

#define retriever
# Not consumed by the chain built in this cell; kept so the module-level name
# remains available.
document_content_description = "The holy bible"

retriever = vectorstore.as_retriever(
    search_type='mmr',
    search_kwargs={'k': RETRIEVER_K, 'fetch_k': RETRIEVER_FETCH_K},
)

#define prompt template
# The retrieved documents are substituted for {context} at the end of the
# system message; the user's question arrives as the human message.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, do not try to make up an answer. "
    "Use five sentences maximum. "
    "Keep the answer as concise as possible. "
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}")
    ]
)

# Stuff the retrieved documents into the prompt, then put retrieval in front.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [13]:
# Ask the RAG chain a single question and display the full response dict.
# NOTE(review): a later cell rebinds `rag_chain` to the conversational chain, so
# re-running this cell after that one presumably needs a `chat_history` input
# as well; confirm before running cells out of order.
question = 'who was jesus?'
response = rag_chain.invoke({"input": question})
response


{'input': 'who was jesus?',
 'context': [Document(page_content='The beginning of the gospel of Jesus Christ, the Son of God;', metadata={'book': 'Mark', 'chapter': 1, 'verse': 1}),
  Document(page_content='And knew her not till she had brought forth her firstborn son: and he called his name JESUS.', metadata={'book': 'Matthew', 'chapter': 1, 'verse': 25}),
  Document(page_content='And I said, Who art thou, Lord? And he said, I am Jesus whom thou persecutest.', metadata={'book': 'Acts', 'chapter': 26, 'verse': 15}),
  Document(page_content='And the multitude said, This is Jesus the prophet of Nazareth of Galilee.', metadata={'book': 'Matthew', 'chapter': 21, 'verse': 11}),
  Document(page_content='Be it known unto you all, and to all the people of Israel, that by the name of Jesus Christ of Nazareth, whom ye crucified, whom God raised from the dead, [even] by him doth this man stand here before you whole.', metadata={'book': 'Acts', 'chapter': 4, 'verse': 10}),
  Document(page_content='

### Chat functionality

In [14]:
# create_history_aware_retriever and MessagesPlaceholder come from the
# notebook's import cell.

# System instruction for the rewriting step: turn a follow-up question that may
# lean on earlier turns into one that can be understood on its own.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt layout: instruction, then the prior turns, then the new user message.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever built from the existing `llm` and `retriever` together with the
# contextualizing prompt above.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [15]:
# Answering prompt for the conversational chain: the system instructions held
# in `system_prompt`, then the running chat history, then the user's latest
# message.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)


# NOTE(review): these two assignments rebind `question_answer_chain` and
# `rag_chain`, which an earlier cell already defined for the non-conversational
# chain. From here on `rag_chain` is the history-aware chain.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


In [18]:
# Two-turn conversation demo. AIMessage and HumanMessage come from the
# notebook's import cell.

# Start from an empty history so the cell gives the same result on every run.
chat_history = []

# Turn 1: a self-contained question.
question = "Who was john the baptist?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

# Record the first exchange so the follow-up can refer back to it.
chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)

# Turn 2: "he" can only be resolved through the recorded history.
second_question = "How did he die?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

print(ai_msg_2["answer"])

John the Baptist was executed by beheading. This occurred after he was imprisoned by King Herod Antipas, who was influenced by Herodias, the wife of his brother Philip. Herodias held a grudge against John for condemning her marriage to Herod. During a banquet, Herodias' daughter danced for Herod, and he promised to grant her any request, which she fulfilled by asking for John's head on a platter.


In [19]:
# Display the full response of the second turn (input, chat history, retrieved
# context and answer) for inspection.
ai_msg_2

{'input': 'How did he die?',
 'chat_history': [HumanMessage(content='Who was john the baptist?'),
  AIMessage(content='John the Baptist was a prophet who preached in the wilderness of Judea and is known for baptizing Jesus. He was sent from God and is recognized as a significant figure in Christianity, often regarded as the forerunner to Christ. John baptized with water and called people to repentance, emphasizing the coming of one greater than himself. He is considered one of the greatest prophets, as stated in the scriptures. His ministry and eventual martyrdom are key events in the New Testament.')],
 'context': [Document(page_content='In those days came John the Baptist, preaching in the wilderness of Judaea,', metadata={'book': 'Matthew', 'chapter': 3, 'verse': 1}),
  Document(page_content='The baptism of John, was it from heaven, or of men?', metadata={'book': 'Luke', 'chapter': 20, 'verse': 4}),
  Document(page_content='And said unto his servants, This is John the Baptist; he is