In [1]:
!pip install jupyter openai minsearch requests python-dotenv



### RAG

In [4]:
import json

# The data was cleaned in extended-data-preprocessing.ipynb
with open("quran_with_tafsir.json", "r", encoding="utf-8") as f:
    documents = json.load(f)

In [5]:
from minsearch import AppendableIndex

index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["surah_number", "surah_name", "surah_translation", "ayah_number", "reference", "text", "language", "tafsir_text", "tafsir_source"]
)

index.fit(documents)

<minsearch.append.AppendableIndex at 0x10611f6e0>

In [7]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        boost_dict=boost,
        num_results=5,
        output_ids=True
    )

    return results

In [8]:
prompt_template = """
You are an Imam and a teacher of the Qur’an. 
Answer the QUESTION using only the CONTEXT provided (which contains Qur’an verses and tafsir). 
Do not add information that is not in the CONTEXT. 
If the answer cannot be found in the CONTEXT, say you do not know.

When answering:
- Use clear, respectful, and simple language. 
- Quote directly from the Qur’an or tafsir when relevant. 
- Always include the surah and ayah reference (e.g., Surah Al-Fatiha 1:5).
- If the Qur’an text alone does not fully answer the QUESTION and you use tafsir (explanatory commentary) to clarify, explicitly label it as 'Tafsir clarification'.

Format your answer exactly like this:

Qur’an evidence:
<quote Quran verses used>

Tafsir clarification (if needed):
<quote tafsir used or write 'Not needed' if Quran text is enough>

Conclusion:
<Your answer in clear, concise language>

<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>

<ANSWER>
""".strip()


In [9]:
def build_prompt(query, search_results):
    context = ""

    for doc in search_results:
        context = context + f"surah_name: {doc['surah_name']}\nreference: {doc['reference']}\nquestion: {query}\nquran_text: {doc['text']}\ntafsir: {doc["tafsir_text"]}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [27]:
question = 'Which verse mentions fasting in Ramadan?'

In [29]:
search_results = search(question)

In [42]:
search_results

[{'surah_number': 19,
  'surah_name': 'Maryam',
  'surah_translation': 'Mary',
  'ayah_number': 80,
  'reference': '19:80',
  'text': 'And We will inherit him [in] what he mentions, and he will come to Us alone',
  'language': 'English',
  'tafsir_text': 'Refuting the Disbelievers Who claim that They will be given Wealth and Children in the Hereafter Imam Ahmad reported from Khabbab bin Al-Aratt that he said, "I was a blacksmith and Al-" ®256؛لs bin Wa\'il owed me a debt. So I went to him to collect my debt from him. He said to me, `No, by Allah, I will not pay my debt to you until you disbelieve in Muhammad ﷺ.\' I replied to him, `No, by Allah, I will not disbelieve in Muhammad ﷺ until you die and are resurrected again.\' He then said to me, `Verily, if I die and am resurrected, and you come to me, I will also have abundance of wealth and children and I will repay you then.\' Then, Allah revealed these Ayat, أَفَرَأَيْتَ الَّذِى كَفَرَ بِـَايَـتِنَا وَقَالَ لأوتَيَنَّ مَالاً وَوَلَداً

In [31]:
prompt = build_prompt(question, search_results)

In [34]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [36]:
from openai import OpenAI
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def llm(prompt):
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [38]:
answer = rag(question)
print(answer)

Qur’an evidence:
Not found in the provided context.

Tafsir clarification:
Not needed.

Conclusion:
I do not know.
