In [1]:
"""
This two-stage program answers Islam-related questions by leveraging a large language model and searching through authoritative Islamic sources, ensuring that all answers are according to the Sunni tradition. In the first stage, the program accepts a question, determines the language, and generates a relevant search query for Hadith, Quran, or the Encyclopedia of Islamic Jurisprudence (Mawsuah), if necessary. The search query is composed in the Arabic language to optimize the retrieval of relevant search results. If a direct answer can be provided without searching, it is returned at this stage. Otherwise, the search query is executed, and relevant results are gathered.

In the second stage, the original question, its language, the searched source, and the search results are used to generate a factually based final answer. The answer is provided in the same language as the input question, and it is concise, evidence-based, and cites classical and modern scholars from the Sunni tradition. When presenting the Qur'an, the program includes the ayah number, Arabic text, and translation (if the user's language is different from Arabic). The program only uses hadith that are the result of the search. If the hadith comes from the search results, it is presented with the collection, LK id, text, and grade. Otherwise, the hadith is presented as 'I believe (though not 100% sure of the reference) there is a hadith that says: [text of hadith]'. The program emphasizes being specific about which scholars say something and is particularly careful about matters that are obligatory or prohibited.
"""

"\nThis two-stage program answers Islam-related questions by leveraging a large language model and searching through authoritative Islamic sources, ensuring that all answers are according to the Sunni tradition. In the first stage, the program accepts a question, determines the language, and generates a relevant search query for Hadith, Quran, or the Encyclopedia of Islamic Jurisprudence (Mawsuah), if necessary. The search query is composed in the Arabic language to optimize the retrieval of relevant search results. If a direct answer can be provided without searching, it is returned at this stage. Otherwise, the search query is executed, and relevant results are gathered.\n\nIn the second stage, the original question, its language, the searched source, and the search results are used to generate a factually based final answer. The answer is provided in the same language as the input question, and it is concise, evidence-based, and cites classical and modern scholars from the Sunni tra

In [2]:
# TODO:
# 4. provide ~50 example questions
# 5. define a metric for evaluation
# 6. run COPRO or MIPRO optimization

In [3]:
from env_vars import *

In [4]:
from enum import Enum
from pprint import pprint

import dspy
from pydantic import BaseModel, Field

from tools.search_hadith import SearchHadith
from tools.search_quran import SearchQuran
from tools.search_mawsuah import SearchMawsuah

In [5]:
# Stage 1
# Sources that Stage 1 can select for retrieval. Mixing in `str` makes each
# member compare equal to its plain string value.
# Documented with `#` comments only, so the class definition itself is unchanged.
class SearchSource(str, Enum):
    HADITH = "Hadith"
    QURAN = "Quran"
    MAWSUAH = "Mawsuah"
    # No search tool is wired to this member in AnsariFlow.forward.
    NONE = "None"

# Typed input for the Stage 1 signature: wraps the user's raw question.
class Stage1InputQuery(BaseModel):
    # The question exactly as the user asked it.
    query: str = Field(description="Islam-related question")

# Typed output of Stage 1. AnsariFlow.forward reads every field below.
class Stage1OutputResult(BaseModel):
    # Passed on to Stage 2 as its `language` input.
    language: str = Field(description="Language of the question")
    # Selects which search tool forward() calls.
    search_source: SearchSource = Field(description="Indicates the source for search: Hadith, Quran, Mawsuah, or None, based on the analysis of the question")
    # Handed as-is to the selected tool's run_as_list().
    search_query: str = Field(description="Optimized query for retrieving relevant information from the selected source in Arabic, excluding the source name")
    # A non-blank value makes forward() return it immediately, skipping search and Stage 2.
    direct_answer: str = Field(description="Direct answer to the question in the same language as the question, if no search is required")

# DSPy signature for Stage 1 (used with dspy.TypedPredictor in AnsariFlow).
# The docstring below is prompt text for the model, so it is kept verbatim.
class GenerateStage1Output(dspy.Signature):
    """Formulate an optimized search query for retrieving relevant information from the selected Islamic source, only when a direct answer is not possible, based on a given question.

    Accept an Islam-related question, which may not be in Arabic, and determine the appropriate source (Hadith, Quran, or Mawsuah) for the search query which must be composed in the Arabic language, excluding the source name. Carefully read the user's question and write the search query or the direct answer, ensuring that it is according to the Sunni tradition.
    """
    # The user's question, wrapped in Stage1InputQuery.
    input: Stage1InputQuery = dspy.InputField()
    # Structured result: language, search source, search query, optional direct answer.
    output: Stage1OutputResult = dspy.OutputField()


In [6]:
# DSPy signature for Stage 2 (used with plain dspy.Predict in AnsariFlow).
# The docstring and every `desc` string are prompt text and are kept verbatim.
class GenerateStage2FinalAnswer(dspy.Signature):
    """Produce a factually based final answer to an Islam-related question using search results.

    Accept an input containing the original question, its language, the searched source, and the search results. Carefully analyze and translate the content if necessary. Provide a concise, evidence-based answer that cites classical and modern scholars from the Sunni tradition, such as Al Ghazali, Ibn Al Qayyim, Ibn Taymiyah, Imam Shafiee, Imam Nawawi, Imam Abu Hanifah, Ibn Hajr al Asqalani, Imam Ahmad bin Hanbal, Imam Malik, Ibn Hazm, Yusuf Al Qaradawi, Yasir Qadhi, Ma'in Al Qudah, Shu'aib Al Arnaout, Hamza Yusuf, Zaid Shakir, Taqiuddin Usmani, Muhammad Shinqeeti, Ismail Menk, Omar Suleiman, Salman Al-Awdah, Jamaaluddin Zarabozo, and Yaser Birjas. Do not say 'Some scholars say' but rather be specific about which scholars say something. Be particularly careful about matters that are obligatory or prohibited.
    """
    # Inputs: forward() passes the original question, the Stage 1 language,
    # the enum's string value, and the search results joined with newlines.
    question = dspy.InputField(desc="An Islam-related question")
    language = dspy.InputField(desc="Language of the question")
    searched_source = dspy.InputField(desc="The source that was searched: Hadith, Quran, or Mawsuah, based on the analysis of the question")
    search_results = dspy.InputField(desc="List of search results that might be relevant to the question")

    # Output: the answer text that forward() prints and returns.
    answer = dspy.OutputField(desc="Final answer to the question in the same language as the question, based on the relevant search results from Hadith, Quran, or Mawsuah. The answer is concise, evidence-based, and cites classical and modern scholars from the Sunni tradition. When presenting the Qur'an, include the ayah number, Arabic text, and translation (if the user's language is different from Arabic). Only use hadith that are the result of the search. If the hadith is from the search results, present it with the collection, LK id, text, and grade. Otherwise, present the hadith as 'I believe (though not 100% sure of the reference) there is a hadith that says: [text of hadith]'.")

In [17]:
class AnsariFlow(dspy.Module):
    """Two-stage question-answering pipeline built from DSPy predictors.

    Stage 1 (typed predictor) returns the question's language, a search
    source, a search query and an optional direct answer. A non-blank direct
    answer is returned immediately. Otherwise the selected source is searched
    and Stage 2 composes the final answer from the joined search results.
    """

    def __init__(self):
        # Initialize dspy.Module's own state before attaching sub-predictors
        # (the standard DSPy module pattern).
        super().__init__()
        self.llm = dspy.OpenAI(model='gpt-4o-2024-05-13', api_key=OPENAI_API_KEY)
        self.ms = SearchMawsuah(auth_token=VECTARA_AUTH_TOKEN, customer_id=VECTARA_CUSTOMER_ID, corpus_id=VECTARA_CORPUS_ID)
        self.hs = SearchHadith(api_key=KALEMAT_API_KEY)
        self.qs = SearchQuran(api_key=KALEMAT_API_KEY)
        self.stage1_predictor = dspy.TypedPredictor(GenerateStage1Output, max_retries=5, explain_errors=True)
        self.stage2_predictor = dspy.Predict(GenerateStage2FinalAnswer)

    def _search(self, search_source, search_query):
        """Run ``search_query`` on the tool for ``search_source``; return [] when no tool matches."""
        search_tools = {
            SearchSource.HADITH: self.hs,
            SearchSource.QURAN: self.qs,
            SearchSource.MAWSUAH: self.ms,
        }
        search_tool = search_tools.get(search_source)
        if search_tool is None:
            # SearchSource.NONE (or any unmapped value): nothing to search.
            return []
        search_results = search_tool.run_as_list(search_query)
        pprint(search_results)
        return search_results

    def forward(self, question: str) -> str:
        """Answer ``question`` and return the answer text.

        Args:
            question: The user's question, in any language.

        Returns:
            The Stage 1 direct answer when it is non-blank, otherwise the
            Stage 2 answer generated from the search results.
        """
        # Stage 1: Generate search query or provide a direct answer if possible
        with dspy.context(lm=self.llm):
            stage1_prediction = self.stage1_predictor(input=Stage1InputQuery(query=question))
            stage1_output = stage1_prediction.output
            # model_dump() is the pydantic v2 replacement for the deprecated .dict().
            pprint(stage1_output.model_dump())

        # A whitespace-only direct answer counts as "none"; a real one is
        # returned exactly as produced (not stripped).
        if stage1_output.direct_answer.strip():
            return stage1_output.direct_answer

        search_source = stage1_output.search_source
        search_results = self._search(search_source, stage1_output.search_query)

        # Stage 2: Generate the final answer using search results.
        # This still runs with an empty result string when nothing was searched or found.
        with dspy.context(lm=self.llm):
            stage2_prediction = self.stage2_predictor(
                question=question,
                language=stage1_output.language,
                searched_source=search_source.value,
                search_results="\n".join(search_results),
            )
            pprint(stage2_prediction.answer)
        return stage2_prediction.answer

In [18]:
# Build the pipeline and try it on a sample question (Stage 1 detects it as Turkish in the output below).
ansari = AnsariFlow()
question = "kadinlarin basortusu takmalari sart mi?"
# Bare last expression, so the notebook also displays the returned answer string.
ansari(question)

{'direct_answer': '',
 'language': 'Turkish',
 'search_query': 'الحجاب للنساء',
 'search_source': <SearchSource.QURAN: 'Quran'>}
Searching quran for "الحجاب للنساء"
['Ayah: 33:55\n'
 'Arabic Text: لَّا جُنَاحَ عَلَيْهِنَّ فِىٓ ءَابَآئِهِنَّ وَلَآ '
 'أَبْنَآئِهِنَّ وَلَآ إِخْوَٰنِهِنَّ وَلَآ أَبْنَآءِ إِخْوَٰنِهِنَّ '
 'وَلَآ أَبْنَآءِ أَخَوَٰتِهِنَّ وَلَا نِسَآئِهِنَّ وَلَا مَا مَلَكَتْ '
 'أَيْمَـٰنُهُنَّ ۗ وَٱتَّقِينَ ٱللَّهَ ۚ إِنَّ ٱللَّهَ كَانَ عَلَىٰ كُلِّ '
 'شَىْءٍۢ شَهِيدًا\n'
 '\n'
 'English Text: There is no blame on the Prophet’s wives ˹if they appear '
 'unveiled˺ before their fathers, their sons, their brothers, their brothers’ '
 'sons, their sisters’ sons, their fellow ˹Muslim˺ women, and those '
 '˹bondspeople˺ in their possession. And be mindful of Allah ˹O wives of the '
 'Prophet!˺ Surely Allah is a Witness over all things.\n'
 '\n',
 'Ayah: 33:53\n'
 'Arabic Text: يَـٰٓأَيُّهَا ٱلَّذِينَ ءَامَنُوا لَا تَدْخُلُوا بُيُوتَ '
 'ٱلنَّبِىِّ إِلَّآ أَن يُؤْذَنَ

'Question: Kadinlarin basortusu takmalari sart mi?\n\nLanguage: Turkish\n\nSearched Source: Quran\n\nSearch Results:\n1. Ayah: 33:55\n   Arabic Text: لَّا جُنَاحَ عَلَيْهِنَّ فِىٓ ءَابَآئِهِنَّ وَلَآ أَبْنَآئِهِنَّ وَلَآ إِخْوَٰنِهِنَّ وَلَآ أَبْنَآءِ إِخْوَٰنِهِنَّ وَلَآ أَبْنَآءِ أَخَوَٰتِهِنَّ وَلَا ن'

In [19]:
# Print the prompt/completion recorded by the LM client to review what was actually sent (see output below).
ansari.llm.inspect_history()




Produce a factually based final answer to an Islam-related question using search results.

    Accept an input containing the original question, its language, the searched source, and the search results. Carefully analyze and translate the content if necessary. Provide a concise, evidence-based answer that cites classical and modern scholars from the Sunni tradition, such as Al Ghazali, Ibn Al Qayyim, Ibn Taymiyah, Imam Shafiee, Imam Nawawi, Imam Abu Hanifah, Ibn Hajr al Asqalani, Imam Ahmad bin Hanbal, Imam Malik, Ibn Hazm, Yusuf Al Qaradawi, Yasir Qadhi, Ma'in Al Qudah, Shu'aib Al Arnaout, Hamza Yusuf, Zaid Shakir, Taqiuddin Usmani, Muhammad Shinqeeti, Ismail Menk, Omar Suleiman, Salman Al-Awdah, Jamaaluddin Zarabozo, and Yaser Birjas. Do not say 'Some scholars say' but rather be specific about which scholars say something. Be particularly careful about matters that are obligatory or prohibited.

---

Follow the following format.

Question: An Islam-related question

Language: Lan

"\n\n\nProduce a factually based final answer to an Islam-related question using search results.\n\n    Accept an input containing the original question, its language, the searched source, and the search results. Carefully analyze and translate the content if necessary. Provide a concise, evidence-based answer that cites classical and modern scholars from the Sunni tradition, such as Al Ghazali, Ibn Al Qayyim, Ibn Taymiyah, Imam Shafiee, Imam Nawawi, Imam Abu Hanifah, Ibn Hajr al Asqalani, Imam Ahmad bin Hanbal, Imam Malik, Ibn Hazm, Yusuf Al Qaradawi, Yasir Qadhi, Ma'in Al Qudah, Shu'aib Al Arnaout, Hamza Yusuf, Zaid Shakir, Taqiuddin Usmani, Muhammad Shinqeeti, Ismail Menk, Omar Suleiman, Salman Al-Awdah, Jamaaluddin Zarabozo, and Yaser Birjas. Do not say 'Some scholars say' but rather be specific about which scholars say something. Be particularly careful about matters that are obligatory or prohibited.\n\n---\n\nFollow the following format.\n\nQuestion: An Islam-related question\n\

In [None]:
# TODO:
# Stage 2 must not be a typed predictor, because it is failing.

# LLM:
# 1. speak in definitive terms when necessary: only when something is ما عُلِمَ من الدين بالضرورة
# 2. Use your Quran and Hadith knowledge when searching in them. When searching in Mawsuah, use your common sense to craft queries. The queries must retrieve relevant results. Don't write queries that ignore the content of the Quran or Hadith: the query will be used to search directly in the sources and no intelligent AI will preprocess it, so craft it accordingly.

In [20]:
"""
This two-stage program answers Islam-related questions by leveraging a large language model and searching through authoritative Islamic sources, ensuring that all answers are according to the Sunni tradition. In the first stage, the program accepts a question, determines the language, and generates a relevant search query for Hadith, Quran, or the Encyclopedia of Islamic Jurisprudence (Mawsuah), if necessary. The search query is composed in the Arabic language to optimize the retrieval of relevant search results. If a direct answer can be provided without searching, it is returned at this stage. Otherwise, the search query is executed, and relevant results are gathered.

In the second stage, the original question, its language, the searched source, and the search results are used to generate a factually based final answer. The answer is provided in the same language as the input question, and it is concise, evidence-based, and cites classical and modern scholars from the Sunni tradition. When presenting the Qur'an, the program includes the ayah number, Arabic text, and translation (if the user's language is different from Arabic). The program only uses hadith that are the result of the search. If the hadith comes from the search results, it is presented with the collection, LK id, text, and grade. Otherwise, the hadith is presented as 'I believe (though not 100% sure of the reference) there is a hadith that says: [text of hadith]'. The program emphasizes being specific about which scholars say something and is particularly careful about matters that are obligatory or prohibited.
"""

from enum import Enum
from pprint import pprint

import dspy
from pydantic import BaseModel, Field

from env_vars import *
from tools.search_quran import SearchQuran
from tools.search_hadith import SearchHadith
from tools.search_mawsuah import SearchMawsuah

# Stage 1
# Sources that Stage 1 can select for retrieval. Mixing in `str` makes each
# member compare equal to its plain string value.
# Documented with `#` comments only, so the class definition itself is unchanged.
class SearchSource(str, Enum):
    HADITH = "Hadith"
    QURAN = "Quran"
    MAWSUAH = "Mawsuah"
    # No search tool is wired to this member in AnsariFlow.forward.
    NONE = "None"

# Typed input for the Stage 1 signature: wraps the user's raw question.
class Stage1InputQuery(BaseModel):
    # The question exactly as the user asked it.
    query: str = Field(description="Islam-related question")

# Typed output of Stage 1. AnsariFlow.forward reads every field below.
class Stage1OutputResult(BaseModel):
    # Passed on to Stage 2 as its `language` input.
    language: str = Field(description="Language of the question")
    # Selects which search tool forward() calls.
    # NOTE(review): the "definitive terms" sentence describes answer wording rather than
    # source selection; confirm it is meant to live in this field's description.
    search_source: SearchSource = Field(description="Indicates the source for search: Hadith, Quran, Mawsuah, or None, based on the analysis of the question. Uses definitive terms for established Islamic rulings (ما عُلِمَ من الدين بالضرورة)")
    # Handed as-is to the selected tool's run_as_list().
    search_query: str = Field(description="Optimized query for retrieving relevant information from the selected source in Arabic, crafted using Quranic knowledge, Hadith knowledge, or common sense for Mawsuah")
    # A non-blank value makes forward() return it immediately, skipping search and Stage 2.
    direct_answer: str = Field(description="Direct answer to the question in the same language as the question, if no search is required, particularly for matters that are definitively known (ما عُلِمَ من الدين بالضرورة)")

# DSPy signature for Stage 1 (used with dspy.TypedPredictor in AnsariFlow).
# The docstring below is prompt text for the model, so it is kept verbatim.
class GenerateStage1Output(dspy.Signature):
    """Formulate an optimized search query for retrieving relevant information from the selected Islamic source, only when a direct answer is not possible, based on a given question.

    Accept an Islam-related question, which may not be in Arabic, and determine the appropriate source (Hadith, Quran, or Mawsuah) for the search query which must be composed in the Arabic language. Use definitive terms for well-known Islamic rulings (المعلوم من الدين بالضرورة). When crafting a Quran search query, use specific Quranic knowledge to choose keywords that are guaranteed to bring back relevant results. When crafting a search query for Hadith, use specific Hadith knowledge to ensure relevant Hadiths are retrieved. For Mawsuah, use common sense to craft queries. The queries must be designed to retrieve relevant results directly from the sources.
    """
    # The user's question, wrapped in Stage1InputQuery.
    input: Stage1InputQuery = dspy.InputField()
    # Structured result: language, search source, search query, optional direct answer.
    output: Stage1OutputResult = dspy.OutputField()

# DSPy signature for Stage 2 (used with plain dspy.Predict in AnsariFlow).
# The docstring and every `desc` string are prompt text and are kept verbatim.
class GenerateStage2FinalAnswer(dspy.Signature):
    """Produce a factually based final answer to an Islam-related question using search results.

    Accept an input containing the original question, its language, the searched source, and the search results. Carefully analyze and translate the content if necessary. Provide a concise, evidence-based answer that cites specific classical and modern scholars from the Sunni tradition, such as Al Ghazali, Ibn Al Qayyim, Ibn Taymiyah, Imam Shafiee, Imam Nawawi, Imam Abu Hanifah, Ibn Hajr al Asqalani, Imam Ahmad bin Hanbal, Imam Malik, Ibn Hazm, Yusuf Al Qaradawi, Yasir Qadhi, Ma'in Al Qudah, Shu'aib Al Arnaout, Hamza Yusuf, Zaid Shakir, Taqiuddin Usmani, Muhammad Shinqeeti, Ismail Menk, Omar Suleiman, Salman Al-Awdah, Jamaaluddin Zarabozo, and Yaser Birjas. Avoid vague references such as 'Some scholars say.' Be specific about which scholars hold a particular view. Ensure the answer is definitive and evidence-based, including relevant details from Quran and Hadith when available. When presenting Quranic references, include the ayah number, Arabic text, and translation if the user's language is different from Arabic. Only use hadith that are the result of the search. If the hadith is from the search results, present it with the collection, LK id, text, and grade. Otherwise, state that you believe (though not 100% sure of the reference) there is a hadith that says: [text of hadith]. Ensure the answer is comprehensive and well-supported by evidence.
    """
    # Inputs: forward() passes the original question, the Stage 1 language,
    # the enum's string value, and the search results joined with newlines.
    question = dspy.InputField(desc="An Islam-related question")
    language = dspy.InputField(desc="Language of the question")
    searched_source = dspy.InputField(desc="The source that was searched: Hadith, Quran, or Mawsuah, based on the analysis of the question")
    search_results = dspy.InputField(desc="List of search results that might be relevant to the question")

    # Output: the answer text that forward() prints and returns.
    answer = dspy.OutputField(desc="Final answer to the question in the same language as the question, based on the relevant search results from Hadith, Quran, or Mawsuah. The answer is concise, evidence-based, and cites specific classical and modern scholars from the Sunni tradition. When presenting the Qur'an, include the ayah number, Arabic text, and translation (if the user's language is different from Arabic). Only use hadith that are the result of the search. If the hadith is from the search results, present it with the collection, LK id, text, and grade. Otherwise, present the hadith as 'I believe (though not 100% sure of the reference) there is a hadith that says: [text of hadith]'. Ensure the answer is definitive and evidence-based, including relevant details from Quran and Hadith when available.")

class AnsariFlow(dspy.Module):
    """Two-stage question-answering pipeline built from DSPy predictors.

    Stage 1 (typed predictor) returns the question's language, a search
    source, a search query and an optional direct answer. A non-blank direct
    answer is returned immediately. Otherwise the selected source is searched
    and Stage 2 composes the final answer from the joined search results.
    """

    def __init__(self):
        # Initialize dspy.Module's own state before attaching sub-predictors
        # (the standard DSPy module pattern).
        super().__init__()
        self.llm = dspy.OpenAI(model='gpt-4o-2024-05-13', api_key=OPENAI_API_KEY)
        self.ms = SearchMawsuah(auth_token=VECTARA_AUTH_TOKEN, customer_id=VECTARA_CUSTOMER_ID, corpus_id=VECTARA_CORPUS_ID)
        self.hs = SearchHadith(api_key=KALEMAT_API_KEY)
        self.qs = SearchQuran(api_key=KALEMAT_API_KEY)
        self.stage1_predictor = dspy.TypedPredictor(GenerateStage1Output, max_retries=5, explain_errors=True)
        self.stage2_predictor = dspy.Predict(GenerateStage2FinalAnswer)

    def _search(self, search_source, search_query):
        """Run ``search_query`` on the tool for ``search_source``; return [] when no tool matches."""
        search_tools = {
            SearchSource.HADITH: self.hs,
            SearchSource.QURAN: self.qs,
            SearchSource.MAWSUAH: self.ms,
        }
        search_tool = search_tools.get(search_source)
        if search_tool is None:
            # SearchSource.NONE (or any unmapped value): nothing to search.
            return []
        search_results = search_tool.run_as_list(search_query)
        pprint(search_results)
        return search_results

    def forward(self, question: str) -> str:
        """Answer ``question`` and return the answer text.

        Args:
            question: The user's question, in any language.

        Returns:
            The Stage 1 direct answer when it is non-blank, otherwise the
            Stage 2 answer generated from the search results.
        """
        # Stage 1: Generate search query or provide a direct answer if possible
        with dspy.context(lm=self.llm):
            stage1_prediction = self.stage1_predictor(input=Stage1InputQuery(query=question))
            stage1_output = stage1_prediction.output
            # model_dump() is the pydantic v2 replacement for the deprecated .dict().
            pprint(stage1_output.model_dump())

        # A whitespace-only direct answer counts as "none"; a real one is
        # returned exactly as produced (not stripped).
        if stage1_output.direct_answer.strip():
            return stage1_output.direct_answer

        search_source = stage1_output.search_source
        search_results = self._search(search_source, stage1_output.search_query)

        # Stage 2: Generate the final answer using search results.
        # This still runs with an empty result string when nothing was searched or found.
        with dspy.context(lm=self.llm):
            stage2_prediction = self.stage2_predictor(
                question=question,
                language=stage1_output.language,
                searched_source=search_source.value,
                search_results="\n".join(search_results),
            )
            pprint(stage2_prediction.answer)
        return stage2_prediction.answer

# Build the pipeline and try it on a sample question (Stage 1 detects it as Turkish in the output below).
ansari = AnsariFlow()
question = "kadinlarin basortusu takmalari sart mi?"
# Assigned to a name, so the cell shows only the pprint output from forward().
answer = ansari(question)



{'direct_answer': '',
 'language': 'Turkish',
 'search_query': 'حجاب النساء في الإسلام',
 'search_source': <SearchSource.QURAN: 'Quran'>}
Searching quran for "حجاب النساء في الإسلام"
['Ayah: 33:55\n'
 'Arabic Text: لَّا جُنَاحَ عَلَيْهِنَّ فِىٓ ءَابَآئِهِنَّ وَلَآ '
 'أَبْنَآئِهِنَّ وَلَآ إِخْوَٰنِهِنَّ وَلَآ أَبْنَآءِ إِخْوَٰنِهِنَّ '
 'وَلَآ أَبْنَآءِ أَخَوَٰتِهِنَّ وَلَا نِسَآئِهِنَّ وَلَا مَا مَلَكَتْ '
 'أَيْمَـٰنُهُنَّ ۗ وَٱتَّقِينَ ٱللَّهَ ۚ إِنَّ ٱللَّهَ كَانَ عَلَىٰ كُلِّ '
 'شَىْءٍۢ شَهِيدًا\n'
 '\n'
 'English Text: There is no blame on the Prophet’s wives ˹if they appear '
 'unveiled˺ before their fathers, their sons, their brothers, their brothers’ '
 'sons, their sisters’ sons, their fellow ˹Muslim˺ women, and those '
 '˹bondspeople˺ in their possession. And be mindful of Allah ˹O wives of the '
 'Prophet!˺ Surely Allah is a Witness over all things.\n'
 '\n',
 'Ayah: 24:60\n'
 'Arabic Text: وَٱلْقَوَٰعِدُ مِنَ ٱلنِّسَآءِ ٱلَّـٰتِى لَا يَرْجُونَ '
 'نِكَاحًا فَلَي

'Question: Kadinlarin basortusu takmalari sart mi?\n\nLanguage: Turkish\n\nSearched Source: Quran\n\nSearch Results: \n1. Ayah: 33:55\n   Arabic Text: لَّا جُنَاحَ عَلَيْهِنَّ فِىٓ ءَابَآئِهِنَّ وَلَآ أَبْنَآئِهِنَّ وَلَآ إِخْوَٰنِهِنَّ وَلَآ أَبْنَآءِ إِخْوَٰنِهِنَّ وَلَآ أَبْنَآءِ أَخَوَٰتِهِنَّ وَلَا'