In [None]:
from env_vars import *

In [None]:
"""
This two-stage program answers Islam-related questions by leveraging a large language model and searching through authoritative Islamic sources. In the first stage, the program accepts a question, determines the language, and generates relevant search queries for Hadith, Quran, and the Encyclopedia of Islamic Jurisprudence (Mawsuah), if necessary. If a direct answer can be provided without searching, it is returned at this stage. Otherwise, the search queries are executed, and relevant results are gathered. In the second stage, the original question, its language, and the search results are used to generate a factually based final answer. The answer is provided in the same language as the input question.
"""

import dspy
from pydantic import BaseModel, Field

# Stage-1 request payload: carries the user's question text as its only field.
# NOTE: documented with `#` comments on purpose -- a class docstring on a
# pydantic model is picked up as the schema description, which would alter
# what the typed predictor sends to the LM.
class Stage1InputQuery(BaseModel):
    query: str = Field(
        description="Islam-related question",
    )

# Stage-1 response payload: the detected language, one (flag, Arabic query)
# pair per source -- Hadith, Quran, Mawsuah -- and an optional direct answer.
# NOTE: documented with `#` comments on purpose -- a class docstring on a
# pydantic model is picked up as the schema description, which would alter
# what the typed predictor sends to the LM. Field order is kept as-is for the
# same reason.
class Stage1OutputResult(BaseModel):
    language: str = Field(
        description="Language of the question",
    )

    # Hadith
    requires_hadith_search: bool = Field(
        description="Indicates if a Hadith search is required",
    )
    hadith_search_query: str = Field(
        description="Query for Hadith search in Arabic",
    )

    # Quran
    requires_quran_search: bool = Field(
        description="Indicates if a Quran search is required",
    )
    quran_search_query: str = Field(
        description="Query for Quran search in Arabic",
    )

    # Mawsuah (Encyclopedia of Islamic Jurisprudence)
    requires_mawsuah_search: bool = Field(
        description="Indicates if a search in the Encyclopedia of Islamic Jurisprudence (Mawsuah) is required",
    )
    mawsuah_search_query: str = Field(
        description="Query for Mawsuah search in Arabic",
    )

    # Only meaningful when none of the searches above is required.
    direct_answer: str = Field(
        description="Direct answer to the question in the same language as the question, if no search is required",
    )

# DSPy signature for stage 1: maps a Stage1InputQuery to a Stage1OutputResult.
# NOTE: DSPy uses a Signature's docstring as the task instructions given to
# the LM, so the docstring below is effectively prompt text -- editing it
# changes model behavior, not just documentation.
class GenerateStage1Output(dspy.Signature):
    """Formulate search queries for Islamic sources only when a direct answer is not possible, based on a given question, to optimize the retrieval of relevant search results.

    Accept an Islam-related question, which may not be in Arabic, and construct queries for Hadith, Quran, and Mawsuah in the Arabic language. The goal is to maximize the likelihood of obtaining pertinent search results by conducting searches only when a direct answer cannot be provided without them.
    """
    # Structured input: the user's question.
    input: Stage1InputQuery = dspy.InputField()
    # Structured output: language, per-source search flags/queries, direct answer.
    output: Stage1OutputResult = dspy.OutputField()

# Stage-2 request payload: the original question, its language, and the
# retrieved passages from each of the three sources.
# NOTE: documented with `#` comments on purpose -- a class docstring on a
# pydantic model is picked up as the schema description, which would alter
# what the typed predictor sends to the LM.
class Stage2InputData(BaseModel):
    question: str = Field(
        description="An Islam-related question",
    )
    language: str = Field(
        description="Language of the question",
    )

    # Retrieved evidence, one list per source; a list may be empty.
    hadith_results: list[str] = Field(
        description="List of Hadith search results that might be relevant to the question",
    )
    quran_results: list[str] = Field(
        description="List of Quran search results that might be relevant to the question",
    )
    mawsuah_results: list[str] = Field(
        description="List of Mawsuah (Encyclopedia of Islamic Jurisprudence) search results that might be relevant to the question",
    )

# Stage-2 response payload: the final answer text, nothing else.
# NOTE: documented with `#` comments on purpose -- a class docstring on a
# pydantic model is picked up as the schema description, which would alter
# what the typed predictor sends to the LM.
class Stage2OutputAnswer(BaseModel):
    answer: str = Field(
        description="Final answer to the question, based on the relevant search results from Hadith, Quran, and Mawsuah, in the same language as the question",
    )

# DSPy signature for stage 2: maps a Stage2InputData to a Stage2OutputAnswer.
# NOTE: DSPy uses a Signature's docstring as the task instructions given to
# the LM, so the docstring below is effectively prompt text -- editing it
# changes model behavior, not just documentation.
class GenerateStage2FinalAnswer(dspy.Signature):
    """Produce a factually based final answer to an Islam-related question using search results.

    Accept an input containing the original question, its language, and search results from Hadith, Quran, and Al Mausu'ah Al Fiqhiyah Al Kuwaitiyah (Mawsuah). Return the final answer to the question, synthesized from the provided search results and articulated in the same language as the input question.
    """
    # Structured input: question, language, and per-source search results.
    input: Stage2InputData = dspy.InputField()
    # Structured output: the final answer text.
    output: Stage2OutputAnswer = dspy.OutputField()

# One typed predictor per pipeline stage. Both use the same settings:
# up to 5 retries, with error explanations enabled (presumably fed back to the
# LM when its output fails to parse/validate -- confirm against the DSPy docs).
stage1_predictor = dspy.TypedPredictor(
    GenerateStage1Output,
    max_retries=5,
    explain_errors=True,
)
stage2_predictor = dspy.TypedPredictor(
    GenerateStage2FinalAnswer,
    max_retries=5,
    explain_errors=True,
)

from tools.search_hadith import SearchHadith
from tools.search_quran import SearchQuran
from tools.search_mawsuah import SearchMawsuah

# Language-model clients. Only `llm_gpt4` is used by the pipeline code in this
# notebook; the others are kept available for experimentation.
# NOTE(review): `openai_key`, `kalimat_api_key` and the VECTARA_* constants are
# not defined in this file -- presumably they come from `from env_vars import *`.
llm_gpt4 = dspy.OpenAI(model='gpt-4', api_key=openai_key)
# 'gpt-3.5' is not a published OpenAI model identifier; the chat model is
# named 'gpt-3.5-turbo', so requests with the old value would be rejected.
llm_gpt3 = dspy.OpenAI(model='gpt-3.5-turbo', api_key=openai_key)
llm_ollama_phi3 = dspy.OllamaLocal(model='phi3')
llm_ollama_llama3 = dspy.OllamaLocal(model='llama3:instruct')

# Search clients, one per source: Mawsuah, Hadith, Quran.
ms = SearchMawsuah(auth_token=VECTARA_AUTH_TOKEN, customer_id=VECTARA_CUSTOMER_ID, corpus_id=VECTARA_CORPUS_ID)
hs = SearchHadith(api_key=kalimat_api_key)
qs = SearchQuran(api_key=kalimat_api_key)

from pprint import pprint

def _run_search(searcher, query: str):
    """Return ``searcher.run_as_list(query)`` (echoed via pprint), or ``[]`` for a blank query."""
    query = query.strip()
    if not query:
        # Nothing meaningful to look up; skip the backend call entirely.
        return []
    results = searcher.run_as_list(query)
    pprint(results)
    return results


def process_question(question: str) -> str:
    """Answer an Islam-related question using the two-stage pipeline.

    Stage 1 asks the LM for the question's language, per-source search
    flags and Arabic queries, and an optional direct answer. If no search
    is flagged and a non-blank direct answer is present, that answer is
    returned immediately. Otherwise the selected searches are executed and
    stage 2 asks the LM to synthesize the final answer from the results.

    Intermediate predictions and search results are pretty-printed as a
    debugging aid.

    Args:
        question: The user's question, in any language.

    Returns:
        The direct answer from stage 1, or the synthesized answer from
        stage 2.
    """
    # Stage 1: generate search queries or provide a direct answer.
    with dspy.context(lm=llm_gpt4):
        stage1_prediction = stage1_predictor(input=Stage1InputQuery(query=question))
    plan = stage1_prediction.output
    pprint(plan.dict())

    search_requested = (
        plan.requires_hadith_search
        or plan.requires_quran_search
        or plan.requires_mawsuah_search
    )
    if not search_requested and plan.direct_answer.strip():
        return plan.direct_answer

    # Honor the per-source flags produced by stage 1. If the model flagged no
    # source yet also gave no usable direct answer, its output is
    # inconsistent; fall back to running every non-blank query so stage 2
    # still has material to work with.
    run_all = not search_requested
    relevant_hadiths = (
        _run_search(hs, plan.hadith_search_query)
        if run_all or plan.requires_hadith_search
        else []
    )
    relevant_quran = (
        _run_search(qs, plan.quran_search_query)
        if run_all or plan.requires_quran_search
        else []
    )
    relevant_mawsuah = (
        _run_search(ms, plan.mawsuah_search_query)
        if run_all or plan.requires_mawsuah_search
        else []
    )

    # Stage 2: generate the final answer using the search results.
    with dspy.context(lm=llm_gpt4):
        stage2_input = Stage2InputData(
            question=question,
            language=plan.language,
            hadith_results=relevant_hadiths,
            quran_results=relevant_quran,
            mawsuah_results=relevant_mawsuah,
        )
        stage2_prediction = stage2_predictor(input=stage2_input)
        pprint(stage2_prediction.output.dict())
    return stage2_prediction.output.answer

# Demo run: push one sample, non-Arabic (Turkish) question through the
# pipeline and print the resulting answer.
question = "kadinlarin basortusu takmalari sart mi?"
final_answer = process_question(question)
print(final_answer)


In [None]:
# Debug aid: have the GPT-4 client display its recorded prompt/completion
# history, to inspect what was actually sent to and returned by the model.
llm_gpt4.inspect_history()