In [None]:
from env_vars import *

In [None]:
"""
This two-stage program answers Islam-related questions by leveraging a large language model and searching through authoritative Islamic sources. In the first stage, the program accepts a question, determines the language, and generates a relevant search query for Hadith, Quran, or the Encyclopedia of Islamic Jurisprudence (Mawsuah), if necessary. If a direct answer can be provided without searching, it is returned at this stage. Otherwise, the search query is executed, and relevant results are gathered. In the second stage, the original question, its language, and the search results are used to generate a factually based final answer. The answer is provided in the same language as the input question.
"""

import dspy
from enum import Enum, auto
from pydantic import BaseModel, Field

# Closed set of sources that a Stage 1 prediction can route a search to.
# Mixing in `str` makes every member compare equal to its plain string value.
class SearchSource(str, Enum):
    HADITH = "Hadith"
    QURAN = "Quran"
    MAWSUAH = "Mawsuah"  # the Encyclopedia of Islamic Jurisprudence
    NONE = "None"  # the literal string "None" (not Python's None): no source selected

# Stage 1 input payload: wraps the user's raw question for GenerateStage1Output.
# The Field description is runtime text, not a comment.
class Stage1InputQuery(BaseModel):
    query: str = Field(description="Islam-related question")

# Stage 1 output payload: the routing decision produced for a question.
# process_question returns `direct_answer` as-is when it is non-blank and
# otherwise runs `search_query` against `search_source`.
class Stage1OutputResult(BaseModel):
    # Detected language of the question; forwarded to Stage 2.
    language: str = Field(description="Language of the question")
    # Which source to search; SearchSource.NONE when no search is wanted.
    search_source: SearchSource = Field(description="Indicates the source for search: Hadith, Quran, Mawsuah, or None")
    # Query text to send to the chosen source's search client.
    search_query: str = Field(description="Query for the specified search source in Arabic")
    # Required string field; a blank / whitespace-only value is treated by
    # process_question as "no direct answer".
    direct_answer: str = Field(description="Direct answer to the question in the same language as the question, if no search is required")

# DSPy signature for Stage 1: maps a Stage1InputQuery to a Stage1OutputResult.
# NOTE(review): per the DSPy documentation a Signature's docstring is used as
# the task instructions given to the model, so the docstring below is runtime
# prompt text — edit it only when a prompt change is intended.
class GenerateStage1Output(dspy.Signature):
    """Formulate a search query for an Islamic source only when a direct answer is not possible, based on a given question, to optimize the retrieval of relevant search results.

    Accept an Islam-related question, which may not be in Arabic, and determine the appropriate source (Hadith, Quran, or Mawsuah) for the search query in the Arabic language. The goal is to maximize the likelihood of obtaining pertinent search results by conducting a search only when a direct answer cannot be provided without it.
    """
    # Field names `input` / `output` are the keyword and attribute names that
    # process_question uses when calling the predictor and reading its result.
    input: Stage1InputQuery = dspy.InputField()
    output: Stage1OutputResult = dspy.OutputField()

# Stage 2 input payload: everything needed to compose the final answer.
# process_question fills `language` and `searched_source` from the Stage 1
# output and `search_results` from the matching search client.
class Stage2InputData(BaseModel):
    # The original question, unchanged from what the caller passed in.
    question: str = Field(description="An Islam-related question")
    # Language detected in Stage 1.
    language: str = Field(description="Language of the question")
    # Source chosen in Stage 1. process_question passes the Stage 1 value
    # through unchanged, so this can also be SearchSource.NONE.
    searched_source: SearchSource = Field(description="The source that was searched: Hadith, Quran, or Mawsuah")
    # Results returned by the search client; empty when no search was run.
    search_results: list[str] = Field(description="List of search results that might be relevant to the question")

# Stage 2 output payload: the final answer text that process_question returns.
class Stage2OutputAnswer(BaseModel):
    answer: str = Field(description="Final answer to the question in the same language as the question, based on the relevant search results from Hadith, Quran, or Mawsuah")

# DSPy signature for Stage 2: maps a Stage2InputData to a Stage2OutputAnswer.
# NOTE(review): per the DSPy documentation a Signature's docstring is used as
# the task instructions given to the model, so the docstring below is runtime
# prompt text — edit it only when a prompt change is intended.
class GenerateStage2FinalAnswer(dspy.Signature):
    """Produce a factually based final answer to an Islam-related question using search results.

    Accept an input containing the original question, its language, the searched source, and the search results. Return the final answer to the question, synthesized from the provided search results and articulated in the same language as the input question.
    """
    # Field names `input` / `output` are the keyword and attribute names that
    # process_question uses when calling the predictor and reading its result.
    input: Stage2InputData = dspy.InputField()
    output: Stage2OutputAnswer = dspy.OutputField()

# One typed predictor per pipeline stage, built from the two signatures
# defined in this file. Both use the same options (max_retries=5,
# explain_errors=True).
# NOTE(review): presumably these options make DSPy re-prompt when the model's
# reply cannot be parsed into the typed output — confirm against the DSPy
# version in use.
stage1_predictor = dspy.TypedPredictor(GenerateStage1Output, max_retries=5, explain_errors=True)
stage2_predictor = dspy.TypedPredictor(GenerateStage2FinalAnswer, max_retries=5, explain_errors=True)

from tools.search_hadith import SearchHadith
from tools.search_quran import SearchQuran
from tools.search_mawsuah import SearchMawsuah

# Language-model clients. Only llm_gpt4 is used by the code visible in this
# file; llm_gpt3 is constructed but not referenced here.
# NOTE(review): openai_key, kalimat_api_key and the VECTARA_* names are not
# defined in this section — presumably they come from `from env_vars import *`;
# confirm.
llm_gpt4 = dspy.OpenAI(model='gpt-4-turbo-2024-04-09', api_key=openai_key)
llm_gpt3 = dspy.OpenAI(model='gpt-3.5-turbo-0125', api_key=openai_key)
# Search clients, one per SearchSource; process_question calls
# run_as_list(search_query) on the one matching the Stage 1 decision.
ms = SearchMawsuah(auth_token=VECTARA_AUTH_TOKEN, customer_id=VECTARA_CUSTOMER_ID, corpus_id=VECTARA_CORPUS_ID)
hs = SearchHadith(api_key=kalimat_api_key)
qs = SearchQuran(api_key=kalimat_api_key)

from pprint import pprint

def process_question(question: str) -> str:
    """Answer an Islam-related question with the two-stage pipeline.

    Stage 1 asks the model (``llm_gpt4``) for the question's language and for
    either a direct answer or a search source plus search query. A non-blank
    direct answer is returned immediately. Otherwise the chosen source is
    searched and Stage 2 asks the model to compose the final answer from the
    question, its language, and the search results.

    The Stage 1 output, the search results (when a search runs), and the
    Stage 2 output are pretty-printed to stdout for inspection.

    Args:
        question: The user's question.

    Returns:
        The Stage 1 direct answer if one was given, else the Stage 2 answer.
    """
    # Stage 1: generate a search query or provide a direct answer.
    with dspy.context(lm=llm_gpt4):
        stage1_prediction = stage1_predictor(input=Stage1InputQuery(query=question))
    stage1_output = stage1_prediction.output
    # model_dump() replaces the Pydantic-v1 .dict(), which is deprecated in v2.
    pprint(stage1_output.model_dump())

    # `direct_answer` is a required string, so "no direct answer" shows up as
    # a blank value rather than a missing one.
    if stage1_output.direct_answer.strip():
        return stage1_output.direct_answer

    search_source = stage1_output.search_source
    search_query = stage1_output.search_query

    # Route the query to the client for the chosen source. SearchSource.NONE
    # has no client, so in that case no search runs and Stage 2 is invoked
    # with an empty result list.
    searchers = {
        SearchSource.HADITH: hs,
        SearchSource.QURAN: qs,
        SearchSource.MAWSUAH: ms,
    }
    search_results = []
    searcher = searchers.get(search_source)
    if searcher is not None:
        search_results = searcher.run_as_list(search_query)
        pprint(search_results)

    # Stage 2: generate the final answer using the search results.
    stage2_input = Stage2InputData(
        question=question,
        language=stage1_output.language,
        searched_source=search_source,
        search_results=search_results,
    )
    with dspy.context(lm=llm_gpt4):
        stage2_prediction = stage2_predictor(input=stage2_input)
    stage2_output = stage2_prediction.output
    pprint(stage2_output.model_dump())
    return stage2_output.answer

# Demo run: push one sample question through the pipeline and print the result.
# The question is Turkish written without diacritics (roughly: "Is it
# mandatory for women to wear a headscarf?").
# Executing this calls the language model and, depending on the Stage 1
# decision, one of the search clients.
question = "kadinlarin basortusu takmalari sart mi?"
final_answer = process_question(question)
pprint(final_answer)
