In [1]:
from env_vars import *

In [2]:
from pprint import pprint

import dspy
import openai
import backoff
from lingua import LanguageDetectorBuilder
from dspy.retrieve.vectara_rm import VectaraRM

from tools.search_hadith import SearchHadith
from tools.search_quran import SearchQuran
from tools.search_mawsuah import SearchMawsuah

In [3]:
class OpenAIWrapper(dspy.dsp.modules.gpt3.GPT3):
    # GPT3 subclass that overrides `request` so the retry policy covers
    # openai.RateLimitError and openai.InternalServerError.

    @backoff.on_exception(
        backoff.expo,
        (openai.RateLimitError, openai.InternalServerError),
        on_backoff=dspy.dsp.modules.gpt3.backoff_hdlr,
        max_time=1000,
    )
    def request(self, prompt: str, **kwargs):
        """Send ``prompt`` through ``basic_request``, retrying on transient OpenAI errors.

        Retries use exponential backoff (``backoff.expo``) bounded by
        ``max_time=1000``; each retry is reported via dspy's ``backoff_hdlr``.

        Args:
            prompt: Prompt text to forward.
            **kwargs: Extra request options. A ``model_type`` entry, if
                present, is dropped before forwarding.

        Returns:
            Whatever ``self.basic_request`` returns.
        """
        # Discard `model_type` when supplied; a missing key is not an error.
        kwargs.pop("model_type", None)
        return self.basic_request(prompt, **kwargs)

In [4]:
from tools.kalimat_rm import KalimatRM

In [5]:
# Signature for the Ansari question-answering task: one input (`question`) and
# one output (`answer`).
# NOTE: dspy uses a Signature's docstring as the task instructions given to the
# language model, so the text below is runtime prompt content rather than
# ordinary documentation -- any wording change alters model behaviour.
class GenerateAnswer(dspy.Signature):
    """You are Ansari, a multilingual Islamic bot designed to answer Islam-related questions with accuracy and depth. Fluent in languages such as Arabic (including transliteration), Bahasa, Bosnian, French, Turkish, Urdu, and more, you, Ansari, craft precise, evidence-based responses exclusively from the Sunni tradition.

Here is how you work: You receive a question which you will respond to in the same language. You form search queries to search the Hadith, Quran, and Mawsuah, an encyclopedia of Islamic jurisprudence (fiqh). You, Ansari, will then provide a concise, well-supported answer, citing classical scholars like Al Ghazali, Ibn Al Qayyim, Ibn Taymiyah, Imam Shafiee, Imam Nawawi, Imam Abu Hanifah, Ibn Hajr al Asqalani, Imam Ahmad bin Hanbal, Imam Malik, and Ibn Hazm, as well as modern scholars like Yusuf Al Qaradawi, Yasir Qadhi, Ma'in Al Qudah, Shu'aib Al Arnaout, Hamza Yusuf, Zaid Shakir, Taqiuddin Usmani, Muhammad Shinqeeti, Ismail Menk, Omar Suleiman, Salman Al-Awdah, Jamaaluddin Zarabozo, and Yaser Birjas.

When referencing the Quran, you, Ansari, include the ayah number, Arabic text, and translation (if the user's language is different from Arabic). For Hadith, only those found in the search results are used, complete with the collection, LK ID, text, and grade. If unsure about a Hadith reference, you, Ansari, will indicate this clearly as, "I believe (though not 100% sure of the reference) there is a hadith that says: [text of hadith]." Being especially cautious about obligatory or prohibited matters, you, Ansari, ensure all answers are backed by direct evidence. Instead of vague references, specific scholars are quoted for clarity.

Continually enhance your language capabilities for the Holy Quran, Hadith, and Mawsuah by practicing effective search queries. Engage consistently with diverse questions posing challenges, analyzing them thoroughly to craft authentic Arabic search queries. Regular interaction with different topics ensures a richer vocabulary and precise queries targeting each source appropriately.

You, Ansari, will answer questions with thorough, well-researched answers, grounded in the rich tradition of Sunni scholarship."""

    # Input: the user's question, passed in by the caller as `question=...`.
    # NOTE(review): dspy's documented field keyword is `desc`; confirm that
    # `description` is honoured by the installed dspy version.
    question: str = dspy.InputField(description="An Islam-related question")
    # Output: the final answer text produced for the question.
    answer: str = dspy.OutputField(description="Final answer to the question")

In [6]:
# Retrieval models, one per source, each configured with k=3.
quran_rm = KalimatRM(k=3, source="quran", kalimat_api_key=KALEMAT_API_KEY)
hadith_rm = KalimatRM(k=3, source="hadith", kalimat_api_key=KALEMAT_API_KEY)
mawsuah_rm = VectaraRM(
    VECTARA_CUSTOMER_ID,
    VECTARA_CORPUS_ID,
    VECTARA_AUTH_TOKEN,
    k=3,
)

# A tool is an object exposing `name`, `desc` and `input_variable`; it is
# rendered as: "{tool.name}[{tool.input_variable}], which {tool.desc}"
qs = SearchQuran(KALEMAT_API_KEY)
hs = SearchHadith(KALEMAT_API_KEY)
ms = SearchMawsuah(VECTARA_AUTH_TOKEN)

# Display names.
qs.name = "Search Quran"
hs.name = "Search Hadith"
ms.name = "Search Mawsuah, an encyclopedia of Islamic jurisprudence (fiqh)"

# Descriptions. Adjacent literals are concatenated by the parser, so the
# runtime strings are unchanged (including the double space after "law.").
qs.desc = (
    "Search the quran for relevant verses. "
    "Returns a list of verses. "
    "Multiple verses may be relevant."
)
hs.desc = (
    "Search the Hadith for relevant narrations. "
    "Returns a list of hadith. "
    "Multiple hadith may be relevant."
)
ms.desc = (
    "Queries an encyclopedia of Islamic jurisprudence (fiqh) for relevant rulings. "
    "You call this function when you need to provide information about Islamic law.  "
    "Regardless of the language used in the original conversation, you will compose the query in Arabic before searching the encyclopedia. "
    "The function returns a list of **potentially** relevant matches, which may include multiple paragraphs."
)

# Every tool takes a single argument called "query".
qs.input_variable = hs.input_variable = ms.input_variable = "query"

tools = [qs, hs, ms]

In [9]:
# Chat-model client built on the OpenAIWrapper class defined in this notebook.
lm_gpt3 = OpenAIWrapper(
    model="gpt-3.5-turbo",
    model_type="chat",
    max_tokens=2000,
    api_key=OPENAI_API_KEY,
)

# ReAct module over the GenerateAnswer signature and the three search tools.
react_module = dspy.ReAct(
    GenerateAnswer,
    tools=tools,
    max_iters=5,
    num_results=3,
)

# Sample run with a Turkish-language question, using lm_gpt3 only inside this context.
question = "kadinların başörtüsü takmaları şart mı?"
with dspy.context(lm=lm_gpt3):
    answer = react_module(question=question)
pprint(answer)

Prediction(
    observations=['Failed to parse action. Bad formatting or incorrect action name.', 'Failed to parse action. Bad formatting or incorrect action name.', 'Failed to parse action. Bad formatting or incorrect action name.'],
    answer='Headscarves are obligatory for Muslim women as a form of modesty and obedience to Islamic teachings.'
)


In [None]:
# TODO: dspy's built-in ReAct module is unreliable here -- the run above logged only
# "Failed to parse action" observations. It should be reimplemented along the lines of this blog post:
# https://www.databricks.com/blog/optimizing-databricks-llm-pipelines-dspy
# 