In [19]:
# from dotenv import load_dotenv
# load_dotenv("../../.venv")

import dspy
from pydantic import BaseModel, Field

# Language model shared by every DSPy module in this notebook.
turbo = dspy.OpenAI(
    model='gpt-3.5-turbo-1106',
    max_tokens=300,
    temperature=1,
)

# Retrieval model: a hosted ColBERTv2 endpoint.
colbertv2_wiki17_abstracts = dspy.ColBERTv2(
    url='http://20.102.90.50:2017/wiki17_abstracts',
)

# Register both as the DSPy-wide defaults so modules need not receive them explicitly.
dspy.settings.configure(
    lm=turbo,
    rm=colbertv2_wiki17_abstracts,
)

##### Pydantic : packing I/O

In [69]:
# Signature (with pydantic-tuned "input" and "output") ############################################################

# Typed input payload: the passage to read and the question about it travel
# together as one object instead of two separate signature inputs.
class Input(BaseModel): # Packing 2 inputs into 1 object
    # Text the answer should be drawn from.
    context: str = Field(description="The context for the question")
    # Question to be answered using `context`.
    query: str = Field(description="The question to be answered")

# Typed output payload: the answer plus a self-reported confidence score.
class Output(BaseModel): # Putting a 'confidence' output together with the answer
    # Answer text produced for the question.
    answer: str = Field(description="The answer for the question")
    # Validated by pydantic to lie in the closed interval [0, 1] (ge=0, le=1).
    confidence: float = Field(ge=0, le=1, description="The confidence score for the answer") # Enforcing [0,1]

# Signature tying the typed `Input` payload to the typed `Output` payload.
# The class docstring is the instruction text, so it has to ask for the
# confidence on the same 0-1 scale that `Output.confidence` validates
# (ge=0, le=1); asking for a 0-10 score would invite values that fail validation.
class QASignature(dspy.Signature): # Now, we compose a class for the signature
    """Answer the question based on the context and query provided, and on the scale of 0-1 tell how confident you are about the answer."""
    # Single packed input object (context + query).
    input: Input = dspy.InputField()
    # Single packed output object (answer + confidence).
    output: Output = dspy.OutputField()

# Module ###########################################################################################################

# Build a typed predictor from the class-based signature defined above.
predictor = dspy.TypedPredictor(QASignature) # 'dspy.TypedPredictor("input:Input -> output:Output")' = inline signature
# or predictor = dspy.TypedChainOfThought(QASignature)

# One sample request: a passage and a question about it.
doc_query_pair = Input(
    context="The quick brown fox jumps over the lazy dog",
    query="What does the fox jumps over?",
)

# The payload is passed by keyword, using the signature's field name `input`.
prediction = predictor(input=doc_query_pair) # the positional form predictor(doc_query_pair) fails (original note: it expects a dictionary)

# Only the last bare expression of a cell is rendered as its output; the two
# attribute accesses before it are evaluated but not displayed.
prediction.output.answer
prediction.output.confidence
prediction.output

0.9

##### Pydantic I/O + Module via decorator

In [58]:
# Pydantic I/O #####################################################################

# Same `Input` model as in the "Pydantic : packing I/O" cell, repeated here;
# re-running this cell rebinds the name `Input`.
class Input(BaseModel): # Packing 2 inputs into 1 object
    # Text the answer should be drawn from.
    context: str = Field(description="The context for the question")
    # Question to be answered using `context`.
    query: str = Field(description="The question to be answered")

# Same `Output` model as in the "Pydantic : packing I/O" cell, repeated here;
# re-running this cell rebinds the name `Output`.
class Output(BaseModel): # Putting a 'confidence' output together with the answer
    # Answer text produced for the question.
    answer: str = Field(description="The answer for the question")
    # Validated by pydantic to lie in the closed interval [0, 1] (ge=0, le=1).
    confidence: float = Field(ge=0, le=1, description="The confidence score for the answer") # Enforcing [0,1]

# Module ###########################################################################################################

# Decorator form of the typed predictor: the stub below carries no logic of its
# own. Its parameter annotation names the input type, its return annotation
# names the output type, and its docstring supplies the instruction text.
# Note that this rebinds the notebook-level name `predictor`.
@dspy.predictor # or "@dspy.cot"
def predictor(doc_query_pair: Input) -> Output:
    """Answer the question based on the context and query provided, and on the scale of 0-1 tell how confident you are about the answer."""

# Run ##############################################################################################################

# One sample request: a passage and a question about it.
doc_query_pair = Input(
    context="The quick brown fox jumps over the lazy dog",
    query="What does the fox jumps over?",
)

# The payload is passed by keyword, using the decorated function's parameter name.
prediction = predictor(doc_query_pair=doc_query_pair) # the positional form predictor(doc_query_pair) fails (original note: it expects a dictionary)

# Last bare expression of the cell, so its value is rendered as the cell output.
prediction

Output(answer='the lazy dog', confidence=0.9)

##### Basic 'Example' Usage

In [55]:
# Build an Example with four fields and mark two of them as inputs.
# NOTE(review): the commented-out variant at the bottom of this cell uses
# `.without("answer", "rationale")` instead of `.with_inputs(...)`; confirm the
# two are really interchangeable before relying on the original "equivalent" claim.

article_summary = dspy.Example(
    context="This is an article.", question="This is a question?",
    answer="This is an answer.", rationale= "This is a rationale."
).with_inputs("context", "question")

# Only the last bare expression of a cell is displayed, so the `inputs()` call
# is evaluated here but its result is not shown.
article_summary.inputs() # the fields marked as inputs above ("context", "question")
article_summary.labels() # presumably the remaining fields ("answer", "rationale") -- TODO confirm

# print(article_summary.inputs().context)
# print(article_summary.context)

# Plain attribute assignment on the Example object.
article_summary.new_attribute = True

# article_summary = dspy.Example(
#     context="This is an article.", question="This is a question?",
#     answer="This is an answer.", rationale= "This is a rationale."
# ).without("answer", "rationale")

##### Basic CoT + 'hint'

In [68]:
# Minimal question -> answer signature. The class docstring is the instruction
# text, so it is left exactly as written.
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""
    # Free-form question supplied by the caller.
    question = dspy.InputField()
    # `desc` nudges the model towards a short answer.
    answer = dspy.OutputField(desc="often between 1 and 5 words")

# Chain-of-thought module over `BasicQA` that also accepts a `hint` keyword.
generate_answer = dspy.ChainOfThoughtWithHint(BasicQA)

# Call the predictor alongside a hint.
question='What is the color of the sky?'
hint = "It's what you often see during a sunny day."
pred = generate_answer(question=question, hint=hint)

# Only the last bare expression of a cell is displayed; the three lines before
# `pred` are evaluated but not shown.
question
pred.rationale
pred.answer
pred

'Blue'

##### Factuality Metric

In [None]:
# Signature for an LLM judge that decides whether an answer is supported by the
# given context. The field descriptions are part of the prompt, which is why
# the misspelling "prediciton" in the `context` description is corrected.
class FactJudge(dspy.Signature):
    """Judge if the answer is factually correct based on the context."""

    # Inputs: the passage, the question that was asked, and the answer under review.
    context = dspy.InputField(desc="Context for the prediction")
    question = dspy.InputField(desc="Question to be answered")
    answer = dspy.InputField(desc="Answer for the question")
    # Output: the prefix asks for a Yes/No verdict.
    factually_correct = dspy.OutputField(desc="Is the answer factually correct based on the context?", prefix="Factual[Yes/No]:")

# Chain-of-thought judge built from the FactJudge signature.
judge = dspy.ChainOfThought(FactJudge)

def factuality_metric(example, pred, trace=None):
    """Score a prediction as 1 if the LLM judge deems it factual, else 0.

    Args:
        example: object exposing `.context` and `.question`.
        pred: object exposing `.answer` (the answer being judged).
        trace: optional extra argument, unused here; accepted so the metric can
            also be called with a third positional/keyword argument.

    Returns:
        int: 1 when the judge's `factually_correct` field starts with "yes"
        (case-insensitive, surrounding whitespace ignored), otherwise 0.
    """
    verdict = judge(context=example.context, question=example.question, answer=pred.answer)
    # The judge returns a prediction object, not a string: comparing the object
    # itself to "Yes" is always False, so read the declared output field instead.
    verdict_text = str(getattr(verdict, "factually_correct", "") or "")
    # Tolerate variations such as "yes", "Yes.", or " Yes, because ...".
    return int(verdict_text.strip().lower().startswith("yes"))

##### Knowledge Management Section

In [None]:
# Reference notebooks and examples:
# - https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/examples/longformqa/longformqa_assertions.ipynb#scrollTo=fKc-ij_jopBs
# - https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/examples/quiz/quiz_assertions.ipynb
# - https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/examples/tweets/tweets_assertions.ipynb
# - https://colab.research.google.com/drive/1CpsOiLiLYKeGrhmq579_FmtGsD5uZ3Qe#scrollTo=hVrLgbZvbJ97
# - https://github.com/stanfordnlp/dspy/blob/main/examples/tweets/tweet_metric.py
# - https://github.com/saifulhaq95/DSPy-Indic/blob/main/indicxlni.ipynb
# - https://github.com/stanfordnlp/dspy/blob/main/examples/nli/scone/scone.ipynb
# - https://github.com/stanfordnlp/dspy/blob/main/skycamp2023.ipynb
# - https://github.com/stanfordnlp/dspy/blob/main/examples/qa/hotpot/multihop_finetune.ipynb


In [16]:
# sentence = "it's a charming and often affecting journey."  # example from the SST-2 dataset.
# classify = dspy.Predict('sentence -> sentiment')
# classify(sentence=sentence).sentiment

# question = "What's something great about the ColBERT retrieval model?"
# # 1) Declare with a signature, and pass some config.
# classify = dspy.ChainOfThought('question -> answer', n=5)
# # 2) Call with input argument.
# response = classify(question=question)
# # 3) Access the outputs.
# response.completions.answer

# import dspy
# context = ["Roses are red.", "Violets are blue"]
# question = "What color are roses?"
# @dspy.predictor # automatically generates a dspy.TypedPredictor
# def generate_answer(self, context: list[str], question) -> str:
#     """Answer questions with short factoid answers."""
#     pass
# generate_answer(context=context, question=question)

# import dspy

# context = ["Roses are red.", "Violets are blue"]
# question = "What color are roses?"
# @dspy.cot # automatically generates a dspy.TypedPredictor
# def generate_answer(self, context: list[str], question) -> str:
#     """Answer questions with short factoid answers."""
#     pass
# generate_answer(context=context, question=question)

# class SimplifiedBaleen(dspy.Module):
#     def __init__(self, passages_per_hop=2, max_hops=2):
#         super().__init__()

#         self.generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)]
#         self.retrieve = dspy.Retrieve(k=passages_per_hop)
#         self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
#         self.max_hops = max_hops

#     def forward(self, question):
#         context = []
#         prev_queries = [question]

#         for hop in range(self.max_hops):
#             query = self.generate_query[hop](context=context, question=question).query
#             prev_queries.append(query)
#             passages = self.retrieve(query).passages
#             context = deduplicate(context + passages)
        
#         pred = self.generate_answer(context=context, question=question)
#         pred = dspy.Prediction(context=context, answer=pred.answer)
#         return pred

# baleen = SimplifiedBaleen()

# baleen(question = "Which award did Gary Zukav's first book receive?")


# parameters = {
#     "Assert": {
#         "parameter": {
#             "constraint": "Boolean Validation check Outcome",
#             "msg": "User Defined Error message for correction",
#             "backtrack": "Specifies target module (usually the last) for retry attempts upon failure."
#         },
#         "behaviour": "Initiates retry upon failure, dynamic signature modification of the pipeline. If no improvement, halts. Adds fields like 'past_output' that failed validation, 'instruction' which is provided by the user to recover.",
#         "application": "Useful as checkers / gate-keepers in production code"
#     },
#     "Suggest": {
#         "parameter": {
#             "constraint": "Boolean Validation check Outcome",
#             "msg": "User Defined Error message for correction",
#             "backtrack": "Specifies target module (usually the last) for retry attempts upon failure."
#         },
#         "behaviour": "Encourages self-refinement without hardstops, logs failure after backtracking attempts",
#         "application": "Useful as guide or helpers in evaluation phase."
#     }
# }


# qa_pred = dspy.Predict('question -> answer')
# print(qa_pred)

# class MultiClass(dspy.Signature):
#     """Classifiy the given data into Address, Human's Name and Age"""
#     sentence = dspy.InputField(desc="data to be classified")
#     data_type = dspy.OutputField(desc="falls in one of categories")

# pred_class = dspy.Predict(MultiClass)
# print(pred_class)

# get_json = Predict('required_data -> json_output')
# prompt = "provide one example of Address, Location, Human Name, Building Name and Amount"
# with context(lm=turbo):
#     resp = get_json(required_data=prompt)
#     print(resp)

In [10]:
# import dspy

# context = ["Roses are red.", "Violets are blue"]
# question = "What color are roses?"

# @dspy.cot
# def generate_answer(self, context: list[str], question) -> str:
#     """Answer questions with short factoid answers."""
#     pass

# print(generate_answer)

# generate_answer(context=context, question=question)

# # gpt3_turbo.inspect_history(n=1)

predictor.predictor = Predict(StringSignature(context, question -> reasoning, generate_answer
    instructions='Answer questions with short factoid answers.'
    context = Field(annotation=list[str] required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Context:', 'desc': '${context}'})
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    reasoning = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the generate_answer}. We ...', '__dspy_field_type': 'output'})
    generate_answer = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Generate Answer:', 'desc': '${generate_answer}'})
))


'Roses are red.'

In [None]:
# class Tweeter(dspy.Module):
#     def __init__(self):
#         super().__init__()
#         self.generate_tweet = dspy.ChainOfThought(GenerateTweet)

#     def forward(self, question, answer):
#         context = []
#         max_hops = 2
#         passages_per_hop = 3
#         generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)]
#         retrieve = dspy.Retrieve(k=passages_per_hop)
#         for hop in range(max_hops):
#             query = generate_query[hop](context=context, question=question).query
#             passages = retrieve(query).passages
#             context = deduplicate(context + passages)
#         generated_tweet = self.generate_tweet(question=question, context=context).tweet
#         return dspy.Prediction(generated_tweet, context=context)

#     dspy.Suggest(has_no_hashtags(generated_tweet), "Please revise the tweet to remove hashtag phrases following it.")
#     dspy.Suggest(is_within_length_limit(generated_tweet, 280), "Please ensure the tweet is within {280} characters.")
#     dspy.Suggest(has_correct_answer(generated_tweet, answer), "The tweet does not include the correct answer to the question. Please revise accordingly.")
#     dspy.Suggest(is_assessment_yes(engaging_assessment.assessment_answer), "The text is not engaging enough. Please revise to make it more captivating.")
#     dspy.Suggest(is_assessment_yes(faithful_assessment.assessment_answer), "The text contains unfaithful elements or significant facts not in the content. Please revise for accuracy.")


In [5]:
# query='When was the first FIFA World Cup held?'

# # Call the retriever on a particular query.
# retrieve = dspy.Retrieve(k=3)
# topK_passages = retrieve(query).passages

# print(f"Top {retrieve.k} passages for question: {query} \n", '-' * 30, '\n')

# for idx, passage in enumerate(topK_passages):
#     print(f'{idx+1}]', passage, '\n')

AssertionError: No RM is loaded.

In [70]:
# class MultiHopQA(dspy.Module):
#     def __init__(self):
#         self.retrieve = dspy.Retrieve(k=3)
#         self.gen_query = dspy.ChainOfThought("context, question -> query")
#         self.gen_answer = dspy.ChainOfThought("context, query -> answer")
#     def forward(self, question):
#         context = []
#         for hop in range(2):
#             query = self.gen_query(context=context, question=question).query
#             context+= self.retrieve(query).passages
#         return self.gen_answer(context=context, question=question)