In [46]:
import dspy
import numpy as np

import pprint
pprint = pprint.PrettyPrinter(indent=4).pprint

turbo = dspy.OpenAI(model="gpt-3.5-turbo", max_tokens=1000)
dspy.settings.configure(lm=turbo)

# Setting our Examples

In [47]:
grug_text_sentences = [
    'this collection of thoughts on software development gathered by grug brain developer',
    'grug brain developer not so smart, but grug brain developer program many long year and learn some things\nalthough mostly still confused',
    'grug brain developer try collect learns into small, easily digestible and funny page, not only for you, the young grug, but also for him\nbecause as grug brain developer get older he forget important things, like what had for breakfast or if put pants on',
    'big brained developers are many, and some not expected to like this, make sour face',
    'THINK they are big brained developers many, many more, and more even definitely probably maybe not like this, many\nsour face (such is internet)',
    '(note: grug once think big brained but learn hard way)',
    'is fine!',
    'is free country sort of and end of day not really matter too much, but grug hope you fun reading and maybe learn from\nmany, many mistake grug make over long program life',
    'apex predator of grug is complexity',
    'complexity bad'
 ]

plain_english_translations = [
    "A collection of thoughts on software development gathered by a developer named Grug Brain.",
    "Grug Brain is not a genius, but has programmed for a long time and learned some things, although still often confused.",
    "Grug Brain is trying to collect his learnings into a short, funny, and easy-to-read page for both himself and others.",
    "Some experienced developers might not like this, and might make a sour face.",
    "Many experienced developers might not like this, and will probably make a sour face on the internet.",
    "Grug used to think he was smart, but learned the hard way that he's not.",
    "It is fine.",
    "It's okay, it's a free country, and in the end, it doesn't matter much, but Grug hopes you have fun reading and learn from his mistakes.",
    "Grug's biggest enemy is complexity.",
    "Complexity is bad."
]

dataset = []
for i in range(len(grug_text_sentences)):
    grug_sentence = grug_text_sentences[i]
    translated_sentence = plain_english_translations[i]
    dataset.append({"grug_text":grug_sentence, "plain_english":translated_sentence})
    
examples = [] # List[dspy.Example]
for pair in dataset:
    examples.append(
        dspy.Example(
            grug_text=pair["grug_text"],
            plain_english=pair["plain_english"]
            ).with_inputs("plain_english")
        )

# Divide train & test
def split_for_train_test(values, test_size=1/3.0):
    np.random.shuffle(values)
    split = int(len(values) * (1 - test_size))
    return values[:split], values[split:]

examples_for_training, examples_for_testing = split_for_train_test(examples)

print("List of 'examples' for training: ")
pprint(examples_for_training)
print("\nList of 'examples' for testing: ")
pprint(examples_for_testing)


List of 'examples' for training: 
[   Example({'grug_text': 'is free country sort of and end of day not really matter too much, but grug hope you fun reading and maybe learn from\nmany, many mistake grug make over long program life', 'plain_english': "It's okay, it's a free country, and in the end, it doesn't matter much, but Grug hopes you have fun reading and learn from his mistakes."}) (input_keys={'plain_english'}),
    Example({'grug_text': 'THINK they are big brained developers many, many more, and more even definitely probably maybe not like this, many\nsour face (such is internet)', 'plain_english': 'Many experienced developers might not like this, and will probably make a sour face on the internet.'}) (input_keys={'plain_english'}),
    Example({'grug_text': 'big brained developers are many, and some not expected to like this, make sour face', 'plain_english': 'Some experienced developers might not like this, and might make a sour face.'}) (input_keys={'plain_english'}),
    E

# Signatures

In [48]:
class GrugSignature(dspy.Signature):
    "Translate plain english to Grug text."
    plain_english = dspy.InputField()
    grug_text = dspy.OutputField()
    # grug_text = dspy.OutputField(prefix = "The Grug Text:", format = lambda x:"==="+x+"===") 

# extra (just to display)
# print(GrugSignature.signature)
# print(GrugSignature.with_instructions)

In [49]:
# extra (templating for visualisation)
# from dspy.signatures.signature import signature_to_template
# signature_to_template(GrugSignature).query(examples[0])

In [50]:
class TranslationModule(dspy.Module):
    def __init__(self):
        super().__init__
        self.prog = dspy.ChainOfThought(GrugSignature)
    def forward(self, plain_english):
        return self.prog(plain_english=plain_english)
    
# extra
# TranslationModule().forward("You should not construct complex systems.")

In [51]:
# High ARI = high syntacticall complexity (we seek low complexity for grug_text)

def get_ARI(text):
    import re
    characters = len(re.sub(r'\s+', '', text))
    words = len(text.split())
    sentences = len(re.findall(r'[.!?\n]', text))
    if words == 0 or sentences == 0:
        return 0
    ari = 4.71 * (characters / words) + 0.5 * (words / sentences) - 21.43
    return round(ari, 2)


In [52]:
def ari_demonstration ():
    
    for ex in examples:
        sources_ari = get_ARI(ex.plain_english)
        grug_ari = get_ARI(ex.grug_text)
        
        plain_english_str = f"Plain English ({sources_ari})".rjust(21)+f" -> {ex.plain_english if len(ex.plain_english)<70 else ex.plain_english[:70]+'...'}"
        grug_text_str = f"Grug Text ({grug_ari})".rjust(21)+f" -> {ex.grug_text if len(ex.grug_text)<70 else ex.grug_text[:70]+'...'}"
        
        print(plain_english_str.rjust(21))
        print(grug_text_str.rjust(21))
        print(f"/{'+' if grug_ari > sources_ari else '-'}{abs(round(grug_ari-sources_ari,2))}\n".rjust(21))
    
ari_demonstration()


Plain English (11.68) -> It's okay, it's a free country, and in the end, it doesn't matter much...
    Grug Text (14.62) -> is free country sort of and end of day not really matter too much, but...
              /+2.94

Plain English (10.34) -> Many experienced developers might not like this, and will probably mak...
    Grug Text (14.12) -> THINK they are big brained developers many, many more, and more even d...
              /+3.78

 Plain English (8.26) -> Some experienced developers might not like this, and might make a sour...
        Grug Text (0) -> big brained developers are many, and some not expected to like this, m...
              /-8.26

Plain English (10.27) -> Grug's biggest enemy is complexity.
        Grug Text (0) -> apex predator of grug is complexity
             /-10.27

Plain English (11.05) -> Grug Brain is not a genius, but has programmed for a long time and lea...
    Grug Text (13.98) -> grug brain developer not so smart, but grug brain developer program ma..

In [63]:
def ari_metric(real_example, prediction, trace=None):

    print(real_example)
    print(prediction)

    real_ari = get_ARI(real_example.grug_text)
    pred_ari = get_ARI(prediction.grug_text)

    # print(f"ARI {real_ari} -> {pred_ari}\n{real_example.grug_text} ->\n{prediction.grug_text}\n")

    ari_result = pred_ari <= 7.01

    return ari_result

In [64]:
gpt3 = dspy.OpenAI(model="gpt-3.5-turbo", max_tokens=100, model_type='chat')

class AssessBasedOnQuestion(dspy.Signature):
    """Given the assessed, provide a Yes or No to the assessment question."""
    text_being_assessed = dspy.InputField(format=str)
    assessment_question = dspy.InputField(format=str)
    assessment_answer = dspy.OutputField(desc='Yes or No')

def similarity_metric(real_example, prediction, trace=None):

    prompt = f"""
    Does the text being assessed have the same meaning as the gold_standard text?
    Gold Standard: "{real_example.grug_text}"
    Provide only yes or no answer.
    """

    with dspy.context(lm=gpt3):

        assessor = dspy.Predict(AssessBasedOnQuestion) # we dont make a 'module' class because this agent is outside of the pipeline, it just assesses

        raw_similarity_result = assessor(text_being_assessed=prediction.grug_text, assessment_question=prompt)
        print(raw_similarity_result)
        raw_similarity = raw_similarity_result.assessment_answer.lower().strip()
        same_meaning = raw_similarity == 'yes'
        return same_meaning

In [65]:
def overall_metric(provided_example, predicted, trace=None):
    similarity = similarity_metric(provided_example, predicted, trace)
    ari = ari_metric(provided_example, predicted, trace)

    if similarity and ari:
        return True
    return False

In [66]:
from dspy.teleprompt import BootstrapFewShot
# The tool we are going to use to optimise our prompt
# Uses examples we have to generate synthetic examples

config = dict(max_bootstrapped_demos=4, max_labeled_demos=4)
teleprompter = BootstrapFewShot(metric=ari_metric, **config)
teleprompter.max_errors = 0
optimized_cot = teleprompter.compile(TranslationModule(), trainset=examples_for_training) # valset
# compile to use some few-shot examples to actually try to improve

  0%|          | 0/6 [00:00<?, ?it/s]

Example({'grug_text': 'is free country sort of and end of day not really matter too much, but grug hope you fun reading and maybe learn from\nmany, many mistake grug make over long program life', 'plain_english': "It's okay, it's a free country, and in the end, it doesn't matter much, but Grug hopes you have fun reading and learn from his mistakes."}) (input_keys={'plain_english'})
Prediction(
    rationale="produce the grug_text. We start by acknowledging that it's okay, then we mention the concept of a free country, followed by the idea that it doesn't matter much. Finally, we express Grug's hope that the reader has fun reading and learns from his mistakes.",
    grug_text='okay, free country, not matter much, but grug hope fun read and learn from mistake'
)
Example({'grug_text': 'THINK they are big brained developers many, many more, and more even definitely probably maybe not like this, many\nsour face (such is internet)', 'plain_english': 'Many experienced developers might not lik

 67%|██████▋   | 4/6 [00:00<00:00, 158.64it/s]


In [62]:
from dspy.evaluate import Evaluate
individual_metrics = [similarity_metric, ari_metric]

In [60]:
for metric in individual_metrics:
    evaluate = Evaluate(metric=metric, devset=examples_for_testing, num_threads=1, display_progress=True, display_table=5)
    evaluate(optimized_cot)

    # We get a table for each metric tested
    

  0%|          | 0/4 [00:00<?, ?it/s]Prediction(
    assessment_answer='Text Being Assessed: thoughts on software development by grug brain\nAssessment Question:\n\n    Does the text being assessed have the same meaning as the gold_standard text?\n    Gold Standard: "this collection of thoughts on software development gathered by grug brain developer"\n    Provide only yes or no answer.\n    \nAssessment Answer: No'
)
Average Metric: 0 / 1  (0.0):   0%|          | 0/4 [00:00<?, ?it/s]Prediction(
    assessment_answer='Yes'
)
Average Metric: 1 / 2  (50.0):  25%|██▌       | 1/4 [00:00<00:00, 47.06it/s]Prediction(
    assessment_answer='Yes'
)
Average Metric: 2 / 3  (66.7):  50%|█████     | 2/4 [00:00<00:00, 79.27it/s]Prediction(
    assessment_answer='No'
)
Average Metric: 2 / 4  (50.0): 100%|██████████| 4/4 [00:00<00:00, 132.22it/s]


Unnamed: 0,example_grug_text,plain_english,rationale,pred_grug_text,similarity_metric
0,this collection of thoughts on software development gathered by grug brain developer,A collection of thoughts on software development gathered by a developer named Grug Brain.,produce the Grug text. We need to highlight that these are thoughts on software development collected by Grug Brain.,thoughts on software development by grug brain,False
1,is fine!,It is fine.,produce the Grug text. We need to simplify the sentence and convey the idea that everything is okay.,fine,✔️ [True]
2,complexity bad,Complexity is bad.,produce the Grug text. We need to simplify the statement and emphasize the negative aspect of complexity.,complexity bad,✔️ [True]
3,"grug brain developer try collect learns into small, easily digestible and funny page, not only for you, the young grug, but also for him because...","Grug Brain is trying to collect his learnings into a short, funny, and easy-to-read page for both himself and others.","produce the Grug text. We need to highlight Grug's brain working to gather his knowledge in a concise, humorous, and accessible format for himself and...","grug brain collect learnings, make short, funny, easy read page for self and others",False


Average Metric: 4 / 4  (100.0): 100%|██████████| 4/4 [00:00<00:00, 583.60it/s]


Unnamed: 0,example_grug_text,plain_english,rationale,pred_grug_text,ari_metric
0,this collection of thoughts on software development gathered by grug brain developer,A collection of thoughts on software development gathered by a developer named Grug Brain.,produce the Grug text. We need to highlight that these are thoughts on software development collected by Grug Brain.,thoughts on software development by grug brain,✔️ [True]
1,is fine!,It is fine.,produce the Grug text. We need to simplify the sentence and convey the idea that everything is okay.,fine,✔️ [True]
2,complexity bad,Complexity is bad.,produce the Grug text. We need to simplify the statement and emphasize the negative aspect of complexity.,complexity bad,✔️ [True]
3,"grug brain developer try collect learns into small, easily digestible and funny page, not only for you, the young grug, but also for him because...","Grug Brain is trying to collect his learnings into a short, funny, and easy-to-read page for both himself and others.","produce the Grug text. We need to highlight Grug's brain working to gather his knowledge in a concise, humorous, and accessible format for himself and...","grug brain collect learnings, make short, funny, easy read page for self and others",✔️ [True]


[Example({'grug_text': 'this collection of thoughts on software development gathered by grug brain developer', 'plain_english': 'A collection of thoughts on software development gathered by a developer named Grug Brain.'}) (input_keys={'plain_english'}), Example({'grug_text': 'is fine!', 'plain_english': 'It is fine.'}) (input_keys={'plain_english'}), Example({'grug_text': 'complexity bad', 'plain_english': 'Complexity is bad.'}) (input_keys={'plain_english'}), Example({'grug_text': 'grug brain developer try collect learns into small, easily digestible and funny page, not only for you, the young grug, but also for him\nbecause as grug brain developer get older he forget important things, like what had for breakfast or if put pants on', 'plain_english': 'Grug Brain is trying to collect his learnings into a short, funny, and easy-to-read page for both himself and others.'}) (input_keys={'plain_english'})]
<function ari_metric at 0x000002755BFFB880>


In [131]:
optimized_cot.forward("You should not construct complex systems.")

Prediction(
    rationale='Grug text: you not build complex systems',
    grug_text='you not build complex systems'
)