In [68]:
# %load_ext autoreload
# %autoreload 2

# import sys
# import os

# try: # When on google Colab, let's clone the notebook so we download the cache.
#     import google.colab
#     repo_path = 'dspy'
#     !git -C $repo_path pull origin || git clone https://github.com/stanfordnlp/dspy $repo_path
# except:
#     repo_path = '.'

# if repo_path not in sys.path:
#     sys.path.append(repo_path)

# # Set up the cache for this notebook
# os.environ["DSP_NOTEBOOK_CACHEDIR"] = os.path.join(repo_path, 'cache')

# import pkg_resources # Install the package if it's not installed
# if not "dspy-ai" in {pkg.key for pkg in pkg_resources.working_set}:
#     !pip install -U pip
#     !pip install dspy-ai
#     !pip install openai~=0.28.1
#     # !pip install -e $repo_path

In [1]:
import dspy
import pydantic
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import random

from pprint import PrettyPrinter

# Notebook-wide pretty-printer: `pprint(obj)` prints `obj` with a 4-space indent.
# The name refers to the bound method, not to the stdlib `pprint` module.
pprint = PrettyPrinter(indent=4).pprint

In [3]:
# Two OpenAI-backed language model clients, both created with max_tokens=300.
# gpt4_turbo is created here but is not referenced by any of the cells visible below.
gpt4_turbo = dspy.OpenAI(model='gpt-4-1106-preview', max_tokens=300)
gpt3_turbo = dspy.OpenAI(model='gpt-3.5-turbo-1106', max_tokens=300, temperature=1)

# Register GPT-3.5 as the default LM in the global DSPy settings.
# NOTE(review): max_tokens=1024 here differs from the per-client max_tokens=300 above —
# confirm which of the two actually limits completions.
dspy.settings.configure(lm=gpt3_turbo, max_tokens=1024)

# Assessing Subject

### Retrieving Descriptions

In [64]:
import json

# Load both example files. JSON text is UTF-8, so the encoding is pinned explicitly
# instead of relying on the platform default (which is not UTF-8 on every OS).
with open('../examples/off_topic_descriptions/data.json', 'r', encoding='utf-8') as f1,\
     open('../examples/smart_contract_descriptions/data.json', 'r', encoding='utf-8') as f2:
    d1 = json.load(f1)
    d2 = json.load(f2)

# Negative class: the entries stored under the 'descriptions' key of the off-topic file.
ot_descriptions = list(d1['descriptions'])

# Positive class: for every top-level key of the smart-contract file, take its
# ['descriptions']['description_1'] entry.
sc_descriptions_q1 = [d2[key]['descriptions']['description_1'] for key in d2]
# NOTE(review): the disabled variants below all read 'description_1' as well; presumably
# they were meant to read other description keys — confirm before enabling them.
# sc_descriptions_q2 = [d2[key]['descriptions']['description_1'] for key in d2]
# sc_descriptions_q3 = [d2[key]['descriptions']['description_1'] for key in d2]
# sc_descriptions_q4 = [d2[key]['descriptions']['description_1'] for key in d2]

# Copy instead of aliasing, so that enabling the `+=` lines below extends only
# sc_descriptions and leaves sc_descriptions_q1 untouched.
sc_descriptions = list(sc_descriptions_q1)
# sc_descriptions += sc_descriptions_q2
# sc_descriptions += sc_descriptions_q3
# sc_descriptions += sc_descriptions_q4

def split_for_train_test(output_type="int", test_size=2/5):
    """Build labelled `dspy.Example`s from the loaded descriptions and split them.

    Every entry of the module-level `ot_descriptions` gets the negative label and every
    entry of `sc_descriptions` gets the positive label. The combined examples are
    shuffled and cut into a training part and a testing part.

    Args:
        output_type: "bool" labels the classes False / True. "int" draws one negative
            label from 0-2 and one positive label from 8-10; each is drawn once per
            call, so all examples of a class share the same integer within one split.
        test_size: Fraction of the examples that goes into the second (testing) list.

    Returns:
        A `(training_examples, testing_examples)` tuple of lists.

    Raises:
        ValueError: If `output_type` is neither "int" nor "bool".
    """
    if output_type == "int":
        negative_output = random.randint(0,2)
        positive_output = random.randint(8,10)
    elif output_type == "bool":
        negative_output = False
        positive_output = True
    else:
        # Previously an unknown value fell through and failed later with an
        # UnboundLocalError on `negative_output`; fail early with a clear message.
        raise ValueError(
            f"output_type must be 'int' or 'bool', got {output_type!r}"
        )

    combined_descriptions = (
        [(description, negative_output) for description in ot_descriptions]
        + [(description, positive_output) for description in sc_descriptions]
    )

    # Arguments must have the same name as Signature input/output
    examples = [
        dspy.Example(smart_contract_description=desc, boolean_assessment=is_valid).with_inputs("smart_contract_description")
        for desc, is_valid in combined_descriptions
    ]

    # In-place shuffle; no seed is set, so the split differs between runs.
    np.random.shuffle(examples)
    split = int(len(examples) * (1 - test_size))

    return examples[:split], examples[split:]

# print("List of 'examples' for training: ")
# pprint(examples_for_training)
# print("\nList of 'examples' for testing: ")
# pprint(examples_for_testing)

### True/False Labeling

In [70]:
# Global switch read by `metric`: when True it prints each example, the model's reasoning
# and the expected/predicted labels during training and assessment.
verbose_output = False # during training and assessment
# Fresh shuffled train/test split with True/False labels.
examples_for_training, examples_for_testing = split_for_train_test("bool")

# Signature for the True/False variant of the subject check: one text in, one boolean out.
# NOTE(review): DSPy presumably sends the class docstring and the `desc` strings to the LM
# as part of the prompt, so they are runtime text — change them only as a deliberate
# prompt edit (this includes the grammar of the docstring).
class AssessSubject(dspy.Signature):
    """Does the text delivers a detailed engineer's functional description of how one particular smart contract is designed to work programmatically post-deployment?"""
    # Input field; the name must match the keyword used when building the dspy.Example objects.
    smart_contract_description: str = dspy.InputField(desc="A description of a Smart Contract")
    # Output field compared against the example label by `metric`.
    boolean_assessment: bool = dspy.OutputField(desc="True/False indicating if text is about Smart Contracts")

class AssessSubjectModule(dspy.Module):
    """A module to verify if the description consists of a precise functional description of how a specific smart contract should work."""
    def __init__(self):
        super().__init__()
        # Typed chain-of-thought predictor built from the AssessSubject signature.
        self.generate_answer = dspy.functional.TypedChainOfThought(AssessSubject)

    def forward(self, smart_contract_description: str) -> "dspy.Prediction":
        """Run the predictor on one description and return the whole prediction.

        The prediction object itself is returned, not the bare `boolean_assessment`
        value: `metric` reads `prediction.boolean_assessment` and `prediction.reasoning`,
        and per the original author's note teleprompting does not work when only the
        output value is returned. The return annotation reflects that (it previously
        claimed `bool`).
        """
        return self.generate_answer(smart_contract_description=smart_contract_description)

def metric(example, prediction, trace=None):
    """Exact-match metric: True when the predicted label equals the example's label.

    `trace` is accepted for compatibility with the caller but is not used. When the
    global `verbose_output` flag is set, the example, the model's reasoning and both
    labels are printed before the comparison.
    """
    if verbose_output:
        print(f"Defined function 'metric' called on\n{' '*4}{example}")
        print(f"Generated prediction is\n{' '*4}{prediction.reasoning}")
        print(f"{' '*4}>> Real-value/Predicted-value : {example.boolean_assessment} | {prediction.boolean_assessment}\n")

    # The comparison lives here rather than in the module's forward() (author's TODO note).
    labels_match = example.boolean_assessment == prediction.boolean_assessment
    return labels_match

from dspy.teleprompt import BootstrapFewShot

# Allow as many bootstrapped demos as there are training examples
# (max_labeled_demos is left at its default; 4 was considered).
config = {"max_bootstrapped_demos": len(examples_for_training)}
teleprompter = BootstrapFewShot(metric=metric, **config)
# NOTE(review): presumably makes bootstrapping abort on the first error — confirm.
teleprompter.max_errors = 0
optimised_program = teleprompter.compile(
    AssessSubjectModule(),
    trainset=examples_for_training,
    valset=examples_for_testing,
)
# Author's note: 'valset' is not used for training (when run in Jupyter it is picked up
# automatically even if changed).

from dspy.evaluate import Evaluate

# Score the optimised program on the held-out examples, single-threaded, showing a
# progress bar and one row of the result table.
evaluate_program = Evaluate(
    metric=metric,
    devset=examples_for_testing,
    num_threads=1,
    display_progress=True,
    display_table=1,
)
evaluate_program(optimised_program)
# FIXME: "argument of type 'bool' is not iterable" has been seen coming from Evaluate; not yet understood.

  0%|          | 0/18 [00:00<?, ?it/s]

100%|██████████| 18/18 [00:18<00:00,  1.02s/it]


Average Metric: 11 / 12  (91.7): 100%|██████████| 12/12 [00:15<00:00,  1.26s/it]


Unnamed: 0,smart_contract_description,example_boolean_assessment,reasoning,pred_boolean_assessment,metric
0,"The Digital Canvas initiative leverages the concept of programmable interactions, much like a smart contract, but for artists and galleries. The platform allows digital creators...",False,produce the boolean assessment. We need to consider whether the text provides a detailed engineer's functional description of how a particular smart contract is designed...,False,✔️ [True]


91.67

In [66]:
# Ad-hoc sanity check with a non-description input; the Spanish text reads
# "I am pretending to be a smart contract description".
optimised_program.forward("Estoy fingiendo ser una smart contract description")


Prediction(
    reasoning="produce the boolean assessment. We would need to determine if the text is delivering a detailed engineer's functional description of how a specific smart contract is designed to work programmatically post-deployment. Based on the provided reasoning, the assessment is false as the text does not provide a detailed smart contract description.",
    boolean_assessment=False
)

### 1-10 Labeling

In [71]:
# Global switch read by `metric`: when True it prints each example, the model's reasoning
# and the expected/predicted labels during training and assessment.
verbose_output = False # during training and assessment
# Fresh shuffled train/test split with integer labels (one value from 0-2 for off-topic
# texts and one value from 8-10 for smart contract descriptions).
examples_for_training, examples_for_testing = split_for_train_test("int")

In [79]:
# Signature for the integer-rating variant of the subject check; it replaces the
# True/False signature of the same name defined in the earlier section.
# NOTE(review): DSPy presumably sends the class docstring and the `desc` strings to the LM
# as part of the prompt, so they are runtime text — change them only as a deliberate
# prompt edit.
class AssessSubject(dspy.Signature):
    """To which degree are these descriptions oriented to describe a smart contract for implementation and production?"""
    # Input field; the name must match the keyword used when building the dspy.Example objects.
    smart_contract_description: str = dspy.InputField(desc="A description of a Smart Contract")
    # Still named `boolean_assessment` although it is an int, because split_for_train_test
    # stores the label under that key.
    # NOTE(review): the prompt says "1-10" while split_for_train_test("int") draws negative
    # labels from 0-2, so a label of 0 lies outside the advertised range.
    boolean_assessment: int = dspy.OutputField(desc="1-10 rating indicating if text is a detailed engineers's functional smart contract description")

class AssessSubjectModule(dspy.Module):
    """A module to verify if the description consists of a precise functional description of how a specific smart contract should work."""
    def __init__(self):
        super().__init__()
        # Typed chain-of-thought predictor built from the AssessSubject signature.
        self.generate_answer = dspy.functional.TypedChainOfThought(AssessSubject)

    def forward(self, smart_contract_description: str) -> "dspy.Prediction":
        """Run the predictor on one description and return the whole prediction.

        The prediction object itself is returned, not the bare `boolean_assessment`
        rating: `metric` reads `prediction.boolean_assessment` and `prediction.reasoning`,
        and per the original author's note teleprompting does not work when only the
        output value is returned. The return annotation reflects that (it previously
        claimed `int`).
        """
        return self.generate_answer(smart_contract_description=smart_contract_description)

def metric(example, prediction, trace=None):
    """Exact-match metric: True when the predicted rating equals the example's label.

    `trace` is accepted for compatibility with the caller but is not used. When the
    global `verbose_output` flag is set, the example, the model's reasoning and both
    labels are printed before the comparison.
    """
    if verbose_output:
        print(f"Defined function 'metric' called on\n{' '*4}{example}")
        print(f"Generated prediction is\n{' '*4}{prediction.reasoning}")
        print(f"{' '*4}>> Real-value/Predicted-value : {example.boolean_assessment} | {prediction.boolean_assessment}\n")

    # The comparison lives here rather than in the module's forward() (author's TODO note).
    labels_match = example.boolean_assessment == prediction.boolean_assessment
    return labels_match

from dspy.teleprompt import BootstrapFewShot

# Allow as many bootstrapped demos as there are training examples
# (max_labeled_demos is left at its default; 4 was considered).
config = {"max_bootstrapped_demos": len(examples_for_training)}
teleprompter = BootstrapFewShot(metric=metric, **config)
# NOTE(review): presumably makes bootstrapping abort on the first error — confirm.
teleprompter.max_errors = 0
optimised_program = teleprompter.compile(
    AssessSubjectModule(),
    trainset=examples_for_training,
    valset=examples_for_testing,
)
# Author's note: 'valset' is not used for training (when run in Jupyter it is picked up
# automatically even if changed).

from dspy.evaluate import Evaluate

# Score the optimised program on the held-out examples, single-threaded, showing a
# progress bar and one row of the result table.
evaluate_program = Evaluate(
    metric=metric,
    devset=examples_for_testing,
    num_threads=1,
    display_progress=True,
    display_table=1,
)
evaluate_program(optimised_program)
# FIXME: "argument of type 'bool' is not iterable" has been seen coming from Evaluate; not yet understood.

100%|██████████| 18/18 [00:17<00:00,  1.01it/s]


Average Metric: 12 / 12  (100.0): 100%|██████████| 12/12 [00:14<00:00,  1.23s/it]


Unnamed: 0,smart_contract_description,example_boolean_assessment,reasoning,pred_boolean_assessment,metric
0,The Smart Scheduler for conference management could draw from the principles of smart contracts to automate the planning and execution of events. Based on predefined...,0,produce the boolean assessment. We would need to provide a detailed engineers's functional smart contract description that includes specific details about how the Smart Scheduler...,0,✔️ [True]


100.0

# Requirement Generation

### Retrieving description-requirement pairs

In [None]:
import json

# NOTE(review): this cell repeats the data-loading cell of the "Retrieving Descriptions"
# section; it has no execution count, so it may never have been run in this form.
# Load both example files. JSON text is UTF-8, so the encoding is pinned explicitly
# instead of relying on the platform default (which is not UTF-8 on every OS).
with open('../examples/off_topic_descriptions/data.json', 'r', encoding='utf-8') as f1,\
     open('../examples/smart_contract_descriptions/data.json', 'r', encoding='utf-8') as f2:
    d1 = json.load(f1)
    d2 = json.load(f2)

# Negative class: the entries stored under the 'descriptions' key of the off-topic file.
ot_descriptions = list(d1['descriptions'])

# Positive class: for every top-level key of the smart-contract file, take its
# ['descriptions']['description_1'] entry.
sc_descriptions_q1 = [d2[key]['descriptions']['description_1'] for key in d2]
# NOTE(review): the disabled variants below all read 'description_1' as well; presumably
# they were meant to read other description keys — confirm before enabling them.
# sc_descriptions_q2 = [d2[key]['descriptions']['description_1'] for key in d2]
# sc_descriptions_q3 = [d2[key]['descriptions']['description_1'] for key in d2]
# sc_descriptions_q4 = [d2[key]['descriptions']['description_1'] for key in d2]

# Copy instead of aliasing, so that enabling the `+=` lines below extends only
# sc_descriptions and leaves sc_descriptions_q1 untouched.
sc_descriptions = list(sc_descriptions_q1)
# sc_descriptions += sc_descriptions_q2
# sc_descriptions += sc_descriptions_q3
# sc_descriptions += sc_descriptions_q4

def split_for_train_test(output_type="int", test_size=2/5):
    """Build labelled `dspy.Example`s from the loaded descriptions and split them.

    Every entry of the module-level `ot_descriptions` gets the negative label and every
    entry of `sc_descriptions` gets the positive label. The combined examples are
    shuffled and cut into a training part and a testing part.

    Args:
        output_type: "bool" labels the classes False / True. "int" draws one negative
            label from 0-2 and one positive label from 8-10; each is drawn once per
            call, so all examples of a class share the same integer within one split.
        test_size: Fraction of the examples that goes into the second (testing) list.

    Returns:
        A `(training_examples, testing_examples)` tuple of lists.

    Raises:
        ValueError: If `output_type` is neither "int" nor "bool".
    """
    if output_type == "int":
        negative_output = random.randint(0,2)
        positive_output = random.randint(8,10)
    elif output_type == "bool":
        negative_output = False
        positive_output = True
    else:
        # Previously an unknown value fell through and failed later with an
        # UnboundLocalError on `negative_output`; fail early with a clear message.
        raise ValueError(
            f"output_type must be 'int' or 'bool', got {output_type!r}"
        )

    combined_descriptions = (
        [(description, negative_output) for description in ot_descriptions]
        + [(description, positive_output) for description in sc_descriptions]
    )

    # Arguments must have the same name as Signature input/output
    examples = [
        dspy.Example(smart_contract_description=desc, boolean_assessment=is_valid).with_inputs("smart_contract_description")
        for desc, is_valid in combined_descriptions
    ]

    # In-place shuffle; no seed is set, so the split differs between runs.
    np.random.shuffle(examples)
    split = int(len(examples) * (1 - test_size))

    return examples[:split], examples[split:]

# print("List of 'examples' for training: ")
# pprint(examples_for_training)
# print("\nList of 'examples' for testing: ")
# pprint(examples_for_testing)

In [75]:
import pydantic
from typing import List

# Signature for requirement extraction: one description in, a list of requirement strings out.
# NOTE(review): DSPy presumably sends the class docstring and the `desc` strings to the LM
# as part of the prompt, so they are runtime text — change them only as a deliberate
# prompt edit.
class ExtractRequirements(dspy.Signature):
    """Extract requirements from a Smart Contract Description"""
    # Input: the free-text description to analyse.
    smart_contract_description: str = dspy.InputField(desc="A description of a Smart Contract")
    # Output: read by ExtractRequirementsModule.forward as `pred.requirements_list`.
    requirements_list: List[str] = dspy.OutputField(desc="A list object with extracted requirements")

class ExtractRequirementsModule(dspy.Module):
    """A module to extract requirements from a Smart Contract Description"""
    def __init__(self):
        super().__init__()
        # Typed predictor built from the ExtractRequirements signature.
        self.generate_answer = dspy.functional.TypedPredictor(ExtractRequirements)

    def forward(self, description: str) -> List[str]:
        """Return the requirement strings the predictor extracts from `description`."""
        return self.generate_answer(smart_contract_description=description).requirements_list

# Run the extraction once and print the resulting list of requirement strings.
# FIXME(review): `description` is not assigned in any cell visible above this one, so this
# cell appears to depend on leftover kernel state — define it explicitly (for example by
# picking one entry of `sc_descriptions`) so the notebook survives Restart & Run All.
module = ExtractRequirementsModule()
list_of_requirements = module.forward(description)
print(list_of_requirements)

# gpt3_turbo.inspect_history(n=1)

['Implement a blockchain contract to manage 50,000 tokens for a concert', 'Each token represents one ticket', 'Users can purchase one ticket each', 'Golden status users can buy up to three tickets to transfer to others', 'Ticket sales divided into two phases', 'First phase lasts 5 minutes', 'Second phase is triggered one week after the first phase ends', 'Compensation for cancelled event includes extra 25% for Golden ticket holders, 5% for Platinum, and no extra for Bronze']


In [42]:
import pydantic
from typing import List

# Structured form of a single requirement, used as the typed output of `generate_attributes`.
# The PascalCase field names are also the keys of the dict printed via `.dict()`, so
# renaming a field changes the emitted structure.
class Requirement(pydantic.BaseModel):
    Name: str
    Scope: str
    # The only list-valued attribute; every other attribute is a single string.
    Input: List[str]
    Constraints: str
    Output: str
    PrimaryScenario: str
    AlternativeScenario: str

# Signature that turns one requirement sentence (plus the full description as context)
# into a structured `Requirement`.
# NOTE(review): DSPy presumably sends the class docstring and the `desc` strings to the LM
# as part of the prompt, so they are runtime text — change them only as a deliberate
# prompt edit. The snake_case class name differs from the other signatures in this
# notebook but is kept because GenerateAttributesModule refers to it by name.
class generate_attributes(dspy.Signature):
    """Generate attributes for the given attribute description."""
    # Input: the whole smart contract description, supplied as context.
    smart_contract_description: str = dspy.InputField(desc="Context of the requirement")
    # Input: the single requirement to structure (no `desc` is given for this field).
    requirement_description: str = dspy.InputField()
    # Output: read by GenerateAttributesModule.forward as `pred.structured_requirement`.
    structured_requirement: Requirement = dspy.OutputField(desc="Structured list of requirement attributes")

class GenerateAttributesModule(dspy.Module):
    """A module to process multiple requirement descriptions into structured JSON."""

    def __init__(self):
        super().__init__()
        # Typed predictor built from the generate_attributes signature.
        self.generate_answer = dspy.functional.TypedPredictor(generate_attributes)

    def forward(self, description: str, requirement: str) -> Requirement:
        """Structure one requirement, using the full description as context.

        Each call handles a single `requirement`; callers loop over their list of
        requirements and call this once per item.
        """
        predictor_inputs = {
            "smart_contract_description": description,
            "requirement_description": requirement,
        }
        prediction = self.generate_answer(**predictor_inputs)
        return prediction.structured_requirement

# Structure every extracted requirement, one model call per requirement, and print each
# result as a plain dict.
# NOTE(review): this relies on `description` and `list_of_requirements` left in the kernel
# by the extraction cell; `description` is not assigned in any visible cell.
req_module = GenerateAttributesModule()
# list_of_requirements holds several requirement strings; each is processed on its own.
for requirement in list_of_requirements:
    attributes = req_module.forward(description, requirement)
    # NOTE(review): `.dict()` is the pydantic v1 spelling; if the environment uses
    # pydantic v2 it is presumably deprecated in favour of `.model_dump()` — confirm.
    print(attributes.dict())

# gpt3_turbo.inspect_history(n=1)

{'Name': 'Buyer Refund Request', 'Scope': 'Smart contract for refund process', 'Input': ['Buyer request', 'Sale status'], 'Constraints': 'Only seller can initiate refund process', 'Output': 'Fair compensation for buyer', 'PrimaryScenario': 'If sale in progress, refund of twice the initial value', 'AlternativeScenario': 'No refund if sale not in progress'}
{'Name': 'Compensation based on current state of sale', 'Scope': 'Smart contract refund process', 'Input': ['Buyer request for refund', 'Current state of the sale (in progress or cancelled)'], 'Constraints': 'Only seller authorized to initiate refund process', 'Output': 'Fair compensation for buyer if sale is cancelled', 'PrimaryScenario': 'If sale is in progress and seller decides not to proceed, buyer receives a refund of twice the value paid initially', 'AlternativeScenario': 'No refund issued if sale is not in progress'}
{'Name': 'Seller Authorization for Refund Initiation', 'Scope': 'Smart Contract for Buyer Refunds', 'Input': ['