1. Load the split dataset and divide it into a trainset and a devset.
2. Check whether the text can be rewritten as argumentation.

In [3]:
import os
# Change into the directory that the relative paths used below (e.g.
# "data/wikibooks_splited.csv") are resolved against. The guard keeps this cell
# idempotent: re-running it in the same kernel no longer climbs two more levels.
if not os.path.exists("data/wikibooks_splited.csv"):
    os.chdir('../..')

import dspy
from dspy.evaluate.evaluate import Evaluate
from dspy.predict import Retry
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
from dspy.primitives.assertions import assert_transform_module, backtrack_handler
from dspy.datasets import DataLoader
import random
import json
from dotenv import load_dotenv

from icecream import ic

# Load environment variables first: OPENAI_API_KEY is read via os.getenv below.
load_dotenv()

# Retrieval model handle, registered as `rm` in the DSPy settings.
colbertv2_wiki17_abstracts = dspy.ColBERTv2(
    url='http://20.102.90.50:2017/wiki17_abstracts',
)

# Language model handle, registered as `lm` in the DSPy settings.
turbo = dspy.OpenAI(
    model='gpt-3.5-turbo-0125',
    api_key=os.getenv("OPENAI_API_KEY"),
)

dspy.settings.configure(
    lm=turbo,
    trace=[],
    temperature=0.7,
    rm=colbertv2_wiki17_abstracts,
)

dl = DataLoader()

# Read the CSV, keeping only the "text" column and marking it as the input key.
dataset = dl.from_csv(
    "data/wikibooks_splited.csv",
    fields=("text",),
    input_keys=("text",),
)

def can_be_modified(example):
    """Placeholder for checking whether a text can be modified to argumentation.

    Not implemented yet: every call raises ``NotImplementedError``.
    """
    # TODO: Check if the text can be modified to argumentation.
    raise NotImplementedError()

# 80% of the examples go to the train split; the "test" split is used as the devset.
splits = dl.train_test_split(dataset, train_size=0.8)
trainset, devset = splits['train'], splits['test']

# Show the split sizes and the first example of each split.
print(len(trainset), len(devset))
print(trainset[0], devset[0], sep="\n")


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4387.35it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 239.54it/s]
Generating train split: 1923 examples [00:00, 15716.96 examples/s]


1538 385
Example({'text': '["Ada Distilled by Richard Riehle\\nPage 108 of 113\\nJohnston, Simon, Ada 95 for C and C++ Programmers, Addison-Wesley, 1997\\nKain, Richard Y., Computer Architecture, Prentice-Hall, 1989 (because software examples are in Ada)\\nLoftus, Chris (editor), Ada Yearbook - 1994, IOS Press, 1994\\nMeyer, Bertrand, Object-Oriented Software Construction, 2nd Editon, Prentice-Hall PTR, 1997 (Not friendly to Ada, but an excellent treatment of object-oriented programming)\\nNaiditch, David, Rendezvous with Ada 95, John Wiley & Sons, 1995 (0-471-01276-9)\\nRosen, Jean Pierre, ……………………….. HOOD …………………..\\nPressman, Roger, Software Engineering, A Practitioner’s Approach, Fourth Edition, McGraw-Hill, 1997\\nSalus, Peter H, Handbook of Programming Languages, Vol 1, Object-Oriented Programming Languages, MacMillan Technical Publishing, 1998 , ISBN 1-57870-009-4\\nSigfried, Stefan, Understanding Object-Oriented Software Engineering, IEEE Press, 1995\\nSkansholm, Jan, Ada From 

1. dspy.Signature: AssessSatementQuality
   1. format_valid_metric
   2. meaningful_metric
   3. dependence_metric
   4. overall_metric
2. dspy.Signature: GenerateStatements
3. dspy.Module: StatementsGenerator
   1. dspy.ChainOfThought
   2. dspy.Suggest
4. statements_generator with assertion

In [5]:
def format_checker(statements_string):
    """Return True when ``statements_string`` is a JSON list whose items are all strings.

    Args:
        statements_string: Raw text expected to hold a JSON array,
            e.g. '["statement"]'.

    Returns:
        bool: True for a JSON list containing only strings (an empty list
        counts as valid). False for any other JSON value, for text that is not
        valid JSON, and for input that ``json.loads`` cannot read at all
        (e.g. ``None``).
    """
    try:
        statements = json.loads(statements_string)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError (malformed JSON); TypeError is
        # raised when the input is not str/bytes/bytearray, e.g. None.
        return False
    return isinstance(statements, list) and all(isinstance(item, str) for item in statements)
    
def yes_in_string(s):
    """Return True when ``s`` contains the standalone word "yes" (case-insensitive).

    The match is on whole words, so text such as "eyes" or "yesterday" is not
    counted as a yes. Input that is not a string (e.g. ``None``) yields False
    instead of raising.

    Args:
        s: The assessment answer text to inspect.

    Returns:
        bool: True if the word "yes" occurs in ``s``, otherwise False.
    """
    import re  # local import so this helper does not rely on a file-level import

    if not isinstance(s, str):
        return False
    return re.search(r"\byes\b", s.lower()) is not None

def format_valid_metric(example, pred, trace = None):
    """Metric: whether ``pred.statements`` passes ``format_checker``.

    ``example`` and ``trace`` are accepted but not used.
    """
    return format_checker(pred.statements)

class AssessSatementQuality(dspy.Signature):
    # Signature for answering one assessment question about generated statements.
    # Inputs: the source text, the statements, and the question to answer.
    # Output: an answer described to the model as "yes or no".
    # NOTE(review): a class with the same name and the same fields is defined
    # again further down in this cell; the later definition is the one the name
    # refers to once the cell has run.
    # NOTE(review): documented with comments rather than a docstring because a
    # docstring on a DSPy Signature is presumably used as prompt instructions —
    # confirm before adding one.
    text = dspy.InputField()
    statements = dspy.InputField()
    assessment_question = dspy.InputField()
    assessment_answer = dspy.OutputField(desc = "yes or no")

# def relevant_metric(example, pred, trace = None):
#     text, statements = example.text, json.loads(pred.statements)
#     question = "Are these statements relevant to the text and accurate?"
#     relevant_assessment = dspy.Predict(AssessSatementQuality, text=text, statements=statements, assessment_question=question)
#     return yes_in_string(meaningful_assessment.assessment_answer)

def meaningful_metric(example, pred, trace = None):
    """LLM-judged metric: can the statements serve as a central argument?

    Asks ``AssessSatementQuality`` a fixed question about ``pred.statements``
    in the context of ``example.text`` and returns whether the answer contains
    "yes" (as decided by ``yes_in_string``). ``trace`` is not used.
    """
    source_text, candidate_statements = example.text, pred.statements
    judge = dspy.Predict(AssessSatementQuality)
    verdict = judge(
        text=source_text,
        statements=candidate_statements,
        assessment_question="Are these statements sufficiently meaningful and specific and can serve as a central argument in an argumentative essay (shoubld be a point to prove, not a fact that has been defined)?",
    )
    return yes_in_string(verdict.assessment_answer)


def dependence_metric(example, pred, trace = None):
    """LLM-judged metric: are the statements phrased independently of the text?

    Asks ``AssessSatementQuality`` a fixed question about ``pred.statements``
    in the context of ``example.text`` and returns whether the answer contains
    "yes" (as decided by ``yes_in_string``). ``trace`` is not used.
    """
    source_text, candidate_statements = example.text, pred.statements
    judge = dspy.Predict(AssessSatementQuality)
    verdict = judge(
        text=source_text,
        statements=candidate_statements,
        assessment_question="Are these statements independent from the text (Don't mention the text itself directly. Can use concept in text)? \nFor example, \"The text provides...\" is \"no\"",
    )
    return yes_in_string(verdict.assessment_answer)
    
class AssessSatementQuality(dspy.Signature):
    # NOTE(review): duplicate definition — a class with this name and the same
    # four fields is already defined earlier in this cell. This later definition
    # rebinds the name, so it is the one used when the metric functions and
    # StatementsGenerator.forward look the name up at call time.
    # Inputs: the source text, the statements, and the question to answer.
    # Output: an answer described to the model as "yes or no".
    text = dspy.InputField()
    statements = dspy.InputField()
    assessment_question = dspy.InputField()
    assessment_answer = dspy.OutputField(desc = "yes or no")

    
def overall_metric(example, pred, trace = None):
    """Average of the format, meaningfulness and dependence metrics.

    Each component returns a boolean, so the result is a float between 0 and 1
    in steps of one third. ``trace`` is forwarded to every component so all
    three are called the same way.

    NOTE(review): the result is a float even when ``trace`` is set; if the
    optimizer treats the metric as pass/fail in that mode, any non-zero score
    would count as a pass — confirm whether a threshold is wanted.
    """
    component_metrics = (format_valid_metric, meaningful_metric, dependence_metric)
    return sum(metric(example, pred, trace) for metric in component_metrics) / len(component_metrics)

# All metrics evaluated in the cells below, in reporting order.
metrics = [
    format_valid_metric,
    meaningful_metric,
    dependence_metric,
    overall_metric,
]

class GenerateStatements(dspy.Signature):
    # Signature for producing statements about a text as a JSON list.
    # Inputs: the source text and the requested list length.
    # Output: the statements, described to the model as a JSON list of strings.
    # NOTE(review): the `statements` description fixes the list length to 1
    # regardless of the value supplied for `number_of_statement`.
    # NOTE(review): documented with comments rather than a docstring because a
    # docstring on a DSPy Signature is presumably used as prompt instructions —
    # confirm before adding one.
    text = dspy.InputField()
    number_of_statement = dspy.InputField(desc = "The length of the JSON list")
    statements = dspy.OutputField(desc = "One independent statement(s) JSON list with the format like [\"statement\"]. The length of the JSON list is 1. Each statement is a brief opinion relevant to the text. Your response should begin with '[' and end with ']'.")

class StatementsGenerator(dspy.Module):
    """Generate a JSON list of statements for a text and suggest revisions.

    ``forward`` runs a chain-of-thought predictor over ``GenerateStatements``
    and then issues three ``dspy.Suggest`` checks on the result: JSON format,
    meaningfulness, and independence from the source text. The last two are
    judged by an ``AssessSatementQuality`` predictor.
    """

    def __init__(self):
        super().__init__()
        self.generate_statements = dspy.ChainOfThought(GenerateStatements)

    def forward(self, text):
        """Return a ``dspy.Prediction`` whose ``statements`` field is the generated JSON text.

        Args:
            text: Source text the statements should relate to.
        """
        # Pass the actual list length. The previous code passed the literal
        # placeholder string 'number_of_statement' as the field value. "1"
        # matches the length stated in the `statements` output description, and
        # is given as a string like the other prompt inputs.
        statements = self.generate_statements(text=text, number_of_statement="1").statements

        # Check 1: the output must parse as a JSON list of strings.
        dspy.Suggest(format_checker(statements), "The output should be one JSON list like [\"statement\"]. Begin with '[' and end with ']'. Please revise accordingly", target_module = GenerateStatements)

        # Check 2: the statements should be arguable points, judged by the LM.
        question = "Are these statements sufficiently meaningful and specific and can serve as a central argument in an argumentative essay (shoubld be a point to prove, not a fact that has been defined)?"
        meaningful_assessment = dspy.Predict(AssessSatementQuality)(text=text, statements=statements, assessment_question=question)
        dspy.Suggest(yes_in_string(meaningful_assessment.assessment_answer), "The statements should be meaningful and specific and should be a poin to prove instead of a fact that has been defined. Please revise accordingly", target_module = GenerateStatements) # TODO: Improve the metric and assertion.

        # Check 3: the statements should not refer to the text itself, judged by the LM.
        question = "Are these statements independent from the text (Don't mention the text itself directly. Can use concept in text)? \nFor example, \"The text provides...\" is \"no\""
        dependence_assessment = dspy.Predict(AssessSatementQuality)(text=text, statements=statements, assessment_question=question)
        dspy.Suggest(yes_in_string(dependence_assessment.assessment_answer), "The statements cannot mention the text itself itself directly like \"The text provides...\". Please revise accordingly to make the statements understandable without the text", target_module = GenerateStatements)

        return dspy.Prediction(statements=statements)

# Build the generator used in the cells below: Retry is mapped over the module's
# named predictors and the result is passed to assert_transform_module together
# with backtrack_handler.
# NOTE(review): presumably this is what lets the dspy.Suggest calls in forward()
# trigger retries — confirm against the DSPy assertions documentation.
statements_generator = assert_transform_module(StatementsGenerator().map_named_predictors(Retry), backtrack_handler)

<class '__main__.StatementsGenerator'>


Evaluate `statements_generator` (the generator with assertions enabled).

In [None]:
# Example call; the returned prediction is not stored.
statements_generator(text=devset[0].text) # The usage of statements_generator.
# Score the generator with every metric on the first 10 devset examples, single-threaded.
for metric in metrics:
    evaluate = Evaluate(metric=metric, devset=devset[:10], num_threads=1, display_progress=True, display_table=10)
    evaluate(statements_generator)

Compile the statements_generator by using BootstrapFewShotWithRandomSearch

In [None]:
# Optimizer scored with overall_metric: up to 2 bootstrapped demos, 3 candidate programs.
teleprompter = BootstrapFewShotWithRandomSearch(
    metric=overall_metric,
    max_bootstrapped_demos=2,
    num_candidate_programs=3,
)

# The same generator acts as both student and teacher; validation uses the
# first 100 devset examples.
compiled_statements_generator = teleprompter.compile(
    student=statements_generator,
    teacher=statements_generator,
    trainset=trainset,
    valset=devset[:100],
)

# Save the compiled program to disk.
compiled_statements_generator.save("result/genre_transformation/compiled_statements_generator")

Going to sample between 1 and 2 traces per predictor.
Will attempt to train 3 candidate sets.



[A
[A
[A

Error for example in dev set: 		 Request timed out.



[A
[A

Error for example in dev set: 		 Request timed out.



[A
[A

Error for example in dev set: 		 Request timed out.



[A
[A

Error for example in dev set: 		 Request timed out.



[A
[A
[A

Error for example in dev set: 		 Request timed out.



[A
[A




[A
[A
[A

Error for example in dev set: 		 Request timed out.



[A
[A

Error for example in dev set: 		 Request timed out.



[A
[A

Error for example in dev set: 		 Request timed out.



[A
[A

Error for example in dev set: 		 Request timed out.


Average Metric: 0.0 / 9  (0.0):   9%|▉         | 9/100 [3:15:49<33:00:04, 1305.54s/it]




APITimeoutError: Request timed out.

In [None]:
# Score the compiled generator with every metric on the first 10 devset examples.
# Requires compiled_statements_generator from the compile cell above.
for metric in metrics:
    evaluate = Evaluate(metric=metric, devset=devset[:10], num_threads=1, display_progress=True, display_table=10)
    evaluate(compiled_statements_generator)

In [None]:
class GenerateEssay(dspy.Signature):
    # Signature for writing an argumentative essay from a text, a statement and
    # retrieved reference material.
    # Documented with comments rather than a docstring: a docstring on a DSPy
    # Signature is presumably used as prompt instructions — confirm before adding one.
    text = dspy.InputField()
    statement = dspy.InputField()
    # The description has to be passed as `desc=`; dspy.InputField takes keyword
    # arguments only, so the previous positional string could not be accepted.
    reference = dspy.InputField(desc = "may contatin information about the statement to help you write the essay")
    essay = dspy.OutputField(desc = "One argumentative essay based on text and statement.")

class AssessEssayQuality(dspy.Signature):
    # Signature for answering one assessment question about a generated essay.
    # Inputs: the source text, the statement, the essay, and the question.
    # Output: an answer described to the model as "yes or no".
    # NOTE(review): nothing in the visible notebook references this class; the
    # essay checks below are built on AssessSatementQuality instead — confirm
    # which signature is intended.
    text = dspy.InputField()
    statement = dspy.InputField()
    essay = dspy.InputField()
    assessment_question = dspy.InputField()
    assessment_answer = dspy.OutputField(desc = "yes or no")
    
# def fomat_checker(essay_string):
#     return essay_string.count("---") >= 2

def quality_metric(example, pred, trace = None):
    """LLM-judged metric: is the essay well-supported by the text and coherent?

    Args:
        example: Dataset example; ``example.text`` is the source text.
        pred: Prediction from the essay generator; ``pred.essay`` is the essay.
            If it also carries a ``statement`` field, that is passed to the
            assessor; otherwise ``example.statement`` is used when present,
            else an empty string.
        trace: Accepted for metric-signature compatibility; not used.

    Returns:
        bool: True when the assessor's answer contains "yes" (as decided by
        ``yes_in_string``).
    """
    # The essay comes from the prediction and the text from the example. The
    # previous unpacking read the statement from `example.essay` and the essay
    # from `pred.statements`.
    text, essay = example.text, pred.essay
    statement = getattr(pred, "statement", None) or getattr(example, "statement", None) or ""
    question = "Are the essay's arguments well-supported by evidence from the text and presented with a coherent structure?"
    # AssessEssayQuality is the signature that declares the `statement` and `essay` inputs.
    quality_assessment = dspy.Predict(AssessEssayQuality)(text=text, statement=statement, essay = essay, assessment_question=question)
    return yes_in_string(quality_assessment.assessment_answer)

class EssayGenerator(dspy.Module):
    """Write an argumentative essay for a text, using a generated statement.

    ``forward`` takes the first statement produced by
    ``compiled_statements_generator`` (a notebook-level variable that must
    exist before this module is called), retrieves passages for it, generates
    the essay, and issues a ``dspy.Suggest`` check on essay quality.
    """

    def __init__(self, num_passages = 2):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_essay = dspy.ChainOfThought(GenerateEssay)

    def forward(self, text):
        """Return a ``dspy.Prediction`` with the ``essay`` and the ``statement`` it argues.

        Args:
            text: Source text the essay should draw on.
        """
        statements_list = json.loads(compiled_statements_generator(text=text).statements)
        # The decoded list already holds plain strings, so the first item is
        # used directly. Decoding it a second time would fail on ordinary prose.
        statement = statements_list[0]
        reference = self.retrieve(statement).passages
        essay = self.generate_essay(text=text, statement=statement, reference = reference).essay

        question = "Are the essay's arguments well-supported by evidence from the text and presented with a coherent structure?"
        # AssessEssayQuality is the signature that declares the `statement` and `essay` inputs.
        quality_assessment = dspy.Predict(AssessEssayQuality)(text=text, statement=statement, essay = essay, assessment_question=question)
        dspy.Suggest(yes_in_string(quality_assessment.assessment_answer), "The essay's arguments should be well-supported by evidence from the text and presented with a coherent structure. Please revise accordingly", target_module = GenerateEssay)

        # The statement is returned alongside the essay so a metric can read it.
        return dspy.Prediction(essay=essay, statement=statement)

# Wrap the essay module (not StatementsGenerator, which was wrapped here before)
# so that `essay_generator` actually produces essays.
essay_generator = assert_transform_module(EssayGenerator().map_named_predictors(Retry), backtrack_handler)

In [None]:
# Evaluate the essay generator with quality_metric on the first 10 devset examples, single-threaded.
essay_evaluate = Evaluate(metric=quality_metric, devset=devset[:10], num_threads=1, display_progress=True, display_table=10)

essay_evaluate(essay_generator)

In [None]:
# Optimizer scored with quality_metric: up to 1 bootstrapped demo, 3 candidate programs.
teleprompter = BootstrapFewShotWithRandomSearch(
    metric=quality_metric,
    max_bootstrapped_demos=1,
    num_candidate_programs=3,
)

# The same generator acts as both student and teacher; validation uses the
# first 100 devset examples.
compiled_essay_generator = teleprompter.compile(
    student=essay_generator,
    teacher=essay_generator,
    trainset=trainset,
    valset=devset[:100],
)