# Generate example conversations to fine-tune (and test) the LLM for BibleAssistant agent

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path

current_dir = Path().resolve()

#sys.path.append(str(current_dir))
sys.path.append(str(current_dir.parent))

In [3]:
import os
import numpy as np
import json
import random
import sefaria_code as sef
import bibleAssistant.agent as bagent
import bibleAssistant.bible_tools as bblt
import typo

In [4]:
dummy_agent = bagent.Agent("dummy")
system_prompt = dummy_agent.system_instructions
print(system_prompt)

You are a research assistant that always responds using a JSON object with fields "tool" and "arguments".

To respond normally to the user, use:
{"tool": "respond_to_user", "arguments":{"text": "<text to show the user>"}}

To call a tool, use:
{"tool": "<tool_name>", "arguments":{ ... }}

After you call a tool, you will receive a message with role "user" containing a JSON object.
The tool message always includes "tool_name" and "status".

If "status" is "ok":
- The message will include a "result" object.
- Read "result.text".
- Respond using "respond_to_user" and copy "result.text" exactly as-is.

If "status" is "error":
- The message will include an "error_message".
- If the error message is clear enough (e.g., if the user spelled a book name wrong and it is clear which book the user intended), you can call the tool again with the corrected arguments.
- Otherwise, respond using "respond_to_user" and copy "error_message" exactly as-is (to let the user tell you what to do next).

Rules:

In [5]:
def get_user_lookup_variations():
    variations = [
        "Please get me the biblical verse from the book of {book}, version '{version}', chapter {chapter_num} verse {verse_num}",
        "Give me verse {verse_num} from chapter {chapter_num} in the '{version}' version of {book}.",
        "Get me {book} {chapter_num}:{verse_num} ('{version}' version).",
        "Show me {book} chapter {chapter_num}, verse {verse_num}, in the '{version}' version.",
        "I want to read {book} {chapter_num}:{verse_num} from the '{version}' version.",
        "Lookup {book} {chapter_num}:{verse_num} in the '{version}' text.",
        "Fetch the verse {chapter_num}:{verse_num} from {book} ({version}).",
        "Could you retrieve {book} chapter {chapter_num} verse {verse_num} in '{version}'?",
        "Please provide {book} {chapter_num}:{verse_num} from the '{version}' edition.",
        "Give me the text of {book} {chapter_num}:{verse_num} in '{version}'.",
        "Retrieve the verse {verse_num} in chapter {chapter_num} of {book}, '{version}' version.",
        "I'd like to see {book} {chapter_num}:{verse_num} in the '{version}' translation.",
        "Pull up {book} chapter {chapter_num}, verse {verse_num} ('{version}').",
        "Can you get me {book} {chapter_num}:{verse_num} from the '{version}' version?",
        "Please show {book} {chapter_num}:{verse_num} using the '{version}' version.",
        "What does {book} {chapter_num}:{verse_num} say in the '{version}' version?",
        "Give me the verse located at {book} {chapter_num}:{verse_num} ('{version}').",
        "I'd like the '{version}' text for {book} {chapter_num}:{verse_num}."
    ]
    return variations

def get_user_lookup_corrected_version_variations():
    variations = [
        "oh sorry. try version {version}",
        "i misspelled it should be {version}",
        "use '{version}'",
        "let me correct: {book} {chapter_num}:{verse_num} version '{version}'",
        "oh then pick '{version}' version"
    ]
    return variations

def synth_lookup_verse_version_typo(book, version, chapter_num, verse_num):
    typo_options = [
        (typo.StrErrer(version).char_swap().result, 'char_swap'),
        (typo.StrErrer(version).extra_char().result, 'extra_char'),
        (typo.StrErrer(version).missing_char().result, 'missing_char'),
        (typo.StrErrer(version).nearby_char().result, 'nearby_char'),
        (typo.StrErrer(version).repeated_char().result, 'repeated_char')
    ]
    (wrong_version, typo_class) = random.choice(typo_options)

    tool_name = dummy_agent.TOOL_LOOKUP_VERSE
    wrong_args = {
            "version": wrong_version,
            "book": book,
            "chapter_num": chapter_num,
            "verse_num": verse_num
        }
    right_args = {
            "version": version,
            "book": book,
            "chapter_num": chapter_num,
            "verse_num": verse_num
        }

    user_req_variations = get_user_lookup_variations()
    variation1 = int(np.random.choice(len(user_req_variations), 1)[0])
    user_msg1 = user_req_variations[variation1].format_map(wrong_args)
    llm_msg1 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_LOOKUP_VERSE,
        dummy_agent.KEY_ARGS: wrong_args}, ensure_ascii=False)
    try:
        lookup_result_obj = bblt.lookup_verse(**wrong_args)
        print(f"!!! Suspicious. We expected this trial to fail because of wrong args ({typo_class}) {wrong_args}")
        return None
    except Exception as ex:
        error_msg = str(ex)

    tool_msg1 = json.dumps({
        "tool_name": tool_name,
        "status": "error",
        "error_message": error_msg
    }, ensure_ascii=False)
    llm_msg2 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_RESPOND_TO_USER,
        dummy_agent.KEY_ARGS: {"text": error_msg}}, ensure_ascii=False)
    
    user_correct_variations = get_user_lookup_corrected_version_variations()
    variation2 = int(np.random.choice(len(user_correct_variations), 1)[0])
    user_msg2 = user_correct_variations[variation2].format_map(right_args)

    llm_msg3 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_LOOKUP_VERSE,
        dummy_agent.KEY_ARGS: right_args}, ensure_ascii=False)

    try:
        lookup_result_obj = bblt.lookup_verse(**right_args)
        verse_text = lookup_result_obj["text"]
        if not isinstance(verse_text, str):
            raise ValueError("Verse text must be a string")
    except Exception as ex:
        print(f"!!! Suspicious. We expected this to succeed, but got {str(ex)}")
        return None

    tool_resp = json.dumps({
        "tool_name": tool_name,
        "status": "ok",
        "result": lookup_result_obj
    } ,ensure_ascii=False)
    llm_msg4 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_RESPOND_TO_USER,
        dummy_agent.KEY_ARGS: {"text": verse_text}}, ensure_ascii=False)

    messages = [
        {"role": dummy_agent.ROLE_SYSTEM, "content": dummy_agent.system_instructions},
        {"role": dummy_agent.ROLE_USER, "content": user_msg1},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg1},
        {"role": dummy_agent.ROLE_TOOL, "content": tool_msg1},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg2},
        {"role": dummy_agent.ROLE_USER, "content": user_msg2},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg3},
        {"role": dummy_agent.ROLE_TOOL, "content": tool_resp},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg4}
    ]
    metadata = {
        "scenario": "lookup_verse_typo_version",
        "variation1": variation1,
        "variation2": variation2,
        "typo_class": typo_class,
        "wrong_args": wrong_args,
        "right_args": right_args
    }
    example = {"metadata": metadata, "messages": messages}
    return example

def synth_lookup_verse_book_typo(book, version, chapter_num, verse_num):
    typo_options = [
        (typo.StrErrer(book).char_swap().result, 'char_swap'),
        (typo.StrErrer(book).extra_char().result, 'extra_char'),
        (typo.StrErrer(book).missing_char().result, 'missing_char'),
        (typo.StrErrer(book).nearby_char().result, 'nearby_char'),
        (typo.StrErrer(book).repeated_char().result, 'repeated_char')
    ]
    typo_options = list(filter(lambda book_name:book_name not in bblt.supported_books, typo_options))
    if not typo_options:
        return None # All the typos by chance are valid book names
    (wrong_book, typo_class) = random.choice(typo_options)
    # More variations cap/small:
    (wrong_book, typo_class) = random.choice([
        (wrong_book, typo_class),
        (wrong_book[0].upper() + wrong_book[1:], typo_class + "_cap"),
        (wrong_book.upper(), typo_class + "_allcaps"),
    ])

    tool_name = dummy_agent.TOOL_LOOKUP_VERSE
    wrong_args = {
            "version": version,
            "book": wrong_book,
            "chapter_num": chapter_num,
            "verse_num": verse_num
        }
    right_args = {
            "version": version,
            "book": book,
            "chapter_num": chapter_num,
            "verse_num": verse_num
        }

    user_req_variations = get_user_lookup_variations()
    variation1 = int(np.random.choice(len(user_req_variations), 1)[0])
    user_msg1 = user_req_variations[variation1].format_map(wrong_args)
    llm_msg1 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_LOOKUP_VERSE,
        dummy_agent.KEY_ARGS: wrong_args}, ensure_ascii=False)
    try:
        lookup_result_obj = bblt.lookup_verse(**wrong_args)
        print(f"!!! Suspicious. We expected this trial to fail because of wrong args ({typo_class}) {wrong_args}")
        return None
    except Exception as ex:
        error_msg = str(ex)

    tool_msg1 = json.dumps({
        "tool_name": tool_name,
        "status": "error",
        "error_message": error_msg
    }, ensure_ascii=False)

    # Book name is easy. Skip surfacing error to user - the LLM should immediately interpret the error message and initiate another tool call with the right arguments:
    llm_msg3 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_LOOKUP_VERSE,
        dummy_agent.KEY_ARGS: right_args}, ensure_ascii=False)

    try:
        lookup_result_obj = bblt.lookup_verse(**right_args)
        verse_text = lookup_result_obj["text"]
        if not isinstance(verse_text, str):
            raise ValueError("Verse text must be a string")
    except Exception as ex:
        print(f"!!! Suspicious. We expected this to succeed, but got {str(ex)}")
        return None

    tool_resp = json.dumps({
        "tool_name": tool_name,
        "status": "ok",
        "result": lookup_result_obj
    } ,ensure_ascii=False)
    llm_msg4 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_RESPOND_TO_USER,
        dummy_agent.KEY_ARGS: {"text": verse_text}}, ensure_ascii=False)

    messages = [
        {"role": dummy_agent.ROLE_SYSTEM, "content": dummy_agent.system_instructions},
        {"role": dummy_agent.ROLE_USER, "content": user_msg1},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg1},
        {"role": dummy_agent.ROLE_TOOL, "content": tool_msg1},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg3},
        {"role": dummy_agent.ROLE_TOOL, "content": tool_resp},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg4}
    ]
    metadata = {
        "scenario": "lookup_verse_typo_book",
        "variation1": variation1,
        "typo_class": typo_class,
        "wrong_args": wrong_args,
        "right_args": right_args
    }
    example = {"metadata": metadata, "messages": messages}
    return example

def synth_lookup_verse_ok(book, version, chapter_num, verse_num):
    tool_name = dummy_agent.TOOL_LOOKUP_VERSE
    tool_args = {
            "version": version,
            "book": book,
            "chapter_num": chapter_num,
            "verse_num": verse_num
        }
    
    variations = get_user_lookup_variations()
    variation = int(np.random.choice(len(variations), 1)[0])
    user_msg = variations[variation].format_map(tool_args)
    llm_msg1 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_LOOKUP_VERSE,
        dummy_agent.KEY_ARGS: tool_args}, ensure_ascii=False)
    try:
        lookup_result_obj = bblt.lookup_verse(**tool_args)
        verse_text = lookup_result_obj["text"]
        if not isinstance(verse_text, str):
            raise ValueError("Verse text must be a string")
    except Exception as ex:
        print(f"!!! Suspicious. We expected this to succeed, but got {str(ex)}")
        return None

    tool_resp = json.dumps({
        "tool_name": tool_name,
        "status": "ok",
        "result": lookup_result_obj
    } ,ensure_ascii=False)
    llm_msg2 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_RESPOND_TO_USER,
        dummy_agent.KEY_ARGS: {"text": verse_text}}, ensure_ascii=False)
    
    messages = [
        {"role": dummy_agent.ROLE_SYSTEM, "content": dummy_agent.system_instructions},
        {"role": dummy_agent.ROLE_USER, "content": user_msg},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg1},
        {"role": dummy_agent.ROLE_TOOL, "content": tool_resp},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg2}
    ]
    metadata = {
        "scenario": "lookup_verse_ok",
        "variation": variation,
        "args": tool_args
    }
    example = {"metadata": metadata, "messages": messages}
    return example

def synth_chitchat_examples():
    pairs = [
        ("Hi there, what can you help me with?", "I can assist you with analyzing biblical texts."),
        ("Hello", "Hi. How can I help you?"),
        ("Hello there!", "Hello to you. How can I assist you?"),
        ("How do I ask you for a biblical verse?", "Just tell me which biblical book you want, in which version/translation, the chapter number and verse number.")
    ]
    examples = [
        {
            "messages":[
                {"role": dummy_agent.ROLE_SYSTEM, "content": dummy_agent.system_instructions},
                {"role": dummy_agent.ROLE_USER, "content": pair[0]},
                {"role": dummy_agent.ROLE_ASSISTANT, "content": pair[1]}
            ],
            "metadata": {"scenario": "chitchat"}
        }
        for pair in pairs]
    return examples

def create_examples():
    ratio = 0.05
    examples = []
    examples.extend(synth_chitchat_examples())
    synth_functions = [
        synth_lookup_verse_ok, 
        synth_lookup_verse_version_typo,
        synth_lookup_verse_book_typo
        ]
    for book in bblt.supported_books:
        # Load the whole book just once, then make individual calls to the tool to get authentic tool responses
        book_verses = sef.sefaria_read_verses_and_metadata(book, bblt.supported_versions[0], strip_html=True)
        book_verse_index = [{'chapter': item['chapter_num'], 'verse': item['verse_num']} for item in book_verses] # keep only verse "index"
        bn = len(book_verses)
        for version in bblt.supported_versions:
            print(f"book '{book}' ({bn} verses), version '{version}'")
            sample_verse_dicts = np.random.choice(book_verse_index, size=int(ratio*bn), replace=False)
            for verse_index in sample_verse_dicts:
                synth_func = random.choice(synth_functions)
                example = synth_func(book, version, verse_index['chapter'], verse_index['verse'])
                if example:
                    examples.append(example)

    return examples

In [6]:
examples = create_examples()
print(len(examples))

book 'genesis' (1533 verses), version 'he.text_only'
!!! Suspicious. We expected this trial to fail because of wrong args (nearby_char) {'version': 'he.text_only', 'book': 'genesis', 'chapter_num': 27, 'verse_num': 26}
book 'genesis' (1533 verses), version 'he.masorah'
book 'genesis' (1533 verses), version 'en.new.jps1917'
book 'genesis' (1533 verses), version 'en.koren'
book 'exodus' (1210 verses), version 'he.text_only'
book 'exodus' (1210 verses), version 'he.masorah'
book 'exodus' (1210 verses), version 'en.new.jps1917'
book 'exodus' (1210 verses), version 'en.koren'
book 'leviticus' (859 verses), version 'he.text_only'
book 'leviticus' (859 verses), version 'he.masorah'
book 'leviticus' (859 verses), version 'en.new.jps1917'
book 'leviticus' (859 verses), version 'en.koren'
book 'numbers' (1288 verses), version 'he.text_only'
book 'numbers' (1288 verses), version 'he.masorah'
book 'numbers' (1288 verses), version 'en.new.jps1917'
book 'numbers' (1288 verses), version 'en.koren'
bo

In [7]:
set([ex['metadata']['scenario'] for ex in examples])

{'chitchat',
 'lookup_verse_ok',
 'lookup_verse_typo_book',
 'lookup_verse_typo_version'}

In [8]:
[(ex['metadata']['typo_class'],ex['metadata']['wrong_args']['book']) for ex in examples if ex['metadata']['scenario']=='lookup_verse_typo_book']

[('char_swap', 'egnesis'),
 ('nearby_char_allcaps', 'GENEZIS'),
 ('extra_char_cap', 'Gensesis'),
 ('missing_char_cap', 'Gnesis'),
 ('missing_char_allcaps', 'GENEIS'),
 ('char_swap_cap', 'Geensis'),
 ('char_swap', 'geneiss'),
 ('missing_char_allcaps', 'GENEIS'),
 ('missing_char_cap', 'Geesis'),
 ('char_swap', 'geensis'),
 ('nearby_char_allcaps', 'HENESIS'),
 ('nearby_char_cap', 'Genesiz'),
 ('repeated_char_allcaps', 'GENESSIS'),
 ('nearby_char', 'fenesis'),
 ('missing_char_cap', 'Gnesis'),
 ('char_swap_allcaps', 'GENESSI'),
 ('nearby_char', 'gsnesis'),
 ('missing_char_cap', 'Gnesis'),
 ('extra_char_cap', 'Gsenesis'),
 ('extra_char_allcaps', 'GWENESIS'),
 ('extra_char_allcaps', 'GENESIXS'),
 ('repeated_char_cap', 'Genesiss'),
 ('extra_char_allcaps', 'GENESOIS'),
 ('nearby_char_cap', 'Gebesis'),
 ('char_swap', 'gneesis'),
 ('extra_char', 'gejnesis'),
 ('extra_char_allcaps', 'GENESOIS'),
 ('repeated_char', 'genessis'),
 ('missing_char_allcaps', 'GENESS'),
 ('char_swap', 'genseis'),
 ('repe

In [9]:
[(ex['metadata']['typo_class'],ex['metadata']['wrong_args']['version']) for ex in examples if ex['metadata']['scenario']=='lookup_verse_typo_version']

[('nearby_char', 'he.text_omly'),
 ('missing_char', 'e.text_only'),
 ('extra_char', 'he.text_ohnly'),
 ('repeated_char', 'he.texxt_only'),
 ('char_swap', 'he.tetx_only'),
 ('nearby_char', 'je.text_only'),
 ('repeated_char', 'he.texxt_only'),
 ('missing_char', 'e.text_only'),
 ('missing_char', 'he.text_ony'),
 ('missing_char', 'he.tet_only'),
 ('char_swap', 'he.text_onyl'),
 ('missing_char', 'he.ext_only'),
 ('nearby_char', 'he.text_onky'),
 ('char_swap', 'he.text_onyl'),
 ('repeated_char', 'he.textt_only'),
 ('nearby_char', 'he.text_onoy'),
 ('nearby_char', 'he.text_onky'),
 ('extra_char', 'he.text__only'),
 ('nearby_char', 'he.gext_only'),
 ('nearby_char', 'be.text_only'),
 ('repeated_char', 'he.text_onnly'),
 ('nearby_char', 'he.tsxt_only'),
 ('missing_char', 'he.text_ony'),
 ('repeated_char', 'he.texxt_only'),
 ('missing_char', 'he.tex_only'),
 ('missing_char', 'he.txt_only'),
 ('char_swap', 'he.tetx_only'),
 ('char_swap', 'he.masroah'),
 ('repeated_char', 'hhe.masorah'),
 ('nearby_

In [13]:
set([ex['messages'][0]['role'] for ex in examples])

{'system'}

In [21]:
examples[:2]

[{'metadata': {'scenario': 'lookup_verse_typo_book',
   'variation1': 15,
   'typo_class': 'extra_char',
   'wrong_args': {'version': 'he.text_only',
    'book': 'bgenesis',
    'chapter_num': 43,
    'verse_num': 34},
   'right_args': {'version': 'he.text_only',
    'book': 'genesis',
    'chapter_num': 43,
    'verse_num': 34}},
  'messages': [{'role': 'system',
    'content': 'You are a research assistant that always responds using a JSON object with fields "tool" and "arguments".\n\nTo respond normally to the user, use:\n{"tool": "respond_to_user", "arguments":{"text": "<text to show the user>"}}\n\nTo call a tool, use:\n{"tool": "<tool_name>", "arguments":{ ... }}\n\nAfter you call a tool, you will receive a message with role "user" containing a JSON object.\nThe tool message always includes "tool_name" and "status".\n\nIf "status" is "ok":\n- The message will include a "result" object.\n- Read "result.text".\n- Respond using "respond_to_user" and copy "result.text" exactly as-is.

## Save data

In [10]:
dev_folder = os.path.abspath("../data/dev")
os.listdir(dev_folder)

['lookup_verse.1.test.jsonl',
 'lookup_verse.1.train.jsonl',
 'translate1.test1.jsonl',
 'translate1.train1.jsonl']

In [11]:
trainset_file = os.path.join(dev_folder, "lookup_verse.2.train.jsonl")
testset_file = os.path.join(dev_folder, "lookup_verse.2.test.jsonl")
cutoff = int(len(examples)*0.75)

with open(trainset_file, "w", encoding="utf-8") as f:
    for ex in examples[:cutoff]:
        f.write(json.dumps(ex, ensure_ascii=False) + "\n")

with open(testset_file, "w", encoding="utf-8") as f:
    for ex in examples[cutoff:]:
        f.write(json.dumps(ex, ensure_ascii=False) + "\n")

In [12]:
cutoff

1264