# Generate example conversations to fine-tune (and test) the LLM for BibleAssistant agent

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path
current_dir = Path().resolve()
sys.path.append(str(current_dir.parent))

import os
import numpy as np
import json
import random
import sefaria.sefaria_code as sef
import bibleAssistant.agent as bagent
import bibleAssistant.bible_tools as bblt
import typo

## Supporting functions

In [4]:
class MetaField:
    SCENARIO = "scenario"
    TOOL_LIST = "tool_list_in_sys_prompt"
    SEQ_OF_SCENARIOS = "sequence_of_scenarios"
    SEQ_LEN = "sequence_length"

In [5]:
dummy_agent = bagent.Agent("dummy")

def add_system_message(example, needed_tools=None):
    (tool_list, system_prompt) = get_rand_system_prompt_variation(needed_tools)
    example['messages'] = [{"role": dummy_agent.ROLE_SYSTEM, "content": system_prompt}] + example['messages']
    example['metadata'][MetaField.TOOL_LIST] = tool_list
    return example

def format_llm_response_to_user(assistant_response):
    llm_msg = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_RESPOND_TO_USER,
        dummy_agent.KEY_ARGS: {dummy_agent.SUBKEY_TEXT: assistant_response}}, ensure_ascii=False)
    return llm_msg

def get_rand_system_prompt_variation(needed_tool_names:list[str]=None):
    if (not needed_tool_names) or (np.random.rand() > 0.5):
        # Include all the tools registered with the agent:
        tool_names = list(dummy_agent.tools.keys())
        tool_names.remove(dummy_agent.TOOL_RESPOND_TO_USER)
    else:
        tool_names = needed_tool_names

    random.shuffle(tool_names) # Get more variations this way
    system_prompt = dummy_agent._generate_system_instructions(tool_names=tool_names)
    return (tool_names, system_prompt)

In [6]:
print("Take a look at a few variations for the system prompt:\n\n")
for tool_names in [None, None, [dummy_agent.TOOL_LOOKUP_VERSE], [dummy_agent.TOOL_SEARCH_PHRASE], [dummy_agent.TOOL_SEARCH_PHRASE], [dummy_agent.TOOL_SEARCH_PHRASE]]:
    (tool_list, system_prompt) = get_rand_system_prompt_variation(tool_names)
    print("-"*40)
    print(f"Requested {tool_names}. Shuffled list {tool_list}. Got system prompt variation:")
    print(system_prompt)

Take a look at a few variations for the system prompt:


----------------------------------------
Requested None. Shuffled list ['lookup_verse', 'search_phrase']. Got system prompt variation:
You are a research assistant for biblical texts that always responds using a JSON object with fields "tool" and "arguments".

To respond normally to the user, use:
{"tool": "respond_to_user", "arguments":{"text": "<text to show the user>"}}

To call a tool, you need to indicate which tool to use and what arguments to send to it - use the structure:
{"tool": "<tool_name>", "arguments":{ ... }}

After you call a tool, you will receive a tool-response message from role "user" containing a JSON object.
The tool-response object always includes fields "tool_name" and "status".

If the tool call succeeded, the tool response object will have the structure:
{"tool_name": "<tool_name>", "status": "ok", "result": { ... }}
Different tools have different structures of the returned data inside the dictionary "r

### Generic scenarios:

In [18]:
def synth_chitchat_examples(n_examples, add_system_msg=True):
    print(f"... Generating {n_examples} chitchat scenarios ...")
    pairs = [
        ("Hi there, what can you help me with?", "I can assist you with analyzing biblical texts."),
        ("Hello", "Hi. How can I help you?"),
        ("how are you doing today?", "I am well, thank you."),
        ("hi", "Hello!"),
        ("I want your help.", "Sure. Let me know how I can assist you."),
        ("Hey can you help me?", "Sure, I can help you with Biblical texts."),
        ("Hello there!", "Hello to you. How can I assist you?"),
        ("How do I ask you for a biblical verse?", "Just tell me which biblical book you want, in which version/translation, the chapter number and verse number.")
    ]

    examples = []
    for i in range(n_examples):
        pair = random.choice(pairs)
        llm_msg = format_llm_response_to_user(pair[1])
        example = {
            "messages":[
                {"role": dummy_agent.ROLE_USER, "content": pair[0]},
                {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg}
            ],
            "metadata": {
                MetaField.SCENARIO: "chitchat",
                MetaField.TOOL_LIST: tool_list
                }
                }
        if add_system_msg:
            example = add_system_message(example, [])
        examples.append(example)
    return examples

def synth_tool_menu(n_examples):
    """
    TODO
    User can ask things like "what can you do?", "what tools can you use to help me research?", "can you find verses with a certain word in them?"
    Perhaps such examples will make the LLM better at deciding on the right tool at a given scenario?
    Perhaps this will help later, when the LLM needs to plan a sequence of tool calls.
    """
    return

In [19]:
chitchat_examples = synth_chitchat_examples(5)
bagent.AgentUI().display_convo(chitchat_examples[0]["messages"])

... Generating 5 chitchat scenarios ...


### Scenarios with lookup_verse tool:

In [21]:
def get_user_lookup_variations():
    variations = [
        "Please get me the biblical verse from the book of {book}, version '{version}', chapter {chapter_num} verse {verse_num}",
        "Give me verse {verse_num} from chapter {chapter_num} in the '{version}' version of {book}.",
        "Get me {book} {chapter_num}:{verse_num} ('{version}' version).",
        "Show me {book} chapter {chapter_num}, verse {verse_num}, in the '{version}' version.",
        "I want to read {book} {chapter_num}:{verse_num} from the '{version}' version.",
        "Lookup {book} {chapter_num}:{verse_num} in the '{version}' text.",
        "Fetch the verse {chapter_num}:{verse_num} from {book} ({version}).",
        "Could you retrieve {book} chapter {chapter_num} verse {verse_num} in '{version}'?",
        "Please provide {book} {chapter_num}:{verse_num} from the '{version}' edition.",
        "Give me the text of {book} {chapter_num}:{verse_num} in '{version}'.",
        "Retrieve the verse {verse_num} in chapter {chapter_num} of {book}, '{version}' version.",
        "I'd like to see {book} {chapter_num}:{verse_num} in the '{version}' translation.",
        "Pull up {book} chapter {chapter_num}, verse {verse_num} ('{version}').",
        "Can you get me {book} {chapter_num}:{verse_num} from the '{version}' version?",
        "Please show {book} {chapter_num}:{verse_num} using the '{version}' version.",
        "What does {book} {chapter_num}:{verse_num} say in the '{version}' version?",
        "Give me the verse located at {book} {chapter_num}:{verse_num} ('{version}').",
        "I'd like the '{version}' text for {book} {chapter_num}:{verse_num}."
    ]
    return variations

def get_user_lookup_corrected_version_variations():
    variations = [
        "oh sorry. try version {version}",
        "i misspelled it should be {version}",
        "use '{version}'",
        "let me correct: {book} {chapter_num}:{verse_num} version '{version}'",
        "oh then pick '{version}' version"
    ]
    return variations

def synth_lookup_verse_version_typo(book, version, chapter_num, verse_num):
    typo_options = [
        (typo.StrErrer(version).char_swap().result, 'char_swap'),
        (typo.StrErrer(version).extra_char().result, 'extra_char'),
        (typo.StrErrer(version).missing_char().result, 'missing_char'),
        (typo_nearby_char(version), 'nearby_char'),
        (typo.StrErrer(version).repeated_char().result, 'repeated_char')
    ]
    (wrong_version, typo_class) = random.choice(typo_options)

    tool_name = dummy_agent.TOOL_LOOKUP_VERSE
    wrong_args = {
            "version": wrong_version,
            "book": book,
            "chapter_num": chapter_num,
            "verse_num": verse_num
        }
    right_args = {
            "version": version,
            "book": book,
            "chapter_num": chapter_num,
            "verse_num": verse_num
        }

    user_req_variations = get_user_lookup_variations()
    variation1 = int(np.random.choice(len(user_req_variations), 1)[0])
    user_msg1 = user_req_variations[variation1].format_map(wrong_args)
    llm_msg1 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_LOOKUP_VERSE,
        dummy_agent.KEY_ARGS: wrong_args}, ensure_ascii=False)
    try:
        lookup_result_obj = bblt.lookup_verse(**wrong_args)
        print(f"!!! Suspicious. We expected this trial to fail because of wrong args ({typo_class}) {wrong_args}")
        return None
    except Exception as ex:
        error_msg = str(ex)

    tool_msg1 = json.dumps({
        dummy_agent.KEY_RESP_TOOL_NAME: tool_name,
        dummy_agent.KEY_STATUS: dummy_agent.STATUS_ER,
        dummy_agent.KEY_ERROR: error_msg
    }, ensure_ascii=False)
    llm_msg2 = format_llm_response_to_user(error_msg)
    
    user_correct_variations = get_user_lookup_corrected_version_variations()
    variation2 = int(np.random.choice(len(user_correct_variations), 1)[0])
    user_msg2 = user_correct_variations[variation2].format_map(right_args)

    llm_msg3 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_LOOKUP_VERSE,
        dummy_agent.KEY_ARGS: right_args}, ensure_ascii=False)

    try:
        lookup_result_obj = bblt.lookup_verse(**right_args)
        verse_text = lookup_result_obj["text"]
        if not isinstance(verse_text, str):
            raise ValueError("Verse text must be a string")
    except Exception as ex:
        print(f"!!! Suspicious. We expected this to succeed, but got {str(ex)}")
        return None

    tool_resp = json.dumps({
        dummy_agent.KEY_RESP_TOOL_NAME: tool_name,
        dummy_agent.KEY_STATUS: dummy_agent.STATUS_OK,
        dummy_agent.KEY_RESULT: lookup_result_obj
    } ,ensure_ascii=False)
    llm_msg4 = format_llm_response_to_user(verse_text)

    messages = [
        {"role": dummy_agent.ROLE_USER, "content": user_msg1},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg1},
        {"role": dummy_agent.ROLE_TOOL, "content": tool_msg1},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg2},
        {"role": dummy_agent.ROLE_USER, "content": user_msg2},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg3},
        {"role": dummy_agent.ROLE_TOOL, "content": tool_resp},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg4}
    ]
    metadata = {
        MetaField.SCENARIO: "lookup_verse_typo_version",
        "variation1": variation1,
        "variation2": variation2,
        "typo_class": typo_class,
        "wrong_args": wrong_args,
        "right_args": right_args
    }
    example = {"metadata": metadata, "messages": messages}
    return example

def typo_nearby_char(text):
    for attempt in range(3):
        text2 = typo.StrErrer(text).nearby_char().result
        if text2 != text:
            return text2
    return text

def synth_lookup_verse_book_typo(book, version, chapter_num, verse_num):
    typo_options = [
        (typo.StrErrer(book).char_swap().result, 'char_swap'),
        (typo.StrErrer(book).extra_char().result, 'extra_char'),
        (typo.StrErrer(book).missing_char().result, 'missing_char'),
        (typo_nearby_char(book), 'nearby_char'),
        (typo.StrErrer(book).repeated_char().result, 'repeated_char')
    ]
    typo_options = list(filter(lambda book_name:book_name not in bblt.supported_books, typo_options))
    if not typo_options:
        return None # All the typos by chance are valid book names
    (wrong_book, typo_class) = random.choice(typo_options)
    # More variations cap/small:
    (wrong_book, typo_class) = random.choice([
        (wrong_book, typo_class),
        (wrong_book[0].upper() + wrong_book[1:], typo_class + "_cap"),
        (wrong_book.upper(), typo_class + "_allcaps"),
    ])

    tool_name = dummy_agent.TOOL_LOOKUP_VERSE
    wrong_args = {
            "version": version,
            "book": wrong_book,
            "chapter_num": chapter_num,
            "verse_num": verse_num
        }
    right_args = {
            "version": version,
            "book": book,
            "chapter_num": chapter_num,
            "verse_num": verse_num
        }

    user_req_variations = get_user_lookup_variations()
    variation1 = int(np.random.choice(len(user_req_variations), 1)[0])
    user_msg1 = user_req_variations[variation1].format_map(wrong_args)
    llm_msg1 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_LOOKUP_VERSE,
        dummy_agent.KEY_ARGS: wrong_args}, ensure_ascii=False)
    try:
        lookup_result_obj = bblt.lookup_verse(**wrong_args)
        print(f"!!! Suspicious. We expected this trial to fail because of wrong args ({typo_class}) {wrong_args}")
        return None
    except Exception as ex:
        error_msg = str(ex)

    tool_msg1 = json.dumps({
        dummy_agent.KEY_RESP_TOOL_NAME: tool_name,
        dummy_agent.KEY_STATUS: dummy_agent.STATUS_ER,
        dummy_agent.KEY_ERROR: error_msg
    }, ensure_ascii=False)

    # Book name is easy. Skip surfacing error to user - the LLM should immediately interpret the error message and initiate another tool call with the right arguments:
    llm_msg3 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_LOOKUP_VERSE,
        dummy_agent.KEY_ARGS: right_args}, ensure_ascii=False)

    try:
        lookup_result_obj = bblt.lookup_verse(**right_args)
        verse_text = lookup_result_obj["text"]
        if not isinstance(verse_text, str):
            raise ValueError("Verse text must be a string")
    except Exception as ex:
        print(f"!!! Suspicious. We expected this to succeed, but got {str(ex)}")
        return None

    tool_resp = json.dumps({
        dummy_agent.KEY_RESP_TOOL_NAME: tool_name,
        dummy_agent.KEY_STATUS: dummy_agent.STATUS_OK,
        dummy_agent.KEY_RESULT: lookup_result_obj
    } ,ensure_ascii=False)
    llm_msg4 = format_llm_response_to_user(verse_text)

    messages = [
        {"role": dummy_agent.ROLE_USER, "content": user_msg1},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg1},
        {"role": dummy_agent.ROLE_TOOL, "content": tool_msg1},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg3},
        {"role": dummy_agent.ROLE_TOOL, "content": tool_resp},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg4}
    ]
    metadata = {
        MetaField.SCENARIO: "lookup_verse_typo_book",
        "variation1": variation1,
        "typo_class": typo_class,
        "wrong_args": wrong_args,
        "right_args": right_args
    }
    example = {"metadata": metadata, "messages": messages}
    return example

def synth_lookup_verse_ok(book, version, chapter_num, verse_num):
    tool_name = dummy_agent.TOOL_LOOKUP_VERSE
    tool_args = {
            "version": version,
            "book": book,
            "chapter_num": chapter_num,
            "verse_num": verse_num
        }
    
    variations = get_user_lookup_variations()
    variation = int(np.random.choice(len(variations), 1)[0])
    user_msg = variations[variation].format_map(tool_args)
    llm_msg1 = json.dumps({
        dummy_agent.KEY_TOOL: dummy_agent.TOOL_LOOKUP_VERSE,
        dummy_agent.KEY_ARGS: tool_args}, ensure_ascii=False)
    try:
        lookup_result_obj = bblt.lookup_verse(**tool_args)
        verse_text = lookup_result_obj["text"]
        if not isinstance(verse_text, str):
            raise ValueError("Verse text must be a string")
    except Exception as ex:
        print(f"!!! Suspicious. We expected this to succeed, but got {str(ex)}")
        return None

    tool_resp = json.dumps({
        dummy_agent.KEY_RESP_TOOL_NAME: tool_name,
        dummy_agent.KEY_STATUS: dummy_agent.STATUS_OK,
        dummy_agent.KEY_RESULT: lookup_result_obj
    } ,ensure_ascii=False)
    llm_msg2 = format_llm_response_to_user(verse_text)
    
    messages = [
        {"role": dummy_agent.ROLE_USER, "content": user_msg},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg1},
        {"role": dummy_agent.ROLE_TOOL, "content": tool_resp},
        {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg2}
    ]
    metadata = {
        MetaField.SCENARIO: "lookup_verse_ok",
        "variation": variation,
        "args": tool_args
    }
    example = {"metadata": metadata, "messages": messages}
    return example

def synth_lookup_and_another_version(book, version, chapter_num, verse_num):
    other_versions = list(bblt.supported_versions)
    other_versions.remove(version)
    version2 = random.choice(other_versions)

    synth_functions = [
        synth_lookup_verse_ok, 
        synth_lookup_verse_version_typo,
        synth_lookup_verse_book_typo,
    ]
    func1 = random.choice(synth_functions)
    func2 = random.choice(synth_functions)
    example1 = func1(book, version, chapter_num, verse_num)
    if not example1:
        return None
    example2 = func2(book, version2, chapter_num, verse_num)
    if not example2:
        return None
    segue_phrases = [
        f"Great, now from '{version2}'",
        f"thnk you. also I want it from {version2}",
        f"good, I also want another version. {version2}.",
        f"thx. please now same verse from {version2} version.",
        f"and from {version2}.",
        f"good. give me also {chapter_num}:{verse_num} from the '{version2}' version",
        f"I want also the {version2} translation."
    ]
    seg_variation = int(np.random.choice(len(segue_phrases), 1)[0])
    whole_convo = example1["messages"]
    whole_convo.append({"role": dummy_agent.ROLE_USER, "content": segue_phrases[seg_variation]})
    whole_convo.extend(example2["messages"][1:]) # skip the original first user msg
    
    metadata = {
        MetaField.SCENARIO: "lookup_verse_ok_then_another_version",
        "part1": example1["metadata"],
        "segue_variation": seg_variation,
        "part2": example2["metadata"]
    }
    example = {
        "metadata": metadata,
        "messages": whole_convo
    }
    return example

def create_lookup_verse_examples(n_examples, add_system_msg=True):
    print(f"... Generating {n_examples} lookup_verse scenarios ...")
    examples = []
    synth_functions = [
        synth_lookup_verse_ok, 
        synth_lookup_verse_version_typo,
        synth_lookup_verse_book_typo,
        synth_lookup_and_another_version
        ]
    use_books = [sef.BookCode.GENESIS, sef.BookCode.EXODUS, sef.BookCode.DEUTERONOMY, sef.BookCode.JEREMIAH]
    n_ex_per_book = max([1, int(n_examples / len(use_books))]) # How many examples to generate per book to approximate the desired total n_example
    
    for book in use_books:
        # Load the whole book just once, then make individual calls to the tool to get authentic tool responses
        book_verses = sef.sefaria_read_verses_and_metadata(book, bblt.supported_versions[0], strip_html=True)
        book_verse_index = [{'chapter': item['chapter_num'], 'verse': item['verse_num']} for item in book_verses] # keep only verse "index"

        sample_verse_dicts = np.random.choice(book_verse_index, size=n_ex_per_book, replace=False)
        for verse_index in sample_verse_dicts:
            synth_func = random.choice(synth_functions)
            version = random.choice(bblt.supported_versions)
            example = synth_func(book, version, verse_index['chapter'], verse_index['verse'])

            if example:
                if add_system_msg:
                    example = add_system_message(example, [dummy_agent.TOOL_LOOKUP_VERSE])
                examples.append(example)

    return examples

In [22]:
lookup_examples = create_lookup_verse_examples(100)
bagent.AgentUI().display_convo(lookup_examples[0]["messages"])

... Generating 100 lookup_verse scenarios ...


### Scenarios with search_phrase tool:

* This tool is less straight forward than lookup_verse.
* How do I come up with phrases to search? Maybe myself manually create a finite list.
* What does the user want the agent to do with the found verses?
    * Show me N examples (reference and text / only text / only reference) (which N out of M? random? first? last? filter by condition?)
    * Are any of these verses from book X?


In [31]:
import time

def user_instruct_how_to_show_quotes(show_how):
    if show_how == 'ref_and_text':
        show_how_str = random.choice([
            "",
            "text and reference",
            "book, chapter:verse, then text",
            "both the index (book, chapter, verse) and the text of each verse"
        ])
    elif show_how == 'text':
        show_how_str = random.choice([
            "just the text",
            "text of the verse",
            "just text",
            "text only"
        ])
    elif show_how == "ref":
        show_how_str = random.choice([
            "just the reference",
            "book, chapter, verse reference"
        ])
    else:
        raise ValueError(f"!! Unsupported value for parameter show_how: '{show_how}'")
    return show_how_str

def synth_user_request_search_phrase(phrase, n_results_show, show_how, instruct_before):
    phrase_or_word = 'phrase' if (' ' in phrase) else 'word'
    show_how_str = user_instruct_how_to_show_quotes(show_how)
    s = "s" if (n_results_show>1) else ""
    if np.random.rand() > 0.5:
        if n_results_show == 1:
            n_results_show = 'one'
        elif n_results_show == 2:
            n_results_show = 'two'
        elif n_results_show == 3:
            n_results_show = 'three'
    if instruct_before:
        variations = [
            f"Please search for the {phrase_or_word} {phrase}. Then show me {n_results_show} example{s} with it {show_how_str}",
            f"Please find me example verses with '{phrase}'. I want to see {n_results_show} verse{s} ({show_how_str})",
            f"show me {n_results_show} verse{s} ({show_how_str}) that contain the {phrase_or_word} {phrase}.",
            f"search for '{phrase}' and show me {show_how_str} example{s} for {n_results_show} result{s}"
        ]
    else:
        variations = [
            f"where can we see the {phrase_or_word} {phrase}?",
            f"Show me ocurrences of '{phrase}' in the bible",
            f"I want verses that contain the {phrase_or_word} {phrase}"
        ]
    
    user_msg = random.choice(variations)
    user_msg = user_msg.replace("()", "").strip()
    return user_msg

def synth_llm_search_phrase_response(phrase, search_results, n_results_show, show_how):
    lines = []
    for i in range(n_results_show):
        if i >= len(search_results["results"]):
            break
        res = search_results["results"][i]
        ref_str = f"{res["book_name"]} {res["chapter_num"]}:{res["verse_num"]}"
        text_str = res["text"]
        if show_how == 'ref_and_text':
            line = f"[{ref_str}]: {text_str}"
        elif show_how == 'text':
            line = text_str
        elif show_how == "ref":
            line = ref_str
        else:
            raise ValueError(f"!! Unsupported value for parameter show_how: '{show_how}'")
        lines.append(line)

    if len(lines) > 0:
        llm_msg = '\n'.join(lines)
    else:
        llm_msg = f"Sorry. I didn't find any verses containing the phrase '{phrase}'."
    return llm_msg

def synth_search_phrase_user_add_instructions(n_results_show, show_how):
    show_how_str = user_instruct_how_to_show_quotes(show_how)
    s = "s" if (n_results_show>1) else ""
    if np.random.rand() > 0.5:
        if n_results_show == 1:
            n_results_show = 'one'
        elif n_results_show == 2:
            n_results_show = 'two'
        elif n_results_show == 3:
            n_results_show = 'three'
    
    variations = [
        f"Great. Now show me {n_results_show} example{s} ({show_how_str})",
        f"Thank you. Now let me see {n_results_show} example{s} from what you found. {show_how_str}",
        f"i wanna see {show_how_str} data from {n_results_show} verses"
    ]
    user_msg = random.choice(variations)
    return user_msg

def create_search_phrase_examples(n_examples, add_system_msg=True):
    print(f"... Generating {n_examples} search_phrase scenarios ...")

    # Grab a source for possible search phrases:
    book_verses = sef.sefaria_read_content(only_book=sef.BookCode.GENESIS, only_version=sef.VersionCode.HE_TEXT_ONLY)
    examples = []
    for i in range(n_examples):
        # Randomly pick a phrase to search:
        rand_verse = random.choice(book_verses)
        words = rand_verse.split(' ')
        phrase_len = int(np.random.choice([1,2,3], 1, p=[0.6, 0.3, 0.1])[0])
        n_gram = random.choice([words[i:(i+phrase_len)] for i in range(len(words)+1-phrase_len)])
        phrase = ' '.join(n_gram)
        #phrase = 'לשבר אכל' ## For debugging: this is a phrase that resulted in 0 finds
        MAX_RESULTS = 20
        try:
            search_results = bblt.search_phrase(phrase, n_max_results=MAX_RESULTS)
            time.sleep(0.2)
        except Exception as ex:
            print(f"!! Failed tool search for '{phrase}'. Error: {str(ex)}")
            continue
        n_results_found = len(search_results["results"])
        max_show = n_results_found if (n_results_found > 0) else 3
        n_results_show = random.choice(range(1, max_show+1))
        show_how = random.choice(["ref_and_text", "ref", "text"])
        instruct_before = (np.random.rand() > 0.5)

        user_msg1 = synth_user_request_search_phrase(phrase, n_results_show, show_how, instruct_before)
        
        llm_msg1 = json.dumps({
            dummy_agent.KEY_TOOL: dummy_agent.TOOL_SEARCH_PHRASE,
            dummy_agent.KEY_ARGS: {"phrase": phrase}}, ensure_ascii=False)
        tool_resp = json.dumps({
            dummy_agent.KEY_RESP_TOOL_NAME: dummy_agent.TOOL_SEARCH_PHRASE,
            dummy_agent.KEY_STATUS: dummy_agent.STATUS_OK,
            dummy_agent.KEY_RESULT: search_results
            } ,ensure_ascii=False)
        
        messages = [
            {"role": dummy_agent.ROLE_USER, "content": user_msg1},
            {"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg1},
            {"role": dummy_agent.ROLE_TOOL, "content": tool_resp}
        ]

        llm_show_results = synth_llm_search_phrase_response(phrase, search_results, n_results_show, show_how)
        llm_msg_show_results = format_llm_response_to_user(llm_show_results)

        if instruct_before or (n_results_found <= 0):
            messages.append({"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg_show_results})
        else:
            llm_ask_instruct = f"O.K. I found {n_results_found} verses with the phrase '{phrase}'. Now what?"
            llm_msg_ask_instruct = format_llm_response_to_user(llm_ask_instruct)
            user_give_instruct = synth_search_phrase_user_add_instructions(n_results_show, show_how)
            messages.append({"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg_ask_instruct})
            messages.append({"role": dummy_agent.ROLE_USER, "content": user_give_instruct})
            messages.append({"role": dummy_agent.ROLE_ASSISTANT, "content": llm_msg_show_results})

        metadata = {
            MetaField.SCENARIO: "search_phrase",
            "phrase_len": phrase_len,
            "phrase": phrase,
            "n_results_found": n_results_found,
            "n_results_show": n_results_show,
            "show_how": show_how,
            "instruct_when": instruct_before
        }
        example = {
            "metadata": metadata,
            "messages": messages
        }
        if add_system_msg:
            example = add_system_message(example, [dummy_agent.TOOL_SEARCH_PHRASE])
        examples.append(example)
    
    return examples

In [32]:
search_examples = create_search_phrase_examples(2)
bagent.AgentUI().display_convo(search_examples[0]["messages"])

... Generating 2 search_phrase scenarios ...
++ 1533 from genesis (he.text_only)


### Scenarios with sequences of user requests:

In [33]:
def create_examples_sequence_of_requests(n_examples, add_system_msg=True):
    print(f"... Generating {n_examples} sequence_of_requests scenarios ...")

    scenario2func = {
        "chitchat": synth_chitchat_examples,
        "lookup_verse": create_lookup_verse_examples,
        "search_phrase": create_search_phrase_examples
    }
    # Generate a pool of atomic scenario examples:
    n_per_scenario = int(n_examples * 0.75)
    scenario_examples = {}
    for (scenario_name, scenario_func) in scenario2func.items():
        print(f"++ Preparing {n_per_scenario} examples from scenarios of {scenario_name}...")
        scenario_i_examples = scenario_func(n_per_scenario, add_system_msg=False)
        print(f"Generated {len(scenario_i_examples)} examples of scenario {scenario_name}")
        scenario_examples[scenario_name] = scenario_i_examples

    # Now create combinations:
    examples = []
    print(f"Now combining scenarios to generate ~{n_examples} examples...")
    for _ in range(n_examples):
        seq_len = random.choice([2, 3]) # how many consecutive scenarios (each initiated by a seprate user message)
        scenario_seq = []
        metadata_seq = []
        messages = []
        for _ in range(seq_len):
            scenario_name = random.choice(list(scenario2func.keys()))
            ex_i = random.choice(scenario_examples[scenario_name])
            scenario_seq.append(scenario_name)
            metadata_seq.append(ex_i['metadata'])
            messages.extend(ex_i['messages'])

        example = {
            "metadata": {
                MetaField.SCENARIO: "sequence_of_user_requests",
                MetaField.SEQ_LEN: seq_len,
                MetaField.SEQ_OF_SCENARIOS: scenario_seq,
                "metadata_sequence": metadata_seq,
            },
            "messages": messages
        }
        if add_system_msg:
            example = add_system_message(example, None)
        examples.append(example)

    return examples

In [34]:
examples = create_examples_sequence_of_requests(10)
examples

... Generating 10 sequence_of_requests scenarios ...
++ Preparing 7 examples from scenarios of chitchat...
... Generating 7 chitchat scenarios ...
Generated 7 examples of scenario chitchat
++ Preparing 7 examples from scenarios of lookup_verse...
... Generating 7 lookup_verse scenarios ...
Generated 4 examples of scenario lookup_verse
++ Preparing 7 examples from scenarios of search_phrase...
... Generating 7 search_phrase scenarios ...
++ 1533 from genesis (he.text_only)
!! Failed tool search for 'לו'. Error: 'list' object has no attribute 'get'
Generated 6 examples of scenario search_phrase
Now combining scenarios to generate ~10 examples...


[{'metadata': {'scenario': 'sequence_of_user_requests',
   'sequence_length': 2,
   'sequence_of_scenarios': ['chitchat', 'chitchat'],
   'metadata_sequence': [{'scenario': 'chitchat',
     'tool_list_in_sys_prompt': ['lookup_verse', 'search_phrase']},
    {'scenario': 'chitchat',
     'tool_list_in_sys_prompt': ['lookup_verse', 'search_phrase']}],
   'tool_list_in_sys_prompt': ['lookup_verse', 'search_phrase']},
  'messages': [{'role': 'system',
    'content': 'You are a research assistant for biblical texts that always responds using a JSON object with fields "tool" and "arguments".\n\nTo respond normally to the user, use:\n{"tool": "respond_to_user", "arguments":{"text": "<text to show the user>"}}\n\nTo call a tool, you need to indicate which tool to use and what arguments to send to it - use the structure:\n{"tool": "<tool_name>", "arguments":{ ... }}\n\nAfter you call a tool, you will receive a tool-response message from role "user" containing a JSON object.\nThe tool-response ob

In [35]:
bagent.AgentUI().display_convo(examples[1]["messages"])

### Scenarios with sequences of tool calls for single user request (step by step instructions):

### Scenarios with sequences of tool calls for single user request (autonomous planning):

### Collect various scenarios:

In [36]:
def create_examples():
    examples = []
    examples.extend(synth_chitchat_examples(100))
    examples.extend(create_lookup_verse_examples(300))
    examples.extend(create_search_phrase_examples(300))
    examples.extend(create_examples_sequence_of_requests(300))
    return examples

In [37]:
examples = create_examples()
print(len(examples))

... Generating 100 chitchat scenarios ...
... Generating 300 lookup_verse scenarios ...
!!! Suspicious. We expected this trial to fail because of wrong args (nearby_char) {'version': 'he.text_only', 'book': 'deuteronomy', 'chapter_num': 33, 'verse_num': 24}
... Generating 300 search_phrase scenarios ...
++ 1533 from genesis (he.text_only)
!! Failed tool search for 'את'. Error: 'list' object has no attribute 'get'
!! Failed tool search for 'כי'. Error: 'list' object has no attribute 'get'
!! Failed tool search for 'שר'. Error: 'list' object has no attribute 'get'
!! Failed tool search for 'בן'. Error: 'list' object has no attribute 'get'
!! Failed tool search for 'חן'. Error: 'list' object has no attribute 'get'
!! Failed tool search for 'כי'. Error: 'list' object has no attribute 'get'
!! Failed tool search for 'שם'. Error: 'list' object has no attribute 'get'
!! Failed tool search for 'את'. Error: 'list' object has no attribute 'get'
!! Failed tool search for 'את'. Error: 'list' objec

In [38]:
len(examples)

966

In [39]:
bagent.AgentUI().display_convo(examples[100]["messages"])

In [41]:
set([ex['metadata'][MetaField.SCENARIO] for ex in examples])

{'chitchat',
 'lookup_verse_ok',
 'lookup_verse_ok_then_another_version',
 'lookup_verse_typo_book',
 'lookup_verse_typo_version',
 'search_phrase',
 'sequence_of_user_requests'}

In [42]:
set([str(ex['metadata'][MetaField.TOOL_LIST]) for ex in examples])

{"['lookup_verse', 'search_phrase']",
 "['lookup_verse']",
 "['search_phrase', 'lookup_verse']",
 "['search_phrase']"}

In [43]:
[(ex['metadata']['typo_class'],ex['metadata']['wrong_args']['book']) for ex in examples if ex['metadata']['scenario']=='lookup_verse_typo_book']

[('missing_char_allcaps', 'GENESI'),
 ('nearby_char_cap', 'Genwsis'),
 ('char_swap_allcaps', 'GENSEIS'),
 ('extra_char_cap', 'Genesids'),
 ('repeated_char_cap', 'Geneesis'),
 ('repeated_char', 'gennesis'),
 ('extra_char_cap', 'Grenesis'),
 ('char_swap', 'genessi'),
 ('char_swap_cap', 'Gneesis'),
 ('char_swap_cap', 'Genessi'),
 ('missing_char_allcaps', 'GENEIS'),
 ('extra_char_allcaps', 'GEHNESIS'),
 ('nearby_char_cap', 'Exoxus'),
 ('nearby_char_cap', 'Exocus'),
 ('repeated_char', 'exoduus'),
 ('char_swap', 'xeodus'),
 ('missing_char', 'exous'),
 ('extra_char_cap', 'Exodyus'),
 ('char_swap_allcaps', 'EXDOUS'),
 ('char_swap', 'eoxdus'),
 ('nearby_char_allcaps', 'EXODUD'),
 ('nearby_char', 'rxodus'),
 ('missing_char_cap', 'Eodus'),
 ('repeated_char', 'exoddus'),
 ('nearby_char_allcaps', 'EXLDUS'),
 ('char_swap', 'xeodus'),
 ('missing_char_allcaps', 'EODUS'),
 ('nearby_char_cap', 'Edodus'),
 ('missing_char', 'exous'),
 ('char_swap_allcaps', 'EXODSU'),
 ('extra_char_allcaps', 'EXODJUS'),
 (

In [44]:
[(ex['metadata']['typo_class'],ex['metadata']['wrong_args']['version']) for ex in examples if ex['metadata']['scenario']=='lookup_verse_typo_version']

[('char_swap', 'en.koern'),
 ('missing_char', 'he.maorah'),
 ('extra_char', 'en.new.jpxs1917'),
 ('char_swap', 'en.okren'),
 ('nearby_char', 'eh.koren'),
 ('repeated_char', 'he.teext_only'),
 ('char_swap', 'he.masorha'),
 ('extra_char', 'he.masotrah'),
 ('missing_char', 'en.ne.jps1917'),
 ('repeated_char', 'he.masoorah'),
 ('extra_char', 'he.masporah'),
 ('extra_char', 'en.korejn'),
 ('repeated_char', 'he.masorrah'),
 ('missing_char', 'en.oren'),
 ('extra_char', 'en.korejn'),
 ('missing_char', 'he.tex_only'),
 ('extra_char', 'en.lkoren'),
 ('nearby_char', 'he.masirah'),
 ('extra_char', 'en.koremn'),
 ('nearby_char', 'en.new.jos1917'),
 ('repeated_char', 'he.masorrah'),
 ('missing_char', 'he.masora'),
 ('nearby_char', 'he.maeorah'),
 ('missing_char', 'he.txt_only'),
 ('char_swap', 'en.korne'),
 ('char_swap', 'he.masoarh'),
 ('char_swap', 'he.text_noly'),
 ('missing_char', 'he.text_oly'),
 ('nearby_char', 'he.jasorah'),
 ('repeated_char', 'he.massorah'),
 ('missing_char', 'en.koen'),
 ('

In [46]:
# Do all examples start with a system message?
set([ex['messages'][0]['role'] for ex in examples])

{'system'}

In [50]:
# Do we have any leaked "system" role after the first message?
set([msg['role'] for ex in examples for msg in ex["messages"][1:]])

{'assistant', 'user'}

## Save data

In [51]:
dev_folder = os.path.abspath("../data/dev")
os.listdir(dev_folder)

['lookup_verse.3.test.jsonl', 'lookup_verse.3.train.jsonl']

In [52]:
trainset_file = os.path.join(dev_folder, "two_tools.1.train.jsonl")
testset_file = os.path.join(dev_folder, "two_tools.1.test.jsonl")
cutoff = int(len(examples)*0.75)
print(f"Cutoff: {cutoff}")

with open(trainset_file, "w", encoding="utf-8") as f:
    for ex in examples[:cutoff]:
        f.write(json.dumps(ex, ensure_ascii=False) + "\n")

with open(testset_file, "w", encoding="utf-8") as f:
    for ex in examples[cutoff:]:
        f.write(json.dumps(ex, ensure_ascii=False) + "\n")

Cutoff: 724


In [53]:
cutoff

724