# Setup

In [6]:
from google import genai
from google.genai import types

# The client gets the API key from the environment variable `GEMINI_API_KEY`.
client = genai.Client()

response = client.models.generate_content(
    model="gemini-2.5-flash", 
    contents="Explain how AI works in a few words", 
    config=types.GenerateContentConfig(
        top_p=1.0,
    ),
)

In [7]:
print(response.text)  # Print the first candidate response

AI learns patterns from data to perform intelligent tasks.


In [9]:
import json
import sys

folder = './prompts/'
prompt_file = 'prompts_naive.json'
with open(folder + prompt_file, 'r') as f:
    prompt_dict = json.load(f)

webthink_examples = prompt_dict['webthink_simple6']
instruction = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types: 
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.

IMPORTANT: (1) always return you thought if there is any; (2) returns only one step at a time.

Here are some examples.
"""
webthink_prompt = instruction + webthink_examples

In [30]:
def llm(prompt, stop=["\n"]):
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
        config=types.GenerateContentConfig(
            system_instruction=webthink_prompt,
            top_p=1.0,
            temperature=0.0,  # Lower temperature for more deterministic output
            max_output_tokens=100,  # Limit the output length
            stop_sequences=stop,  # Stop sequences to end generation
            
        ),
    )
    return response.text

In [27]:
import re

def webthink2(idx=None, prompt='', to_print=True):
    question = env.reset(idx=idx)
    if to_print:
        print(idx, question)
    prompt += question + "\n"
    n_calls, n_badcalls = 0, 0
    done = False
    for i in range(1, 8):
        n_calls +=1
        llm_result = llm(prompt)
        print(f'prompt:\n{prompt}\nresult:\n{llm_result}\n\n')
        prompt += llm_result + '\n'
        if (llm_result.startswith('Thought')):
            continue
        elif (llm_result.startswith('Action')):
            delimiter_pattern = r"Action \d+: "
            action = re.split(delimiter_pattern, llm_result)[1]
            step_input = action[0].lower() + action[1:]
            obs, r, done, info = step(env, step_input)

            if (done):
                break

            obs = obs.replace('\\n', '')
            prompt += f'Observation {i}: {obs}\n'
        else:
            print(f'I don\'t understand this:\n {llm_result}')
    if not done:
        obs, r, done, info = step(env, "finish[]")
    info.update({'n_calls': n_calls, 'n_badcalls': n_badcalls, 'traj': prompt})
    return r, info

In [31]:
result, info = webthink2(34)

34 Question: Which band, Letters to Cleo or Screaming Trees, had more members?
prompt:
Question: Which band, Letters to Cleo or Screaming Trees, had more members?

result:
Action 1: Search[Letters to Cleo]


prompt:
Question: Which band, Letters to Cleo or Screaming Trees, had more members?
Action 1: Search[Letters to Cleo]
Observation 1: Letters to Cleo is an American alternative rock band originating from Boston, Massachusetts, best known for the 1994 single, "Here & Now", from their full-length debut album, Aurora Gory Alice which topped the Billboard Modern Rock Tracks chart and peaked at 56 on the Billboard Hot 100. The band's members are Kay Hanley, Greg McKenna, Michael Eisenstein, Stacy Jones, Scott Riebling, and later, Tom Polce and Joe Klompus.[1]. The group disbanded in 2000 but reunited for a small tour in 2008.[2] In 2016, the band reunited, and on October 14, 2016, released an EP, titled Back to Nebraska.[3]. Guitarist Greg McKenna and singer Kay Hanley formed the band Le

In [22]:
print(result)
print(info)

1
{'steps': 3, 'answer': 'Letters to Cleo', 'gt_answer': 'Letters to Cleo', 'question_idx': 34, 'reward': True, 'em': True, 'f1': 1.0, 'n_calls': 6, 'n_badcalls': 0, 'traj': 'Question: Which band, Letters to Cleo or Screaming Trees, had more members?\nAction 1: Search[Letters to Cleo]\nObservation 1: Letters to Cleo is an American alternative rock band originating from Boston, Massachusetts, best known for the 1994 single, "Here & Now", from their full-length debut album, Aurora Gory Alice which topped the Billboard Modern Rock Tracks chart and peaked at 56 on the Billboard Hot 100. The band\'s members are Kay Hanley, Greg McKenna, Michael Eisenstein, Stacy Jones, Scott Riebling, and later, Tom Polce and Joe Klompus.[1]. The group disbanded in 2000 but reunited for a small tour in 2008.[2] In 2016, the band reunited, and on October 14, 2016, released an EP, titled Back to Nebraska.[3]. Guitarist Greg McKenna and singer Kay Hanley formed the band Letters to Cleo in 1990. The band was in

In [64]:
import re

# Example strings with different numbers
text1 = 'Action 1: Search[Freakonomics]'
text2 = 'Action 123: Lookup[keyword]'
text3 = 'Action 5: Finish[answer]'

# The regular expression pattern for the delimiter:
# r"Action \d+: "
#   - r""       : Denotes a raw string, good for regex to avoid backslash issues.
#   - "Action " : Matches the literal string "Action "
#   - \d+       : Matches one or more digits (this handles any number 'n')
#   - ": "      : Matches the literal string ": "
delimiter_pattern = r"Action \d+: "

# Split text1
result1 = re.split(delimiter_pattern, text1)
print(f"'{text1}' split: {result1}")

# Split text2
result2 = re.split(delimiter_pattern, text2)
print(f"'{text2}' split: {result2}")

# Split text3
result3 = re.split(delimiter_pattern, text3)
print(f"'{text3}' split: {result3}")

# Example where the pattern might not be at the very beginning
text4 = 'Some preamble. Action 7: Another action.'
result4 = re.split(delimiter_pattern, text4)
print(f"'{text4}' split: {result4}")

# Example where the pattern is not found
text5 = 'Just a regular sentence.'
result5 = re.split(delimiter_pattern, text5)
print(f"'{text5}' split: {result5}")

'Action 1: Search[Freakonomics]' split: ['', 'Search[Freakonomics]']
'Action 123: Lookup[keyword]' split: ['', 'Lookup[keyword]']
'Action 5: Finish[answer]' split: ['', 'Finish[answer]']
'Some preamble. Action 7: Another action.' split: ['Some preamble. ', 'Another action.']
'Just a regular sentence.' split: ['Just a regular sentence.']


# ReAct

In [14]:
import wikienv, wrappers
env = wikienv.WikiEnv()
env = wrappers.HotPotQAWrapper(env, split="dev")
env = wrappers.LoggingWrapper(env)

def step(env, action):
    attempts = 0
    while attempts < 10:
        try:
            return env.step(action)
        except requests.exceptions.Timeout:
            attempts += 1

In [8]:
import json
import sys

folder = './prompts/'
prompt_file = 'prompts_naive.json'
with open(folder + prompt_file, 'r') as f:
    prompt_dict = json.load(f)

webthink_examples = prompt_dict['webthink_simple6']
instruction = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types: 
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.

IMPORTANT: returns only one step at a time.

Here are some examples.
"""
webthink_prompt = instruction + webthink_examples

def webthink(idx=None, prompt=webthink_prompt, to_print=True):
    question = env.reset(idx=idx)
    if to_print:
        print(idx, question)
    prompt += question + "\n"
    n_calls, n_badcalls = 0, 0
    for i in range(1, 8):
        n_calls += 1
        # thought_action = llm(prompt + f"Thought {i}:", stop=[f"\nObservation {i}:"])
        thought_action = llm(prompt + f"Thought {i}:", stop=[f"\nObservation {i}:"])
        print(f'llm returned: {thought_action}')
        try:
            thought, action = thought_action.strip().split(f"\nAction {i}: ")
        except:
            print('ohh...', thought_action)
            n_badcalls += 1
            n_calls += 1
            thought = thought_action.strip().split('\n')[0]
            action = llm(prompt + f"Thought {i}: {thought}\nAction {i}:", stop=[f"\n"]).strip()
        obs, r, done, info = step(env, action[0].lower() + action[1:])
        obs = obs.replace('\\n', '')
        step_str = f"Thought {i}: {thought}\nAction {i}: {action}\nObservation {i}: {obs}\n"
        prompt += step_str
        if to_print:
            print(step_str)
        if done:
            break
    if not done:
        obs, r, done, info = step(env, "finish[]")
    if to_print:
        print(info, '\n')
    info.update({'n_calls': n_calls, 'n_badcalls': n_badcalls, 'traj': prompt})
    return r, info

In [32]:
import random
import time
idxs = list(range(7405))
random.Random(233).shuffle(idxs)

# k = 500  # Number of examples to run
k = 5

rs = []
infos = []
old_time = time.time()
for i in idxs[:k]:
    r, info = webthink2(i, to_print=True)
    rs.append(info['em'])
    infos.append(info)
    print(sum(rs), len(rs), sum(rs) / len(rs), (time.time() - old_time) / len(rs))
    print('-----------')
    print()

3687 Question: What movie did actress Irene Jacob complete before the American action crime thriller film directed by Stuart Bird?
prompt:
Question: What movie did actress Irene Jacob complete before the American action crime thriller film directed by Stuart Bird?

result:
Action 1: Search[Stuart Bird]


prompt:
Question: What movie did actress Irene Jacob complete before the American action crime thriller film directed by Stuart Bird?
Action 1: Search[Stuart Bird]
Observation 1: Could not find Stuart Bird. Similar: ['Stuart McQuarrie', 'State Bird Provisions', 'Stuart Little', 'Stuart Keith', 'Birdwatching (redirect from Bird watching)'].

result:
Action 2: Search[Irene Jacob]


prompt:
Question: What movie did actress Irene Jacob complete before the American action crime thriller film directed by Stuart Bird?
Action 1: Search[Stuart Bird]
Observation 1: Could not find Stuart Bird. Similar: ['Stuart McQuarrie', 'State Bird Provisions', 'Stuart Little', 'Stuart Keith', 'Birdwatching (r

TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'

In [11]:
'Action 1: Search[Stuart Bird]'.strip().split(f"\nAction {i}: ")

['Action 1: Search[Stuart Bird]']

In [24]:
p = """
Question: What movie did actress Irene Jacob complete before the American action crime thriller film directed by Stuart Bird?
Action 1: Search[Stuart Bird]
Observation 1: Could not find Stuart Bird. Similar: ['Stuart McQuarrie', 'State Bird Provisions', 'Stuart Little', 'Stuart Keith', 'Birdwatching (redirect from Bird watching)'].
Action 2: Search[Irene Jacob]
Observation 2: Irène Marie Jacob (born 15 July 1966) is a French-Swiss actress known for her work with Polish film director Krzysztof Kieślowski. She won the 1991 Cannes Film Festival Award for Best Actress for the Kieślowski film The Double Life of Veronique, and was nominated for the BAFTA Award for Best Actress in a Leading Role for her 1994 film Three Colours: Red. Her other film appearances include The Secret Garden (1993), Beyond the Clouds (1995), U.S. Marshals (1998), and Eternity (2016).. Irène Jacob was born in Suresnes, Hauts-de-Seine, a western suburb of Paris.
"""

llm(p)