#### Notebook for running Chain-of-Thought experiments with no supporting context

In [13]:
import sys, os
sys.path.append('..')
root = '../root/'

from llm import AnyOpenAILLM

# LLM handed to every agent as `self_reflect_llm`. It is configured with
# temperature 0, max_tokens=250 and "\n" as the stop sequence; the API key is
# read from the OPENAI_API_KEY environment variable (KeyError if it is unset).
# NOTE(review): the recorded run of the trial loop failed with "The API
# deployment for this resource does not exist" -- confirm that a deployment
# named "chatgpt-16k" exists on the target resource.
self_reflect_llm = AnyOpenAILLM(
    temperature=0,
    max_tokens=250,
    model_name="chatgpt-16k",
    deployment_id="chatgpt-16k",
    model_kwargs={"stop": "\n"},
    openai_api_key=os.environ['OPENAI_API_KEY'],
)

In [3]:
from util import summarize_trial, log_trial, save_agents
import joblib
from agents import CoTAgent, ReflexionStrategy

#### Load the HotPotQA Sample

In [4]:
# Load the HotPotQA sample from disk, then replace its index with a fresh
# 0..N-1 range (the old index is dropped, not kept as a column).
# joblib.load unpickles the file, so only point it at trusted data.
hotpot = joblib.load('../data/hotpot-qa-distractor-sample.joblib')
hotpot = hotpot.reset_index(drop=True)

#### Define the Reflexion Strategy

In [5]:
# Print the ReflexionStrategy docstring, which lists the available strategies.
print(ReflexionStrategy.__doc__)


    NONE: No reflection
    LAST_ATTEMPT: Use last reasoning trace in context 
    REFLEXION: Apply reflexion to the next reasoning trace 
    LAST_ATTEMPT_AND_REFLEXION: Use last reasoning trace in context and apply reflexion to the next reasoning trace 
    


In [6]:
# Strategy used for the whole experiment: it selects the agent prompt, is passed
# to every agent.run() call, and its .value names the output directory.
strategy: ReflexionStrategy = ReflexionStrategy.REFLEXION

#### Initialize a CoTAgent for each question

In [7]:
from prompts import cot_simple_reflect_agent_prompt, cot_simple_reflect_prompt, cot_simple_agent_prompt
from fewshots import COTQA_SIMPLE6, COT_SIMPLE_REFLECTION


In [8]:
# Take the first sample row: iterrows() yields (index, row) pairs, [1] keeps the row.
# The agent-building cell iterates with its own comprehension-scoped `row`,
# so this variable is only for ad-hoc inspection.
row = next(hotpot.iterrows())[1]
# Display the few-shot CoT examples one line per list entry.
COTQA_SIMPLE6.split('\n')

['',
 'Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?',
 "Thought: Let's think step by step. The eastern sector of Colorado orogeny extends into the High Plains. High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.",
 'Action: Finish[1,800 to 7,000 ft]',
 '',
 'Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?',
 "Thought: Let's think step by step. Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.",
 'Action: Finish[Richard Nixon]',
 '',
 'Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?',
 "Thought: Let's think step by step. Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. So the documentary about Finnish rock groups must instead be The Saimaa Ge

In [9]:
# Display the few-shot reflection examples one line per list entry.
COT_SIMPLE_REFLECTION.split('\n')

['',
 'Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?',
 "Thought: Let's think step by step. The eastern sector of Colorado orogeny extends into the Rocky Mountains. The Rocky Mountains rise in elevation from around 1,800 to 14,000 ft, so the answer is 1,800 to 14,000 ft.",
 'Action: Finish[1,800 to 14,000 ft]',
 '',
 'Reflection: My reasoning for the elevation range of the eastern sector of the Colorado orogeny failed because I incorrectly associated it with the Rocky Mountains instead of the High Plains. In the future, when attempting this question, I should focus on better understanding the geography and geological formations involved in the Colorado orogeny to avoid confusion.',
 '',
 'Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?',
 "Thought: Let's think step by step. Milhouse was named after a prominent cartoon character, Mi

In [10]:

# With no reflection the plain CoT prompt is used; every other strategy gets
# the reflect-aware agent prompt.
if strategy == ReflexionStrategy.NONE:
    agent_prompt = cot_simple_agent_prompt
else:
    agent_prompt = cot_simple_reflect_agent_prompt

# Keyword arguments that are identical for every agent. `context` is the empty
# string: this notebook runs without supporting context.
shared_agent_kwargs = dict(
    context='',
    agent_prompt=agent_prompt,
    cot_examples=COTQA_SIMPLE6,
    reflect_prompt=cot_simple_reflect_prompt,
    reflect_examples=COT_SIMPLE_REFLECTION,
    self_reflect_llm=self_reflect_llm,
)

# One CoTAgent per sample row; the row's 'answer' column is passed as the key.
agents = [
    CoTAgent(question=row['question'], key=row['answer'], **shared_agent_kwargs)
    for _, row in hotpot.iterrows()
]

In [11]:
# Echo the selected strategy as a sanity check before running the trials.
strategy

<ReflexionStrategy.REFLEXION: 'reflexion'>

#### Run `n` trials

In [14]:
n = 5       # number of trials to run
trial = 0   # count of completed trials; the save cell uses it in the file name
log = ''    # concatenation of every log_trial() result

# `trial` only advances once a full round has finished, so after an
# interruption it still holds the number of completed trials.
while trial < n:
    # Snapshot the agents that are not yet correct before running any of them.
    unsolved = [a for a in agents if not a.is_correct()]
    for agent in unsolved:
        agent.run(reflexion_strategy=strategy)
        print(f'Answer: {agent.key}')

    trial += 1
    log += log_trial(agents, trial)
    correct, incorrect = summarize_trial(agents)
    print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}')

InvalidRequestError: The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again.

#### Save the result log

In [27]:
# Results are written under <root>/CoT/no_context/<strategy value>/.
out_dir = os.path.join(root, 'CoT', 'no_context', strategy.value)
# open(..., 'w') does not create missing parent directories and would raise
# FileNotFoundError on a fresh checkout, so make sure the directory exists.
os.makedirs(out_dir, exist_ok=True)

# The log file name records how many agents were run and how many trials completed.
with open(os.path.join(out_dir, f'{len(agents)}_questions_{trial}_trials.txt'), 'w') as f:
    f.write(log)
# Hand the agents and the '<out_dir>/agents' path to save_agents.
save_agents(agents, os.path.join(out_dir, 'agents'))