# Set up data

In [13]:
from glob import glob
import pandas as pd
import random

# Dataset source: https://huggingface.co/datasets/cais/mmlu
# NOTE: glob() does not guarantee any ordering of its matches, so which CSV ends
# up at index 0 can differ between machines when ../data holds several files.
files_names = glob("../data/*.csv")
if not files_names:
    # Fail with an actionable message instead of a bare IndexError on [0].
    raise FileNotFoundError("no CSV files found matching '../data/*.csv'")
print('file_names[0]: ', files_names[0])

# NOTE(review): read_csv treats the first row as a header by default. If the
# MMLU CSVs ship without a header row, the first question is being consumed as
# column names -- confirm against the data files before changing this, because
# switching to header=None shifts every row index used below.
db = pd.read_csv(files_names[0])

# Fixed row positions so every run asks the same questions. They were
# presumably drawn once with the commented-out sampling code kept here:
# sampled_indexes = random.sample(list(range(len(db))), 10)
# print(sampled_indexes)
sampled_indexes = [135, 277, 144, 292, 186, 22, 118, 165, 172, 300]

# The positions above are only meaningful for the file they were drawn from;
# stop early with a clear message if the loaded file is too short for them.
if max(sampled_indexes) >= len(db):
    raise IndexError(
        "sampled_indexes needs at least {} rows but {} has only {}".format(
            max(sampled_indexes) + 1, files_names[0], len(db)
        )
    )

def parse_question_answer(df, ix):
    """Return one row of ``df`` as ``(question, a, b, c, d, answer)``.

    The row is selected by position ``ix`` and the six values are read by
    position from columns 0 through 5, in that order.
    """
    # Positional layout: 0 = question, 1-4 = options A-D, 5 = answer.
    return tuple(df.iloc[ix, column] for column in range(6))

# Template shared by every question: the slots take the question text followed
# by the four options, in order.
prompt = "Can you answer the following question as accurately as possible? {}: A) {}, B) {}, C) {}, D) {} Explain your answer, putting the answer in the form (X) at the end of your response."

# prompts[k] and answers[k] always describe the same sampled row.
prompts = []
answers = []
for row_position in sampled_indexes:
    # The parsed tuple is (question, a, b, c, d, answer): the last element is
    # the answer, everything before it fills the template.
    *question_and_options, answer = parse_question_answer(db, row_position)
    prompts.append(prompt.format(*question_and_options))
    answers.append(answer)
print("prompt length: ", len(prompts))

file_names[0]:  ../data/high_school_biology_test.csv
prompt length:  10


# MMLU with GPT-3.5

In [15]:
import openai
from dotenv import load_dotenv
import os

# Run python-dotenv's loader first so that a key kept in a local .env file
# (presumably how OPENAI_API_KEY is supplied -- verify) is visible below.
load_dotenv(verbose=True)

# os.getenv yields None when the variable is unset; the value is handed to the
# openai client as-is, without validation.
api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key

class Agent:
    """A chat participant that carries its own conversation history.

    ``self.memories`` is a list of chat messages in the
    ``{'role': ..., 'content': ...}`` form that is passed as ``messages`` to
    ``openai.ChatCompletion.create``. It is seeded with three entries: a
    persona instruction (sent with the ``user`` role), a canned ``assistant``
    acknowledgement, and the question to answer.
    """

    # Model used when the caller does not ask for a specific one.
    DEFAULT_MODEL = "gpt-3.5-turbo"

    def __init__(self, question, model=DEFAULT_MODEL):
        """Seed the history with the persona exchange followed by ``question``.

        Args:
            question: Text of the user message the agent should answer.
            model: Name of the chat model to query. Defaults to
                ``"gpt-3.5-turbo"``, the value that used to be hard-coded.
        """
        self.model = model
        self.memories = [
            {
                'role': 'user',
                'content': 'Imagine you are an expert in biology, chemistry, computer science, mathematics, physics and are confident in your answer and often persuades other agents to believe in you. Please keep this in mind. If you understand please say ok only.'
            },
            {'role': 'assistant', 'content': ' Ok'},
            {
                'role': 'user',
                'content': question
            }
        ]

    def think_and_update_memory(self):
        """Query the model with the full history and remember its reply.

        Sends every message in ``self.memories`` to the chat-completion
        endpoint, requesting a single completion (``n=1``). The returned
        message object is appended to ``self.memories`` unchanged, so a later
        call includes it as context.

        Returns:
            The ``content`` of the reply message.
        """
        result = openai.ChatCompletion.create(
            model=self.model,
            messages=self.memories,
            n=1
        )
        # Only one completion is requested, so the reply is the first choice.
        reply = result['choices'][0]['message']
        self.memories.append(reply)
        return reply['content']


In [17]:
import csv

# Where the collected replies are written, and the CSV column used for each
# independent agent (one Agent instance is created per name, per question).
RESULTS_PATH = "../results/gpt3.5_oneshot.csv"
AGENT_NAMES = ["agent1", "agent2", "agent3"]

# Create the output directory *before* spending any API calls: otherwise a
# missing ../results only surfaces at the final open(), after every reply has
# been fetched, and the collected responses are lost.
os.makedirs(os.path.dirname(RESULTS_PATH), exist_ok=True)

# One dict per question: the reference answer plus each agent's full reply.
responses = []

for i, question in enumerate(prompts):
    print(f'question{i}: ', question)
    # Every agent starts from the same question with its own separate history.
    agents = {name: Agent(question) for name in AGENT_NAMES}

    print('think_and_update_memory...')
    response = {'answer': answers[i]}
    for name, agent in agents.items():
        # Agents are queried one after another, in AGENT_NAMES order; the
        # return value is the content of the reply just stored in memory.
        response[name] = agent.think_and_update_memory()

    responses.append(response)
    print(response)
    print('========================================================================================================================================================')

# newline="" lets the csv module control line endings itself.
with open(RESULTS_PATH, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["answer"] + AGENT_NAMES)
    writer.writeheader()
    writer.writerows(responses)

question0:  Can you answer the following question as accurately as possible? Which of the following characteristics would allow you to distinguish a prokaryotic cell from an animal cell?: A) Ribosomes, B) Cell membrane, C) Chloroplasts, D) Cell wall Explain your answer, putting the answer in the form (X) at the end of your response.
think_and_update_memory...
{'answer': 'D', 'agent1': "To distinguish a prokaryotic cell from an animal cell, we need to look for characteristics that are specific to one type of cell and absent in the other. \n\n- Ribosomes are present in both prokaryotic and animal cells. They are responsible for protein synthesis, and therefore, the presence of ribosomes does not allow us to distinguish between the two types of cells.\n\n- The cell membrane is also present in both prokaryotic and animal cells. It serves as a boundary between the cell and its external environment, regulating the entry and exit of substances. Hence, the presence of a cell membrane doesn't h

# Results
Each agent's responses to the 10 sampled questions, scored by GPT-4:

|      |  result  |
| ---- | ---- |
|  agent1  | 100% (10/10)    |
|  agent2  | 85% (8.5/10)    |
|  agent3  | 100% (10/10)    |