# setup data

In [1]:
from glob import glob
import pandas as pd
import random

# https://huggingface.co/datasets/cais/mmlu
files_names = glob("../data/*.csv")
print('file_names[0]: ', files_names[0])

db = pd.read_csv(files_names[0])
# sampled_indexes = random.sample(list(range(len(db))), 10)
# print(sampled_indexes)
sampled_indexes = [135, 277, 144, 292, 186, 22, 118, 165, 172, 300]

def parse_question_answer(df, ix):
    question = df.iloc[ix, 0]
    a = df.iloc[ix, 1]
    b = df.iloc[ix, 2]
    c = df.iloc[ix, 3]
    d = df.iloc[ix, 4]
    answer = df.iloc[ix, 5]
    return (question, a, b, c, d, answer)

prompts = []
answers = []
for ix in sampled_indexes:
    question, a, b, c, d, answer  = parse_question_answer(db, ix)
    prompt = "Can you answer the following question as accurately as possible? {}: A) {}, B) {}, C) {}, D) {} Explain your answer, putting the answer in the form (X) at the end of your response."
    prompts.append(prompt.format(question, a, b, c, d))
    answers.append(answer)
print("prompt length: ", len(prompts))

file_names[0]:  ../data/high_school_biology_test.csv
prompt length:  10


# mmlu with llama2 collaboration(debate -> debate -> reflection)

In [3]:
from llama_cpp import Llama

llm = Llama(
    model_path="../models/ELYZA-japanese-Llama-2-7b-instruct-q5_K_M.gguf", 
    device="mps", 
    n_ctx=3000,
    verbose=False
)

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from ../models/ELYZA-japanese-Llama-2-7b-instruct-q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q6_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q5_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    6:            blk.0.ffn_down.weight q6_K     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    7:              blk.0.ffn_up.weight q5_K     [

In [2]:
class Agent:
    def __init__(self, question):
      self.memories = [
        {
          'role': 'user', 
          'content': 'Imagine you are an expert in biology, chemistry, computer science, mathematics, physics and are confident in your answer and often persuades other agents to believe in you. Please keep this in mind. If you understand please say ok only.'
        },
        {'role': 'assistant', 'content': ' Ok'},
        {
          'role': 'user', 
          'content': question
        }
      ]
    
    def think_and_update_memory(self):
      result = llm.create_chat_completion(self.memories)
      self.memories.append(result['choices'][0]['message'])
      return result['choices'][0]['message']['content']

    def debate_and_update_memory(self, idea1, idea2):
      debate_prompt = "These are the solutions to the problem from other agents: \n\n One agent response: ```{}```\n\n One agent response: ```{}```\n\n Using the reasoning from other agents as additional advice, can you give an updated answer? Examine your solution and that other agents step by step. Put your answer in the form (X) at the end of your response."
      content = debate_prompt.format(idea1, idea2)
      self.memories.append({'role': 'user', 'content': content})
      result = llm.create_chat_completion(self.memories)
      self.memories.append(result['choices'][0]['message'])
      return result['choices'][0]['message']['content']

    def reflect_and_update_memory(self):
      reflection_prompt = "Can you double check that your answer is correct. Put your final answer in the form (X) at the end of your response."
      self.memories.append({'role': 'user', 'content': reflection_prompt})
      result = llm.create_chat_completion(self.memories)
      self.memories.append(result['choices'][0]['message'])
      return result['choices'][0]['message']['content']

In [4]:
import csv

responses = []

for i, question in enumerate(prompts):
  print(f'question{i}: ', question)
  agent1 = Agent(question)
  agent2 = Agent(question)
  agent3 = Agent(question)

  print('think_and_update_memory...')
  agent1.think_and_update_memory()
  agent2.think_and_update_memory()
  agent3.think_and_update_memory()

  print('debate_and_update_memory...')
  agent1.debate_and_update_memory(agent2.memories[-1]['content'], agent3.memories[-1]['content'])
  agent2.debate_and_update_memory(agent1.memories[-1]['content'], agent3.memories[-1]['content'])
  agent3.debate_and_update_memory(agent1.memories[-1]['content'], agent2.memories[-1]['content'])

  print('debate_and_update_memory...')
  agent1.debate_and_update_memory(agent2.memories[-1]['content'], agent3.memories[-1]['content'])
  agent2.debate_and_update_memory(agent1.memories[-1]['content'], agent3.memories[-1]['content'])
  agent3.debate_and_update_memory(agent1.memories[-1]['content'], agent2.memories[-1]['content'])

  print('reflect_and_update_memory...')
  agent1.reflect_and_update_memory()
  agent2.reflect_and_update_memory()
  agent3.reflect_and_update_memory()

  response = {'answer': answers[i], 'agent1': agent1.memories[-1]['content'], 'agent2': agent2.memories[-1]['content'], 'agent3': agent3.memories[-1]['content']}
  responses.append(response)
  print(response)
  print('========================================================================================================================================================')

with open("../results/llama2_collaboration.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["answer", "agent1", "agent2", "agent3"])
    writer.writeheader()  # ヘッダーを書き込む
    writer.writerows(responses)

question0:  Can you answer the following question as accurately as possible? Which of the following characteristics would allow you to distinguish a prokaryotic cell from an animal cell?: A) Ribosomes, B) Cell membrane, C) Chloroplasts, D) Cell wall Explain your answer, putting the answer in the form (X) at the end of your response.
think_and_update_memory...
debate_and_update_memory...
debate_and_update_memory...
reflect_and_update_memory...
{'answer': 'D', 'agent1': ' Ok, I will double check my answer.\n\nThe correct answer is: (X) Cell wall.', 'agent2': ' Ok, I will double check my answer.\n\nThe correct answer is: (X) Cell wall', 'agent3': ' Ok, I can double check my answer. My final answer is:\n\n(X) Cell wall'}
question1:  Can you answer the following question as accurately as possible? Crossing-over occurs during which of the following phases in meiosis?: A) Prophase I, B) Metaphase I, C) Anaphase I, D) Prophase II Explain your answer, putting the answer in the form (X) at the e

# result
score by GPT-4

|      |  result  |
| ---- | ---- |
|  agent1  | 80% (8/10)    |
|  agent2  | 80% (8/10)    |
|  agent3  | 90% (9/10)    |