In [None]:
!pip install -r requirements.txt
!pip install pydantic>=2.7.0


In [None]:
from dotenv import load_dotenv

load_dotenv()
import aixplain.utils.config as aixplain_config

from agentification.utilities.models import TeamAgentExecuteInput, AgentResponse
from agentification.team_agent import TeamAgentService
from datasets import load_dataset

In [3]:
dataset = load_dataset("openai/gsm8k", "main", split="test")
import re
modified_samples = []

for i, sample in enumerate(dataset):
    last_number = re.findall(r'\d+', sample['answer'])[-1]

    new_sample = {
        'question': sample['question'],
        'answer': last_number
    }

    modified_samples.append(new_sample)



In [4]:
modified_samples[0]

{'question': "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?",
 'answer': '18'}

## Run team agent without mentalist

In [5]:
input_data = {
    "agent": {
        "id": "66758985e39f1c11f9102700",
        "name": "Test Community",
        "status": "onboarded",
        "teamId": 646,
        "llmId": "66b2708c6eb5635d1c71f611",
        "supervisorId": "66b2708c6eb5635d1c71f611",
        "plannerId": "66b2708c6eb5635d1c71f611",
        "links": [],
        "assets": [],
        "agents": [
            {
                "id": "66758985e39f1c11f91027cf",
                "name": "python coder",
                "description": "You are an AI with advanced code understanding and planning capabilities. When you encounter a specific problem, your goal is to devise a Python code to solve it.",
                "status": "onboarded",
                "teamId": 646,
                "llmId": "66b2708c6eb5635d1c71f611",
                "assets": [

                ],
                "createdAt": "2024-06-21T14:09:09.903Z",
                "updatedAt": "2024-06-21T14:09:09.903Z",
                "number": 1,
                "type": "AGENT",
                "label": "OUTPUT"
            }
        ],
        "createdAt": "2024-06-21T14:09:09.903Z",
        "updatedAt": "2024-06-21T14:09:09.903Z",
    },
    "query": "",
    "chat_history": [],
    "api_key": aixplain_config.TEAM_API_KEY,
}
community_execute_input = TeamAgentExecuteInput(**input_data)
community_execute_input

TeamAgentExecuteInput(agent=TeamAgent(id='66758985e39f1c11f9102700', name='Test Community', status=<AssetStatus.ONBOARDED: 'onboarded'>, teamId=646, agents=[Agent(id='66758985e39f1c11f91027cf', name='python coder', status=<AssetStatus.ONBOARDED: 'onboarded'>, teamId=646, assets=[], llmId='66b2708c6eb5635d1c71f611', supervisorId=None, plannerId=None, number=1, type='AGENT', label='OUTPUT', description='You are an AI with advanced code understanding and planning capabilities. When you encounter a specific problem, your goal is to devise a Python code to solve it.')], links=[], assets=[], llmId='66b2708c6eb5635d1c71f611', supervisorId='66b2708c6eb5635d1c71f611', plannerId='66b2708c6eb5635d1c71f611'), api_key='9136c08bf02b5552885b9f2a5e0fae517d81ff2fa6fe7084a3adb655c4aa7215', query='', session_id=None, chat_history=[])

In [None]:
community_execute_input.query = "Answer with just the number: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"
chat_history = []
community_execute_input.session_id = "123"
response = TeamAgentService.run(community_execute_input, chat_history)
response

In [7]:
print(response.output)

The final answer is $18.


In [None]:
import time
import re
import json

def extract_number(text):
    match = re.search(r'\d+\.?\d*', text)
    number_str = match.group(0) if match else ""
    return number_str.strip()

correct_count = 0
results = []  
start = time.time()

output_file = 'results4.json'

for i, sample in enumerate(modified_samples[711:]):
    community_execute_input.query = "Answer with just the number as an integer: " + sample['question']
    chat_history = []
    community_execute_input.session_id = "123"
    response = TeamAgentService.run(community_execute_input, chat_history)
    
    expected_answer = sample['answer']
    agent_response_number = extract_number(response.output)
    
    print(f"Sample {i+1}:")
    print(f"  Question: {sample['question']}")
    print(f"  Expected Answer: {expected_answer}")
    print(f"  Agent's Response: {agent_response_number}")
    
    is_correct = expected_answer == agent_response_number
    if is_correct:
        correct_count += 1
        print("CORRECT")
    
    accuracy = correct_count / (i + 1) * 100
    end = time.time()
    length = end - start
    result_data = {
        "sample": i+1,
        "question": sample['question'],
        "expected_answer": expected_answer,
        "agent_response": agent_response_number,
        "correct": is_correct,
        "accuracy_so_far": accuracy,
        "length_so_far": length
    }
    
    results.append(result_data)
    
    with open(output_file, 'w') as file:
        json.dump(results, file, indent=4)

end = time.time()
length = end - start
final_accuracy = correct_count / len(modified_samples[578:]) * 100

print(f"\nFinal Accuracy: {final_accuracy}%")
print(f"Time taken: {length} seconds")

final_data = {
    "final_accuracy": final_accuracy,
    "time_taken": length
}

with open(output_file, 'w') as file:
    json.dump(results + [final_data], file, indent=4)
