### Step 1: Set up your virtual environment

Run `python script/install_venv_packages.py --require_jupyter`

### Step 2: Preprocess the work directory

Run `python script/preprocess_work_dir.py --dataset daco`

### Step 3: Set up your default model config

In [None]:
import json
import os
import sys

from tqdm import tqdm

In [None]:
sys.path.append("..")

from argos import ArgosAgent, ArgosConfig

# ==== You can uncomment the following lines to inspect the client calls (using agent._client_calls)
# import argos.agent
# argos.agent.MONKEY_PATCH_TO_SAVE_CALLS = True 

In [None]:
def get_model_config(entry_id):
    """Build the ArgosConfig for a single DACO entry.

    Reads ``meta.json`` from ``../work/daco/<entry_id>`` and passes its
    ``dataset_names`` and ``question`` fields to the config, together with
    the shared virtual-environment path and the default model settings.

    Args:
        entry_id: Name of the entry's sub-directory under ``../work/daco``.

    Returns:
        The ArgosConfig constructed for that entry.

    Raises:
        OSError: If ``meta.json`` cannot be opened (e.g. it does not exist).
        KeyError: If ``meta.json`` lacks ``dataset_names`` or ``question``.
    """
    work_dir = f"../work/daco/{entry_id}"

    # JSON text is UTF-8; be explicit so the platform's default locale
    # encoding cannot corrupt non-ASCII questions or dataset names.
    with open(f"{work_dir}/meta.json", "r", encoding="utf-8") as f:
        meta = json.load(f)

    config = ArgosConfig(
        work_dir=work_dir,
        dataset_names=meta['dataset_names'],
        question=meta['question'],
        venv_dir="../work/venv",
        default_model_name="gpt-4o-mini",
        default_api_key="<your-openai-api-key>", # replace this line with your OpenAI API key
        default_base_url="https://<openai-endpoint>/v1" # replace this line with your OpenAI endpoint
    )

    return config

### Step 4: Run Argos on the dataset

In [None]:
async def process_one_entry(entry_id):
    """Run the Argos agent on one entry and persist its outputs.

    The agent is built from ``get_model_config(entry_id)``, run with console
    printing disabled, and then asked to save its task result and its data
    report.

    Args:
        entry_id: Identifier of the entry, passed to ``get_model_config``.
    """
    agent = ArgosAgent(get_model_config(entry_id))

    # Switch print_to_console to True to inspect the intermediate results.
    await agent.run(print_to_console=False)

    # Writes `task_result.json` to the work directory.
    await agent.save_task_result()

    # Writes `report.json` and `report.md` to the work directory.
    await agent.save_data_report()

In [None]:
for entry_id in tqdm(os.listdir("../work/daco")):
    try:
        await process_one_entry(entry_id)
    except Exception as e:
        print(f"Error processing {entry_id}: {e}, please try again. ")
        continue
    
# ==== You can also:
# 1. use asyncio.gather() to run multiple entries concurrently, which can speed up the process
# 2. modify the `process_one_entry` function to skip entries that have already been processed

### Step 5: Collect the reports, then evaluate them

Run `python script/collect_reports.py`

Run `python script/eval/daco/eval_helpfulness.py --model_type vllm --vllm_base_url <your-vllm-endpoint> --model "Llama-3-8B-Instruct" --api_key "EMPTY" --pred "work/output/daco_collected_reports.json"`