# 02c - 生成测试数据集

In [3]:
from pprint import pprint
import json
from dotenv import load_dotenv
load_dotenv()
import os
# Check that both the API key and the base URL are configured.
# .get() is used so a missing variable is displayed as False instead of
# aborting the cell with a KeyError.
bool(os.environ.get("ANTHROPIC_API_KEY") and os.environ.get("BASE_URL"))

True

In [4]:
from anthropic import Anthropic

# Create the Anthropic client from the credentials found in the environment
client = Anthropic(
    api_key=os.environ["ANTHROPIC_API_KEY"],
    base_url=os.environ["BASE_URL"],
)
# Note: this notebook switches to the comparatively cheap and fast Haiku 4.5 model
model = "claude-haiku-4-5-20251001"

In [5]:
def add_user_message(messages, text):
    """Append a user turn to the conversation.

    Args:
        messages: Message list to extend; it is modified in place.
        text: Content stored under the ``"content"`` key of the new message.

    Returns:
        None.
    """
    messages.append(dict(role="user", content=text))

def add_assistant_message(messages, text):
    """Append an assistant turn to the conversation.

    Args:
        messages: Message list to extend; it is modified in place.
        text: Content stored under the ``"content"`` key of the new message.

    Returns:
        None.
    """
    messages.append(dict(role="assistant", content=text))

def chat(messages, **kwargs):
    """Send the full message list to the model and return the reply text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts making up the
            conversation so far.
        **kwargs: Extra keyword arguments forwarded to
            ``client.messages.create`` (for example ``stop_sequences``).
            ``model`` and ``max_tokens`` may also be passed here to override
            the defaults (the module-level ``model`` and 1000).

    Returns:
        The text of all text blocks in the response, concatenated. An empty
        string is returned when the response contains no text block.
    """
    # Defaults come first so that caller-supplied values win; passing
    # max_tokens/model used to fail with "got multiple values for keyword
    # argument".
    request_args = {"model": model, "max_tokens": 1000, **kwargs}
    message = client.messages.create(messages=messages, **request_args)
    # Do not assume content[0] exists or is a text block: the list can be empty
    # (e.g. a stop sequence is hit immediately) or start with a non-text block.
    return "".join(
        block.text
        for block in message.content
        if getattr(block, "type", None) == "text"
    )

In [None]:
# This example may feel unfamiliar if you do not know AWS well; do not dwell on
# the details, the overall approach and workflow are what matter here
def generate_dataset(num_tasks=3):
    """Ask the model for a small evaluation dataset of AWS-related tasks.

    The assistant turn is prefilled with an opening json code fence and the
    request stops at the next fence, so the reply is expected to contain only
    the JSON array.

    Args:
        num_tasks: How many task objects to request. Defaults to 3, which
            yields exactly the original prompt.

    Returns:
        The value parsed from the reply by ``json.loads``. The prompt asks for
        a list of ``{"task": ...}`` objects, but the structure is not validated.

    Raises:
        ValueError: If ``num_tasks`` is smaller than 1.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    if num_tasks < 1:
        raise ValueError(f"num_tasks must be at least 1, got {num_tasks!r}")

    # str.replace is used instead of an f-string so the braces of the JSON
    # example in the prompt do not need escaping.
    prompt = """
Generate an evaluation dataset for a prompt evaluation. The dataset will be used to evaluate prompts that generate Python, JSON, or Regex specifically for AWS-related tasks. Generate an array of JSON objects, each representing task that requires Python, JSON, or a Regex to complete.

Example output:
```json
[
  {
    "task": "Description of task",
  },
  ...additional
]
```

* Focus on tasks that can be solved by writing a single Python function, a single JSON object, or a single regex
* Focus on tasks that do not require writing much code

Please generate {num_tasks} objects.
""".replace("{num_tasks}", str(num_tasks))
    messages = []
    add_user_message(messages, prompt)
    # Prefill the assistant turn so the model continues inside a json fence
    add_assistant_message(messages, "```json")
    # Stop at the closing fence so only the JSON body is returned
    text = chat(messages, stop_sequences=["```"])
    return json.loads(text)

In [9]:
# Run the generation once and keep the parsed result for the cells below
dataset = generate_dataset()

In [10]:
# Pretty-print the parsed dataset for inspection
pprint(dataset)

[{'task': 'Write a Python function that extracts the AWS account ID from an '
          'ARN string. The function should take an ARN like '
          "'arn:aws:s3:::my-bucket' or "
          "'arn:aws:ec2:us-east-1:123456789012:instance/i-1234567890abcdef0' "
          "and return the account ID if present, or None if the ARN doesn't "
          'contain one.'},
 {'task': 'Write a regex pattern that matches valid AWS S3 bucket names. S3 '
          'bucket names must be 3-63 characters long, contain only lowercase '
          'letters, numbers, hyphens, and periods, start with a letter or '
          'number, and cannot end with a hyphen.'},
 {'task': 'Write a Python function that parses an AWS CloudWatch Logs JSON log '
          'entry and extracts the timestamp, message, and log level (if '
          'available). The function should handle nested JSON structures and '
          'return a dictionary with these fields or raise an error if the '
          'entry is malformed.'}]


In [None]:
# 保存下来，以备后续小节的内容使用
with open("02c-dataset.json", "w") as f:
    json.dump(dataset, f, indent=2)