# Learning Rate

In [None]:
import os
import yaml

# Generate one config file per learning rate from a shared template config.
template_path = "experiments/exp12_dsqwen7b_gsm8k/config/new_sys_prompt.yaml"  # replace with the path of your template config
output_dir = "experiments/exp12_dsqwen7b_gsm8k/config/"         # directory the generated configs are written to
os.makedirs(output_dir, exist_ok=True)

# Learning rates to try.
learning_rates = [1e-6, 5e-6, 1e-5, 5e-5, 1e-4]

# Load the base config. Read explicitly as UTF-8 so the result does not
# depend on the platform's default encoding (the outputs are written as UTF-8).
with open(template_path, "r", encoding="utf-8") as f:
    base_config = yaml.safe_load(f)

# An empty or non-mapping template cannot take a "learning_rate" key; fail
# with a clear message instead of an AttributeError/TypeError further down.
if not isinstance(base_config, dict):
    raise ValueError(
        f"Expected a YAML mapping at the top level of {template_path}, "
        f"got {type(base_config).__name__}"
    )

# Write one derived config per learning rate.
for lr in learning_rates:
    # A shallow copy is enough: only the top-level key is replaced.
    config = base_config.copy()
    config["learning_rate"] = lr
    # e.g. 1e-06 -> "config_lr_1e-06.yaml"; "+" is stripped from positive exponents.
    filename = f"config_lr_{lr:.0e}.yaml".replace("+", "")
    path = os.path.join(output_dir, filename)

    # allow_unicode keeps non-ASCII text readable instead of \u-escaped, and
    # the large width stops PyYAML from folding long strings across lines.
    with open(path, "w", encoding="utf-8") as f:
        yaml.dump(
            config,
            f,
            default_flow_style=False,
            sort_keys=False,
            allow_unicode=True,
            width=1000,
        )

    print(f"✅ Saved: {path}")


# Ratio

In [None]:
import yaml
import os
from pathlib import Path

# Generate one config file per reward-weight ratio from a shared template config.
# Path of the template YAML (replace with your own).
template_path = "experiments/exp12_dsqwen7b_gsm8k/config/new_sys_prompt.yaml"
output_dir = Path(template_path).parent


# Make sure the output directory exists (parents=True so that missing
# intermediate directories do not raise FileNotFoundError).
Path(output_dir).mkdir(parents=True, exist_ok=True)

# Reward function names; the order must match the digit order of each ratio string.
reward_keys = ["accuracy", "format", "length"]

# Ratios to generate: each string is accuracy:format:length, one digit per
# reward function, e.g. "210" means 2:1:0.
ratios = [
    "110",
    "101",
    "210",
    "201",
    "211",
    "311",
    "321",
    "312",
    "411",
    "421",
    "412",
    "431",
    "413",
]

# Load the template YAML. Read explicitly as UTF-8 so the result does not
# depend on the platform's default encoding.
with open(template_path, "r", encoding="utf-8") as f:
    base_config = yaml.safe_load(f)

# An empty or non-mapping template cannot take the reward keys; fail with a
# clear message instead of an AttributeError/TypeError further down.
if not isinstance(base_config, dict):
    raise ValueError(
        f"Expected a YAML mapping at the top level of {template_path}, "
        f"got {type(base_config).__name__}"
    )


for ratio_str in ratios:
    # zip() would silently drop extra digits or reward keys, so reject
    # ratio strings of the wrong length up front.
    if len(ratio_str) != len(reward_keys):
        raise ValueError(
            f"Ratio {ratio_str!r} must have exactly {len(reward_keys)} digits "
            f"(one per reward function: {reward_keys})"
        )

    weights = [int(c) for c in ratio_str]
    # Reward functions with weight 0 are left out entirely, together with
    # their weight, so both lists stay aligned.
    active = [(k, float(w)) for k, w in zip(reward_keys, weights) if w > 0]
    reward_funcs = [k for k, _ in active]
    reward_weights = [w for _, w in active]

    # A shallow copy is enough: only top-level keys are replaced.
    config = base_config.copy()
    config["reward_funcs"] = reward_funcs
    config["reward_weights"] = reward_weights

    filename = f"config_reward_{ratio_str}.yaml"
    path = os.path.join(output_dir, filename)

    # allow_unicode=True emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 rather than with the platform default encoding.
    with open(path, "w", encoding="utf-8") as f:
        yaml.dump(config, f, sort_keys=False, allow_unicode=True, width=1000)

    print(f"✅ Saved: {path}")
