In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from wandb_utils import download_wandb_metrics
import pandas as pd
import os
from plot_utils import plot_line_chart,extract_metrics
import numpy as np

In [3]:
# W&B team and project names passed to every download_wandb_metrics call below.
team, project = "zihanwang-ai-northwestern-university", "RAGEN"

## Bandit Results
This figure shows the high-arm and low-arm selection rates for the normal and reversed bandit settings, for both the no-think RL run and the main (RAGEN) run.

In [4]:



# Run name -> W&B run ID for the four bandit runs.
# Later cells read each run's metrics from ./wandb_logs/<run name>/metrics.csv.
figure4_bandit_results = {
    "bandit_no_think_rl_archive": "1gyb6m0f",
    "bandit_reverse_no_think_rl_archive": "qrgyldl3",
    "bandit_main_archive": "d4rw9kmn",
    "bandit_reverse_archive": "jbe3s5bb",
}

# Only the run ID is needed for the download call.
for run_id in figure4_bandit_results.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )



In [5]:
# Label shown for each bandit run in the plots.
file_name_to_plot_name = {
    "bandit_no_think_rl_archive": "no-think-RL",
    "bandit_reverse_no_think_rl_archive": "no-think-RL-reverse",
    "bandit_main_archive": "RAGEN",
    "bandit_reverse_archive": "RAGEN-reverse",
}

# metrics.csv path -> label, following the key order of figure4_bandit_results.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": file_name_to_plot_name[run_name]
    for run_name in figure4_bandit_results
}

In [6]:

# Logged metric name -> column name used in this notebook.
# NOTE(review): "Row" is probably meant to be "Raw"; kept unchanged because the
# arm-rate cell reads these exact keys.
metric_names_mapping = dict(
    [
        ("metric/n_high_arm", "Row High Arm Rate (%)"),
        ("metric/n_low_arm", "Row Low Arm Rate (%)"),
        ("_step", "Steps"),
    ]
)

In [7]:
# Build the per-run metric dict for the bandit runs (keyed by plot label, judging by the printed output).
processed_data = extract_metrics(
    file_plot_dict,
    metric_names_mapping,
)

no-think-RL 56
no-think-RL-reverse 44
RAGEN 56
RAGEN-reverse 50


In [8]:
# Turn the raw high/low arm counts into percentages of a fixed denominator of 128.
# NOTE(review): an earlier comment described this as high/(high+low) and
# low/(high+low); the code divides by the constant 128 instead. The two only agree
# if high + low == 128 at every step -- confirm which one is intended.
for run_metrics in processed_data.values():
    raw_high = run_metrics["Row High Arm Rate (%)"]
    raw_low = run_metrics["Row Low Arm Rate (%)"]
    run_metrics["High Arm Rate (%)"] = raw_high / 128 * 100
    run_metrics["Low Arm Rate (%)"] = raw_low / 128 * 100

In [10]:
# High-arm rate over training steps for the four bandit runs.
plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="High Arm Rate (%)",
    save_path="./figures/high_arm_rate_vs_step.pdf",
    smooth=True,
)

In [10]:
# Low-arm rate over training steps for the four bandit runs.
plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="Low Arm Rate (%)",
    save_path="./figures/low_arm_rate_vs_step.pdf",
    smooth=True,
)

# Sokoban Results

In [11]:
# Run name -> W&B run ID for the Sokoban main comparison.
sokoban_results = {
    "sokoban_main_archive": "5z8a6cog",
    "sokoban_main_no_think_rl_archive": "46ihhpmx",
}

# Only the run ID is needed for the download call.
for run_id in sokoban_results.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )

In [12]:
# Row label used for each Sokoban run in the results table.
file_name_to_table_name = {
    "sokoban_main_archive": "RAGEN",
    "sokoban_main_no_think_rl_archive": "no-think-RL",
}

# metrics.csv path -> row label.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": label
    for run_name, label in file_name_to_table_name.items()
}

In [13]:
# Logged metric name -> column name used in the Sokoban table.
metric_names_mapping = dict(
    [
        ("metric/valid_action", "Valid Action"),
        ("metric/effective_action", "Effective Action"),
        ("metric/traj_length", "Turns"),
        ("metric/finished", "Success Rate"),
    ]
)

In [14]:
# Build the per-run metric dict for the two Sokoban runs.
processed_data = extract_metrics(
    file_plot_dict,
    metric_names_mapping,
)

RAGEN 78
no-think-RL 76


In [15]:

# Collapse every run to the single step where its raw "Success Rate" value peaks
# (np.argmax picks the first such step). Each metric series is replaced in place by
# its value at that step, so this cell cannot be re-run without re-extracting first.
# "Success Rate" is additionally rescaled by 100/128.
# NOTE(review): 128 is presumably the number of rollouts per step -- confirm.
for run_metrics in processed_data.values():
    best_step = np.argmax(run_metrics["Success Rate"])
    for metric_name, series in run_metrics.items():
        picked = series[best_step]
        if metric_name == "Success Rate":
            picked = picked * 100 / 128
        run_metrics[metric_name] = picked

In [16]:
# Display the per-run Sokoban summary: each metric at the step with the highest success count.
processed_data

{'RAGEN': {'Steps': 24,
  'Valid Action': 3.578125,
  'Effective Action': 1.3671875,
  'Turns': 3.8671875,
  'Success Rate': 32.03125},
 'no-think-RL': {'Steps': 34,
  'Valid Action': 2.1796875,
  'Effective Action': 1.484375,
  'Turns': 4.265625,
  'Success Rate': 29.6875}}

# FrozenLake Results

In [17]:
# Run name -> W&B run ID for the FrozenLake main comparison.
frozenlake_results = {
    "frozenlake_main_archive": "zexswzel",
    "frozenlake_main_no_think_rl_archive": "khkqagbh",
}

# Only the run ID is needed for the download call.
for run_id in frozenlake_results.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )

In [18]:
# Row label used for each FrozenLake run in the results table.
file_name_to_table_name = {
    "frozenlake_main_archive": "RAGEN",
    "frozenlake_main_no_think_rl_archive": "no-think-RL",
}

# metrics.csv path -> row label.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": label
    for run_name, label in file_name_to_table_name.items()
}

# Logged metric name -> column name used in the table.
metric_names_mapping = dict(
    [
        ("metric/valid_action", "Valid Action"),
        ("metric/effective_action", "Effective Action"),
        ("metric/traj_length", "Turns"),
        ("metric/finished", "Success Rate"),
    ]
)

In [19]:
# Build the per-run metric dict for the two FrozenLake runs.
processed_data = extract_metrics(
    file_plot_dict,
    metric_names_mapping,
)

RAGEN 53
no-think-RL 38


In [20]:


# Collapse every run to the single step where its raw "Success Rate" value peaks
# (np.argmax picks the first such step). Each metric series is replaced in place by
# its value at that step, so this cell cannot be re-run without re-extracting first.
# "Success Rate" is additionally rescaled by 100/128.
# NOTE(review): 128 is presumably the number of rollouts per step -- confirm.
for run_metrics in processed_data.values():
    best_step = np.argmax(run_metrics["Success Rate"])
    for metric_name, series in run_metrics.items():
        picked = series[best_step]
        if metric_name == "Success Rate":
            picked = picked * 100 / 128
        run_metrics[metric_name] = picked

In [21]:
# Display the per-run FrozenLake summary: each metric at the step with the highest success count.
processed_data

{'RAGEN': {'Steps': 6,
  'Valid Action': 1.6484375,
  'Effective Action': 1.3515625,
  'Turns': 2.7421875,
  'Success Rate': 64.0625},
 'no-think-RL': {'Steps': 3,
  'Valid Action': 1.53125,
  'Effective Action': 1.1953125,
  'Turns': 3.9296875,
  'Success Rate': 33.59375}}

# Prompt Diversity

In [22]:
# Run name -> W&B run ID for the prompt-diversity ablation on Sokoban.
# NOTE(review): the p<X>n<Y> suffixes presumably encode prompts x samples per prompt -- confirm.
file_name_to_id = {
    "sokoban_ablation_p32n4_archive": "d7mmuycz",
    "sokoban_ablation_p64n2_archive": "6ztyjq6i",
    "sokoban_ablation_p16n8_archive": "zsfku6gz",
    "sokoban_ablation_p4n32_archive": "xif2lpo8",
    "sokoban_ablation_p2n64_archive": "ljrdx916",
}

# Only the run ID is needed for the download call.
for run_id in file_name_to_id.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )

In [23]:
# Run name -> prompt-diversity value used as the x coordinate.
# The p2n64 run is downloaded above but left out of this mapping.
file_name_mapping = {
    "sokoban_ablation_p32n4_archive": 32,
    "sokoban_ablation_p64n2_archive": 64,
    "sokoban_ablation_p16n8_archive": 16,
    "sokoban_ablation_p4n32_archive": 4,
    # "sokoban_ablation_p2n64_archive": 2
}

# metrics.csv path -> prompt-diversity value.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": diversity
    for run_name, diversity in file_name_mapping.items()
}

# Logged metric name -> column name.
metric_names_mapping = dict(
    [
        ("metric/valid_action", "Valid Action"),
        ("metric/effective_action", "Effective Action"),
        ("metric/traj_length", "Turns"),
        ("metric/finished", "Success Rate"),
    ]
)

In [24]:
# Build the per-run metric dict for the prompt-diversity runs (keyed by the integer diversity value).
processed_data = extract_metrics(
    file_plot_dict,
    metric_names_mapping,
)

32 28
64 98
16 28
4 78


In [25]:
# Collapse every run to the step with the highest raw "Success Rate" value.
# np.argsort(...)[-1] is the index of the LARGEST value (not the second largest);
# on ties it may pick a different step than np.argmax would.
# Each metric series is replaced in place by its value at that step, and
# "Success Rate" is additionally rescaled by 100/128.
# NOTE(review): 128 is presumably the number of rollouts per step -- confirm.
for run_metrics in processed_data.values():
    best_step = np.argsort(run_metrics["Success Rate"])[-1]
    for metric_name, series in run_metrics.items():
        picked = series[best_step]
        if metric_name == "Success Rate":
            picked = picked * 100 / 128
        run_metrics[metric_name] = picked

In [26]:
# Arrange the summary as one "RAGEN" series: ascending prompt-diversity values on x,
# and the matching success rate on y.
prompt_diversity = sorted(processed_data)
new_results = {
    "RAGEN": {
        "Prompt Diversity": prompt_diversity,
        "Success Rate (%)": [
            processed_data[diversity]["Success Rate"] for diversity in prompt_diversity
        ],
    }
}

In [27]:
# Success rate against prompt diversity, with the log-scale options set to base 2.
plot_line_chart(
    new_results,
    x_data_label="Prompt Diversity",
    y_data_label="Success Rate (%)",
    save_path="./figures/prompt_diversity.pdf",
    use_log_scale=True,
    log_base=2,
)

# Different Algorithm

In [28]:
# Run name -> W&B run ID for the algorithm comparison on Sokoban and FrozenLake.
file_name_to_id = {
    "sokoban_brpo_archive": "zcng8j4k",
    "sokoban_arpo_archive": "6vijzn23",
    "sokoban_main_archive": "5z8a6cog",
    "frozenlake_brpo_archive": "obzkp1wc",
    "frozenlake_arpo_archive": "ifastrrz",
    "frozenlake_main_archive": "zexswzel",
}

# Only the run ID is needed for the download call.
for run_id in file_name_to_id.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )

In [29]:
# Run name -> algorithm label for the Sokoban comparison (the "main" run is labelled GRPO).
file_name_mapping = {
    "sokoban_arpo_archive": "ARPO",
    "sokoban_brpo_archive": "BRPO",
    "sokoban_main_archive": "GRPO",
}

# metrics.csv path -> algorithm label.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": label
    for run_name, label in file_name_mapping.items()
}

# Logged metric name -> column name; "metric/finished" is exposed directly as "Success Rate (%)".
metric_names_mapping = dict(
    [
        ("metric/valid_action", "Valid Action"),
        ("metric/effective_action", "Effective Action"),
        ("metric/traj_length", "Turns"),
        ("metric/finished", "Success Rate (%)"),
        ("_step", "Steps"),
    ]
)

In [30]:
# Build the per-algorithm metric dict for Sokoban.
# NOTE(review): hard_cut_off=18 presumably truncates each run to a common length -- confirm in plot_utils.
processed_data = extract_metrics(
    file_plot_dict,
    metric_names_mapping,
    hard_cut_off=18,
)

ARPO 68
BRPO 58
GRPO 78


In [31]:
# "Success Rate (%)" over steps for the Sokoban algorithm comparison.
# NOTE(review): this plots the mapped "metric/finished" column as-is, without the
# 100/128 rescaling used for the summary tables -- confirm the y-axis unit.
plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="Success Rate (%)",
    save_path="./figures/sokoban_algorithm.pdf",
)

In [32]:
# Run name -> algorithm label for the FrozenLake comparison (the "main" run is labelled GRPO).
file_name_mapping = {
    "frozenlake_arpo_archive": "ARPO",
    "frozenlake_brpo_archive": "BRPO",
    "frozenlake_main_archive": "GRPO",
}

# metrics.csv path -> algorithm label.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": label
    for run_name, label in file_name_mapping.items()
}

# Logged metric name -> column name.
metric_names_mapping = dict(
    [
        ("metric/valid_action", "Valid Action"),
        ("metric/effective_action", "Effective Action"),
        ("metric/traj_length", "Turns"),
        ("metric/finished", "Success Rate (%)"),
        ("_step", "Steps"),
    ]
)

# NOTE(review): hard_cut_off=18 presumably truncates each run to a common length -- confirm in plot_utils.
processed_data = extract_metrics(
    file_plot_dict,
    metric_names_mapping,
    hard_cut_off=18,
)

# NOTE(review): "frozonlake" in the output file name looks like a typo; it is kept
# unchanged because other documents may reference this exact path.
plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="Success Rate (%)",
    save_path="./figures/frozonlake_algorithm.pdf",
)

ARPO 48
BRPO 18
GRPO 53


# Offline

In [33]:
# Run name -> W&B run ID for the Sokoban "offline" ablation (settings 2, 5, 10 and 20).
file_name_to_id = {
    "sokoban_ablation_offline2_archive": "gilgbeo2",
    "sokoban_ablation_offline5_archive": "rh1a08w1",
    "sokoban_ablation_offline10_archive": "86xslll0",
    "sokoban_ablation_offline20_archive": "jm0wh04h",
}

# Only the run ID is needed for the download call.
for run_id in file_name_to_id.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )

In [34]:
# Run name -> offline setting; the integer is both the result key and a divisor below.
file_name_mapping = {
    "sokoban_ablation_offline2_archive": 2,
    "sokoban_ablation_offline5_archive": 5,
    "sokoban_ablation_offline10_archive": 10,
    "sokoban_ablation_offline20_archive": 20,
}

# metrics.csv path -> offline setting.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": offline_setting
    for run_name, offline_setting in file_name_mapping.items()
}

# Logged metric name -> column name.
metric_names_mapping = dict(
    [
        ("metric/valid_action", "Valid Action"),
        ("metric/effective_action", "Effective Action"),
        ("metric/traj_length", "Turns"),
        ("metric/finished", "Success Rate"),
    ]
)

processed_data = extract_metrics(file_plot_dict, metric_names_mapping)

# Collapse every run to its LAST logged step (not the best one). Each metric series
# is replaced in place by its final value. "Success Rate" is rescaled by 100/128 and
# then divided by the offline setting.
# NOTE(review): dividing by the offline setting presumably reflects 128 * setting
# rollouts per logged step -- confirm.
for offline_setting, run_metrics in processed_data.items():
    last_step = len(run_metrics["Success Rate"]) - 1
    for metric_name, series in run_metrics.items():
        picked = series[last_step]
        if metric_name == "Success Rate":
            picked = picked * 100 / 128 / offline_setting
        run_metrics[metric_name] = picked

2 13
5 18
10 8
20 3


In [35]:
# Display the offline-ablation summary: each metric at the last logged step, keyed by offline setting.
processed_data

{2: {'Steps': 3,
  'Valid Action': 2.484375,
  'Effective Action': 0.90625,
  'Turns': 4.7890625,
  'Success Rate': 7.03125},
 5: {'Steps': 3,
  'Valid Action': 2.721875,
  'Effective Action': 1.065625,
  'Turns': 4.5875,
  'Success Rate': 12.96875},
 10: {'Steps': 3,
  'Valid Action': 3.63203125,
  'Effective Action': 1.39140625,
  'Turns': 4.559375,
  'Success Rate': 14.296875},
 20: {'Steps': 3,
  'Valid Action': 3.790625,
  'Effective Action': 1.3078125,
  'Turns': 4.50078125,
  'Success Rate': 16.2109375}}

# Bandits Analysis

In [36]:
# Run name -> W&B run ID for the bandit generalization analysis.
file_name_to_id = {
    "bandit_genea_reverse_no_think_rl": "hs2370nr",
    "bandit_genea_reverse_archive": "aif22pzz",
    "bandit_genea_regular_no_think_rl": "g9dlkrhr",
    "bandit_genea_regular": "7bqfe4y1",
    "bandit_genea_reverse_testdiff_archive": "h42n6i2t",
    "bandit_genea_reverse_testdiff_no_think_rl_archive": "iad87j0f",
}

# Only the run ID is needed for the download call.
for run_id in file_name_to_id.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )
    

In [37]:
# Run name -> legend label for the RAGEN runs of the bandit analysis.
file_name_mapping = {
    "bandit_genea_regular": "[Teacher, Engineer] (RAGEN)",
    "bandit_genea_reverse_archive": "[Engineer, Teacher] (RAGEN)",
    "bandit_genea_reverse_testdiff_archive": "[Trader, Librarian] (RAGEN)",
}

# metrics.csv path -> legend label.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": label
    for run_name, label in file_name_mapping.items()
}

# Logged metric name -> column name. Unlike the first bandit figure, the arm
# metrics are plotted as-is, with no /128*100 rescaling.
metric_names_mapping = dict(
    [
        ("_step", "Steps"),
        ("metric/n_high_arm", "High Arm Rate (%)"),
        ("metric/n_low_arm", "Low Arm Rate (%)"),
    ]
)

processed_data = extract_metrics(file_plot_dict, metric_names_mapping)

# NOTE(review): "legnet_size" looks like a misspelling of "legend_size". It is kept
# unchanged because the key plot_utils actually reads is not visible here -- confirm.
plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="High Arm Rate (%)",
    save_path="./figures/analysis_high_arm_rate_vs_step.pdf",
    smooth=True,
    font_config={"legnet_size": 18},
)
plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="Low Arm Rate (%)",
    save_path="./figures/analysis_low_arm_rate_vs_step.pdf",
    smooth=True,
    font_config={"legnet_size": 18},
)

[Teacher, Engineer] (RAGEN) 48
[Engineer, Teacher] (RAGEN) 48
[Trader, Librarian] (RAGEN) 48


In [38]:
# Run name -> legend label for the no-think-RL runs of the bandit analysis.
file_name_mapping = {
    "bandit_genea_regular_no_think_rl": "[Teacher, Engineer] (no-think-RL)",
    "bandit_genea_reverse_no_think_rl": "[Engineer, Teacher] (no-think-RL)",
    "bandit_genea_reverse_testdiff_no_think_rl_archive": "[Trader, Librarian] (no-think-RL)",
}

# metrics.csv path -> legend label.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": label
    for run_name, label in file_name_mapping.items()
}

# Logged metric name -> column name. The arm metrics are plotted as-is, with no
# /128*100 rescaling.
metric_names_mapping = dict(
    [
        ("_step", "Steps"),
        ("metric/n_high_arm", "High Arm Rate (%)"),
        ("metric/n_low_arm", "Low Arm Rate (%)"),
    ]
)

processed_data = extract_metrics(file_plot_dict, metric_names_mapping)

# NOTE(review): "legnet_size" looks like a misspelling of "legend_size". It is kept
# unchanged because the key plot_utils actually reads is not visible here -- confirm.
plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="High Arm Rate (%)",
    save_path="./figures/analysis_high_arm_rate_vs_step_no_thinking.pdf",
    smooth=True,
    font_config={"legnet_size": 18},
)
plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="Low Arm Rate (%)",
    save_path="./figures/analysis_low_arm_rate_vs_step_no_thinking.pdf",
    smooth=True,
    font_config={"legnet_size": 18},
)

[Teacher, Engineer] (no-think-RL) 48
[Engineer, Teacher] (no-think-RL) 48
[Trader, Librarian] (no-think-RL) 48


## Scaling Model Size

In [39]:
# Run name -> W&B run ID for the model-size comparison on Sokoban.
file_name_to_id = {
    "sokoban_main_Qwen_1_5B": "y8oxwch3",
    "sokoban_main_Qwen_3B": "27ymjev4",
    "sokoban_main_Qwen_0_5B": "d6ajd34v",
}

# Only the run ID is needed for the download call.
for run_id in file_name_to_id.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )

# Run name -> legend label, listed from the smallest to the largest model.
file_name_mapping = {
    "sokoban_main_Qwen_0_5B": "Qwen 0.5B",
    "sokoban_main_Qwen_1_5B": "Qwen 1.5B",
    "sokoban_main_Qwen_3B": "Qwen 3B",
}

# metrics.csv path -> legend label.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": label
    for run_name, label in file_name_mapping.items()
}

# The finished and total environment counts are loaded separately; the next cell
# turns them into a percentage.
metric_names_mapping = dict(
    [
        ("_step", "Steps"),
        ("metric/finished_env", "Finished Env"),
        ("metric/total_env", "Total Env"),
    ]
)

processed_data = extract_metrics(file_plot_dict, metric_names_mapping)


Qwen 0.5B 48
Qwen 1.5B 48
Qwen 3B 28


In [40]:
# Add a "Success Rate (%)" series to every run: finished environments as a
# percentage of total environments.
for run_metrics in processed_data.values():
    finished = run_metrics["Finished Env"]
    total = run_metrics["Total Env"]
    run_metrics["Success Rate (%)"] = finished / total * 100

In [41]:
# Success rate over training steps for each model size.
plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="Success Rate (%)",
    save_path="./figures/scaling_model_size.pdf",
)

## Scaling Horizon


In [42]:
# Run name -> W&B run ID for the FrozenLake horizon comparison (10, 15 and 20 turns).
file_name_to_id = {
    "frozenlake_horizon_20": "di8acl6m",
    "frozenlake_horizon_15": "yvzsldol",
    "frozenlake_horizon_10": "o0je56o3",
}

# Only the run ID is needed for the download call.
for run_id in file_name_to_id.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )

# Run name -> legend label. The last label previously read "20 Truns", a typo that
# ended up in the figure legend.
file_name_mapping = {
    "frozenlake_horizon_10": "10 Turns",
    "frozenlake_horizon_15": "15 Turns",
    "frozenlake_horizon_20": "20 Turns",
}

# metrics.csv path -> legend label.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": label
    for run_name, label in file_name_mapping.items()
}

# The finished and total environment counts are loaded separately and combined below.
metric_names_mapping = {
    "_step": "Steps",
    "metric/finished_env": "Finished Env",
    "metric/total_env": "Total Env",
}

processed_data = extract_metrics(file_plot_dict, metric_names_mapping)

# Add a "Success Rate (%)" series: finished environments as a percentage of total environments.
for run_metrics in processed_data.values():
    run_metrics["Success Rate (%)"] = (
        run_metrics["Finished Env"] / run_metrics["Total Env"] * 100
    )

plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="Success Rate (%)",
    save_path="./figures/scaling_horizon_frozen.pdf",
)


10 Turns 38
15 Turns 31
20 Truns 30


In [43]:
# Run name -> W&B run ID for the Sokoban horizon comparison (10, 15 and 20 turns).
file_name_to_id = {
    "sokoban_horizon_20": "9ak5y9rj",
    "sokoban_horizon_15": "hekcppay",
    "sokoban_horizon_10": "vfcrb9k3",
}

# Only the run ID is needed for the download call.
for run_id in file_name_to_id.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )

# Run name -> legend label. The last label previously read "20 Truns", a typo that
# ended up in the figure legend.
file_name_mapping = {
    "sokoban_horizon_10": "10 Turns",
    "sokoban_horizon_15": "15 Turns",
    "sokoban_horizon_20": "20 Turns",
}

# metrics.csv path -> legend label.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": label
    for run_name, label in file_name_mapping.items()
}

# The finished and total environment counts are loaded separately and combined below.
metric_names_mapping = {
    "_step": "Steps",
    "metric/finished_env": "Finished Env",
    "metric/total_env": "Total Env",
}

processed_data = extract_metrics(file_plot_dict, metric_names_mapping)

# Add a "Success Rate (%)" series: finished environments as a percentage of total environments.
for run_metrics in processed_data.values():
    run_metrics["Success Rate (%)"] = (
        run_metrics["Finished Env"] / run_metrics["Total Env"] * 100
    )

plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="Success Rate (%)",
    save_path="./figures/scaling_horizon_sokoban.pdf",
)

10 Turns 18
15 Turns 58
20 Truns 18


## Instruct vs Base

In [44]:
# Run name -> W&B run ID for the instruct-vs-base comparison on Sokoban.
file_name_to_id = {
    "sokoban_abl_base": "yzztq7p7",
    # "sokoban_main_Qwen_3B": "27ymjev4",
    "sokoban_main_Qwen_0_5B": "d6ajd34v",
    "sokoban_main_Qwen_1_5B": "y8oxwch3",
    "sokoban_main_Qwen_1_5B_Base": "a27bq2ll",
}

# Only the run ID is needed for the download call.
for run_id in file_name_to_id.values():
    download_wandb_metrics(
        team=team,
        project=project,
        run_id=run_id,
        out_dir="./wandb_logs",
    )

# Run name -> legend label; instruct models first, then base models.
file_name_mapping = {
    "sokoban_main_Qwen_0_5B": "Qwen 0.5B Instruct",
    "sokoban_main_Qwen_1_5B": "Qwen 1.5B Instruct",
    "sokoban_abl_base": "Qwen 0.5B Base",
    "sokoban_main_Qwen_1_5B_Base": "Qwen 1.5B Base",
}

# metrics.csv path -> legend label.
file_plot_dict = {
    f"./wandb_logs/{run_name}/metrics.csv": label
    for run_name, label in file_name_mapping.items()
}

# The finished and total environment counts are loaded separately and combined below.
metric_names_mapping = dict(
    [
        ("_step", "Steps"),
        ("metric/finished_env", "Finished Env"),
        ("metric/total_env", "Total Env"),
    ]
)

processed_data = extract_metrics(file_plot_dict, metric_names_mapping)

# Add a "Success Rate (%)" series: finished environments as a percentage of total environments.
for run_metrics in processed_data.values():
    finished = run_metrics["Finished Env"]
    total = run_metrics["Total Env"]
    run_metrics["Success Rate (%)"] = finished / total * 100

plot_line_chart(
    processed_data,
    x_data_label="Steps",
    y_data_label="Success Rate (%)",
    save_path="./figures/base_vs_instruct_sokoban.pdf",
)


Qwen 0.5B Instruct 48
Qwen 1.5B Instruct 48
Qwen 0.5B Base 29
Qwen 1.5B Base 98


## Scaling Model New