In [1]:
import pandas as pd
import wandb

# Initialize wandb
# NOTE(review): this starts a run in the placeholder project "your_project_name".
# The visible cells only read data through `wandb.Api()`, so this call is
# presumably here just to trigger login — confirm it is actually needed.
wandb.init(project="your_project_name")

# Fetch runs from your project
api = wandb.Api()
# Ask the API only for runs whose display name matches the regex ^RL_.*
runs = api.runs("multi_reward_feedback_final_lul", filters={"display_name": {"$regex": "^RL_.*"}})
# Disabled: the same query against the "multi_reward_feedback_final" project.
#runs_orig = api.runs("multi_reward_feedback_final", filters={"display_name": {"$regex": "^RL_.*"}})

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mymetz[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
# Collect one flat record (dict) per selected run; the records become the
# rows of `orig_df` at the end of this cell.
filtered_run_data = []

# Iterate through the runs
for run in runs:
    # Keep runs whose name starts with "RL_" and does not contain "ensemble"
    if run.name.startswith("RL_") and "ensemble" not in run.name:
        # Get the summary statistics (includes final values of metrics)
        # NOTE(review): `_json_dict` is a private attribute of the summary
        # object and may change between wandb versions.
        summary = run.summary._json_dict

        # Get the history, restricted to the two keys needed for plotting
        history = run.history(keys=["eval/mean_reward", "global_step"])

        # Combine summary and history data
        # Summary keys are spread after "run_id"/"run_name", so a summary entry
        # with one of those names would override it. Each item of `history`
        # is stored as a list under "<key>_history" (presumably `history` is a
        # pandas DataFrame, so items are (column, Series) pairs — TODO confirm).
        run_data = {
            "run_id": run.id,
            "run_name": run.name,
            **summary,
            **{f"{k}_history": v.tolist() for k, v in history.items()}
        }

        filtered_run_data.append(run_data)

# The string literal below is disabled code: the same loop over `runs_orig`,
# kept as a bare string expression so it is never executed.
"""for run in runs_orig:
    # Check if the run name starts with "ppo_"
    if run.name.startswith("RL_") and "ensemble" not in run.name:
        # Get the summary statistics (includes final values of metrics)
        summary = run.summary._json_dict

        # Get the history (includes all logged metrics)
        history = run.history(keys=["eval/mean_reward", "global_step"])

        # Combine summary and history data
        run_data = {
            "run_id": run.id,
            "run_name": run.name,
            **summary,
            **{f"{k}_history": v.tolist() for k, v in history.items()}
        }

        filtered_run_data.append(run_data)
"""


# Create a DataFrame from filtered run data (one row per selected run)
orig_df = pd.DataFrame(filtered_run_data)

In [4]:
import colorsys
from collections import OrderedDict, defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Existing color scale
# Base hex color per feedback type; insertion order follows the listing below.
color_scale = OrderedDict(
    evaluative='#1f77b4',              # blue
    comparative='#ff7f0e',             # orange
    demonstrative='#2ca02c',           # green
    corrective='#d62728',              # red
    descriptive='#9467bd',             # purple
    preference='#8c564b',              # brown
    descriptive_preference='#e377c2',  # pink
)

# Function to create color variations with stronger fading
def create_color_variations(base_color, num_variations=5):
    """Return a list of `num_variations` RGB tuples derived from `base_color`.

    The base color is converted to HSV. For variation index i (starting at 0)
    the hue is kept, the saturation is scaled by (1 - 0.25 * i) but never
    drops below 0.1, and the value is scaled by (1 + 0.25 * i) but never
    exceeds 1.0. Index 0 therefore reproduces the base color.
    """
    hue, saturation, value = colorsys.rgb_to_hsv(
        *plt.matplotlib.colors.to_rgb(base_color)
    )
    return [
        colorsys.hsv_to_rgb(
            hue,
            max(0.1, saturation * (1 - step * 0.25)),
            min(1.0, value * (1 + step * 0.25)),
        )
        for step in range(num_variations)
    ]

# Function to extract environment, feedback type, and noise level from run name
def extract_info(run_name):
    """Split a run name into (environment, feedback type, noise level).

    The name is split on underscores. The environment is the third token.
    The feedback type is the fifth token, unless the name contains
    "descriptive_preference", which is reported as a whole. The noise level is
    the last token when the token before it is "noise", otherwise 0.0.

    Returns:
        Tuple ``(env, feedback, noise)`` with ``noise`` converted to float.
    """
    tokens = run_name.split('_')
    env = tokens[2]

    # "descriptive_preference" contains an underscore itself, so the fifth
    # token alone would only give "descriptive"; match on the full name instead.
    feedback = "descriptive_preference" if "descriptive_preference" in run_name else tokens[4]

    noise = tokens[-1] if tokens[-2] == "noise" else "0.0"
    return env, feedback, float(noise)

def safe_convert_to_float(value):
    """Convert `value` to float, returning NaN when `float()` rejects it.

    Only ValueError and TypeError are turned into NaN; any other exception
    raised by the conversion propagates.
    """
    try:
        converted = float(value)
    except (TypeError, ValueError):
        converted = np.nan
    return converted

# Function to interpolate NaN values in a series
def interpolate_nans(series):
    """Return `series` as an array with NaN gaps filled by pandas interpolation.

    The input is wrapped in a `pd.Series`, `Series.interpolate()` is called
    with its default arguments, and the underlying values array is returned.
    """
    as_series = pd.Series(series)
    filled = as_series.interpolate()
    return filled.values

# Group runs by environment, feedback type, and noise level
# Resulting shape: grouped_runs[env][feedback][noise] -> list of DataFrame rows.
grouped_runs = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
for _, row in orig_df.iterrows():
    env, feedback, noise = extract_info(row['run_name'])
    # A float in this column means there is no history list for the run
    # (presumably the NaN pandas fills in for a missing key — TODO confirm);
    # such runs are skipped.
    if isinstance(row['eval/mean_reward_history'], float):
        continue
    # Replace the string "nan" with a real NaN so later numeric code can detect it.
    # NOTE(review): this assigns on the row object yielded by iterrows(), which
    # is what gets stored below; presumably it does not write back to orig_df.
    row['eval/mean_reward_history'] = [np.nan if x == "nan" else x for x in row['eval/mean_reward_history']]
    #row['eval/mean_reward_history'] = interpolate_nans(row['eval/mean_reward_history'])
    grouped_runs[env][feedback][noise].append(row)

# Plotting function
def plot_environment(env, feedback_runs, step_size=1000):
    """Plot mean evaluation-reward curves for one environment and save them as a PNG.

    A 2x3 grid is drawn with one panel per feedback type. Within a panel each
    noise level gets one line: every run's reward history is linearly
    interpolated onto a shared step grid and the runs are averaged.

    Args:
        env: Environment name; used only in the output file name and in the
            printed message.
        feedback_runs: Mapping ``feedback type -> noise level -> list of runs``.
            Each run must support ``run['global_step_history']`` and
            ``run['eval/mean_reward_history']`` (sequences of equal length).
            Missing feedback types or noise levels are skipped, and the
            mapping is not modified.
        step_size: Spacing of the shared step grid. Defaults to 1000, the
            value that used to be hard-coded.

    Side effects:
        Writes ``noise_rl_curves_{env}.png`` to the current working directory,
        closes the figure and prints a confirmation line.
    """
    fig, axes = plt.subplots(2, 3, figsize=(12, 8))
    #fig.suptitle(f"Reward Model: Validation Loss Curves for {env}", fontsize=18)

    feedback_types = ["evaluative", "comparative", "demonstrative", "corrective", "descriptive", "descriptive_preference"]
    noise_levels = [0.0, 0.1, 0.25, 0.5, 0.75]

    for idx, feedback in enumerate(feedback_types):
        ax = axes[idx // 3, idx % 3]
        color_variations = create_color_variations(color_scale[feedback])
        # .get() instead of indexing: works with plain dicts and does not
        # insert empty entries into a caller-owned defaultdict.
        runs_by_noise = feedback_runs.get(feedback, {})
        has_curve = False

        for noise, color in zip(noise_levels, color_variations):
            runs = runs_by_noise.get(noise)
            if not runs:
                continue

            run_steps = [
                np.array([safe_convert_to_float(step) for step in run['global_step_history']], dtype=float)
                for run in runs
            ]
            # NaN-aware maximum: the builtin max() gives order-dependent
            # results when NaN is present and could pass NaN on to int().
            step_maxima = [np.nanmax(steps) for steps in run_steps if not np.isnan(steps).all()]
            if not step_maxima:
                continue
            max_steps = max(step_maxima)

            # Create a common x-axis (steps) based on the actual step values
            common_steps = np.arange(0, int(max_steps) + 1, step_size)

            # One row per run; rows of runs without usable data stay NaN.
            all_rewards = np.full((len(runs), len(common_steps)), np.nan)

            for i, (run, steps) in enumerate(zip(runs, run_steps)):
                rewards = np.array(
                    [safe_convert_to_float(reward) for reward in run['eval/mean_reward_history']],
                    dtype=float,
                )

                # Drop every (step, reward) pair in which either value is NaN
                valid = ~np.isnan(steps) & ~np.isnan(rewards)
                steps, rewards = steps[valid], rewards[valid]
                if steps.size == 0:
                    continue

                # np.interp needs increasing x values; a stable sort leaves
                # already-ordered histories unchanged. Outside a run's own
                # step range np.interp holds the first/last reward constant.
                order = np.argsort(steps, kind="stable")
                all_rewards[i] = np.interp(common_steps, steps[order], rewards[order])

            # No run contributed a curve: skip instead of plotting an all-NaN line
            if np.isnan(all_rewards).all():
                continue

            mean_reward = np.nanmean(all_rewards, axis=0)

            ax.plot(common_steps, mean_reward, label=f"Noise {noise}", color=color, linewidth=2)
            has_curve = True

        ax.set_title(f"{feedback.capitalize()}", fontsize=14)
        ax.set_xlabel("Global Steps", fontsize=14)
        ax.set_ylabel("Reward", fontsize=14)
        ax.tick_params(axis='both', which='major', labelsize=12)
        # Only request a legend when the panel has at least one labelled line
        if has_curve:
            ax.legend(fontsize=12, loc='lower right')
        ax.grid(True, alpha=0.3)

    # Operate on `fig` explicitly rather than on pyplot's implicit current figure
    fig.tight_layout()
    fig.subplots_adjust(top=0.92, hspace=0.4, wspace=0.3)
    output_path = f"noise_rl_curves_{env}.png"
    fig.savefig(output_path, dpi=300)
    plt.close(fig)
    # The message text still says "Loss curves" although rewards are plotted;
    # it is kept unchanged so the printed output stays the same.
    print(f"Loss curves for {env} have been saved to {output_path}")

# Render and save one figure per environment found in the grouped runs
for env in grouped_runs:
    feedback_runs = grouped_runs[env]
    plot_environment(env, feedback_runs)

Loss curves for Swimmer-v5 have been saved to noise_rl_curves_Swimmer-v5.png
Loss curves for HalfCheetah-v5 have been saved to noise_rl_curves_HalfCheetah-v5.png
Loss curves for Ant-v5 have been saved to noise_rl_curves_Ant-v5.png
Loss curves for Humanoid-v5 have been saved to noise_rl_curves_Humanoid-v5.png
Loss curves for Hopper-v5 have been saved to noise_rl_curves_Hopper-v5.png
Loss curves for Walker2d-v5 have been saved to noise_rl_curves_Walker2d-v5.png
