In [5]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from collections import OrderedDict

# Hex colour used for each feedback type in the plots. Insertion order is kept
# by the OrderedDict, so iterating it yields the feedback types in this order.
color_scale = OrderedDict(
    evaluative='#1f77b4',              # blue
    comparative='#ff7f0e',             # orange
    demonstrative='#2ca02c',           # green
    corrective='#d62728',              # red
    descriptive='#9467bd',             # purple
    descriptive_preference='#8c564b',  # brown
)

def extract_info(run_name):
    """Split a run name into ``(environment, seed, feedback_type)``.

    The name is tokenised on underscores. Token 2 is returned as the
    environment and token 3 as the seed (both as strings). The feedback type
    is the second-to-last token, except for names containing
    "descriptive_preference": that label contains an underscore itself, so it
    is returned verbatim instead of being read from a single token.

    Raises:
        IndexError: if the name has fewer than four underscore-separated tokens.
    """
    tokens = run_name.split('_')
    env_name, seed = tokens[2], tokens[3]

    if "descriptive_preference" in run_name:
        feedback = "descriptive_preference"
    else:
        feedback = tokens[-2]

    return env_name, seed, feedback

def get_final_rewards(df, score_field="eval/mean_reward"):
    """Gather one score per run into a nested ``env -> feedback -> seed`` dict.

    Args:
        df: DataFrame with a 'run_name' column and a ``score_field`` column.
        score_field: name of the column whose value is stored for each run.

    Returns:
        dict mapping environment -> feedback type -> seed -> the row's
        ``score_field`` value, where the three keys are whatever
        ``extract_info`` parses out of the run name. If two rows map to the
        same key triple, the later row wins.
    """
    rewards_by_env = {}

    for _, run_row in df.iterrows():
        env_name, seed_id, fb_type = extract_info(run_row['run_name'])
        # setdefault creates the intermediate levels the first time they are seen.
        per_feedback = rewards_by_env.setdefault(env_name, {})
        per_seed = per_feedback.setdefault(fb_type, {})
        per_seed[seed_id] = run_row[score_field]

    return rewards_by_env

def normalize_rewards(final_rewards, eval_df, environments, feedback_types):
    """Normalize rewards by the mean expert score of each environment.

    Args:
        final_rewards: nested dict ``env -> feedback -> seed -> reward``.
            Environments or feedback types that are missing are treated as
            having no runs instead of raising ``KeyError``.
        eval_df: DataFrame with an 'env' column and an 'eval_score' column.
        environments: environments to include in the result.
        feedback_types: feedback types to include for every environment.

    Returns:
        dict ``env -> feedback -> seed -> reward / mean(eval_score for env)``.
        Every requested ``env`` and ``feedback`` key is always present; the
        innermost dict is empty when there are no runs for that combination
        or when ``eval_df`` has no rows for the environment.
    """
    normalized_rewards = {}

    for env in environments:
        env_scores = eval_df[eval_df['env'] == env]['eval_score']
        # The expert mean only depends on the environment, so compute it once
        # per environment rather than once per seed.
        mean_expert_score = None if env_scores.empty else env_scores.mean()

        env_rewards = final_rewards.get(env, {})
        normalized_rewards[env] = {}

        for feedback in feedback_types:
            if mean_expert_score is None:
                # No expert reference for this environment: nothing to normalize against.
                normalized_rewards[env][feedback] = {}
                continue

            normalized_rewards[env][feedback] = {
                seed: reward / mean_expert_score
                for seed, reward in env_rewards.get(feedback, {}).items()
            }

    return normalized_rewards

def create_scatter_plot(corr_data, rewards_data, environments, feedback_types, fb_type_list, env_list,
                        output_path='reward_correlation_scatter.png'):
    """Save a scatter plot of normalized reward against ground-truth correlation.

    Each point is coloured by feedback type (via the module-level
    ``color_scale``) and shaped by environment. Two legends are drawn to the
    right of the axes: one for feedback types, one for environments.

    Args:
        corr_data: x value of each point.
        rewards_data: y value of each point.
        environments: environment of each point; must appear in ``env_list``.
        feedback_types: feedback type of each point; must be a key of
            ``color_scale``.
        fb_type_list: feedback types to list in the first legend.
        env_list: environments to list in the second legend. Markers are
            assigned in this order and repeat once the marker list runs out.
        output_path: where the PNG is written. Defaults to the previously
            hard-coded file name.

    The figure is always closed before returning, and the enlarged font size
    is applied to this figure only rather than to the global rcParams.
    """
    marker_cycle = ['o', 's', '^', 'D', 'v', '<', '>', 'p', 'h', '8']
    # Wrap around the marker list so that more environments than markers does
    # not leave some environments without a marker (and a KeyError later).
    env_markers = {
        env: marker_cycle[idx % len(marker_cycle)]
        for idx, env in enumerate(env_list)
    }

    # rc_context restores the previous settings on exit, so the larger font
    # does not leak into other plots created in the same kernel.
    with plt.rc_context({'font.size': 14}):
        fig, ax = plt.subplots(figsize=(12, 8))
        try:
            # Plot all points
            for i in range(len(corr_data)):
                ax.scatter(
                    corr_data[i],
                    rewards_data[i],
                    c=color_scale[feedback_types[i]],
                    marker=env_markers[environments[i]],
                    s=100,
                    alpha=0.7
                )

            # Empty scatters act as proxy artists that only feed the legends.
            feedback_handles = [
                ax.scatter([], [], c=color_scale[fb_type],
                           label=fb_type.capitalize().replace("Descriptive_preference", "Desc.Pref."),
                           marker='o')
                for fb_type in fb_type_list
            ]
            env_handles = [
                ax.scatter([], [], c='gray', marker=env_markers[env], label=env)
                for env in env_list
            ]

            ax.set_xlabel('Correlation with Ground Truth')
            ax.set_ylabel('Normalized Mean Reward')
            ax.grid(True, alpha=0.3)

            first_legend = ax.legend(handles=feedback_handles,
                                     title="Feedback Types",
                                     bbox_to_anchor=(1.05, 1),
                                     loc='upper left',
                                     borderaxespad=0.)
            # A second legend() call replaces the first one, so re-attach the
            # first legend to the axes as a plain artist.
            ax.add_artist(first_legend)

            ax.legend(handles=env_handles,
                      title="Environments",
                      bbox_to_anchor=(1.05, 0.5),  # positioned below the first legend
                      loc='upper left',
                      borderaxespad=0.)

            ax.set_xlim(-0.1, 1.1)

            # Reference line at y=1, i.e. a reward equal to the normalization score.
            ax.axhline(y=1, color='gray', linestyle='--', alpha=0.5)

            # bbox_inches='tight' keeps the legends outside the axes in the saved image.
            fig.tight_layout()
            fig.savefig(output_path, bbox_inches='tight', dpi=300)
        finally:
            # Release the figure even if plotting or saving failed.
            plt.close(fig)

In [2]:
def get_correlations(normalized_rewards, correlations, seeds):
    """Compute a Pearson correlation with ground truth for every run.

    Args:
        normalized_rewards: nested dict ``env -> feedback -> ...``; only its
            environment and feedback keys are used, to decide which entries
            to produce.
        correlations: dict ``env -> int seed -> indexable reward data``.
            Entry 0 is used as the reference series (presumably the
            ground-truth reward — confirm against the code that writes the
            pickles) and entries 1-6 as the per-feedback-type series, in the
            order given by ``feedback_indices`` below.
        seeds: seeds to evaluate; each is converted with ``int()`` for the
            lookup in ``correlations`` but used unchanged as the output key.

    Returns:
        dict ``env -> feedback -> seed -> Pearson r``. Environments absent
        from ``correlations`` get empty per-feedback dicts.
    """
    # Map feedback types to indices in the correlation matrix
    feedback_indices = {
        "evaluative": 1,
        "comparative": 2,
        "demonstrative": 3,
        "corrective": 4,
        "descriptive": 5,
        "descriptive_preference": 6
    }

    out_correlations = {}

    for env, env_rewards in normalized_rewards.items():
        out_correlations[env] = {}
        for feedback in env_rewards:
            out_correlations[env][feedback] = {}

            # Check the argument that was passed in, not a notebook-level
            # global, so the function works regardless of kernel state.
            if env not in correlations:
                continue

            series_idx = feedback_indices[feedback]
            for seed in seeds:
                seed_data = correlations[env][int(seed)]
                corr, _ = pearsonr(seed_data[0], seed_data[series_idx])
                out_correlations[env][feedback][seed] = corr

    return out_correlations

In [3]:
import wandb
import pandas as pd

# Start a wandb session.
# NOTE(review): this opens a new run in a placeholder project; the queries below
# go through wandb.Api() and may not need it — confirm before removing.
wandb.init(project="your_project_name")

# Ask the API for every run in the project whose display name begins with "RL_"
api = wandb.Api()
runs = api.runs("multi_reward_feedback_final_lul", filters={"display_name": {"$regex": "^RL_.*"}})

# One flat record per retained run
filtered_run_data = []

for run in runs:
    # Keep only "RL_" runs, and drop the noise and ensemble variants
    if not run.name.startswith("RL_") or "noise" in run.name or "ensemble" in run.name:
        continue

    # Summary statistics of the run (final values of the logged metrics)
    summary = run.summary._json_dict

    # Logged series for the two metrics of interest
    history = run.history(keys=["eval/mean_reward", "global_step"])

    # Identifiers first, then the summary fields, then each history column
    # flattened to a list under "<column>_history"; later keys override earlier ones.
    record = {"run_id": run.id, "run_name": run.name}
    record.update(summary)
    record.update({f"{k}_history": v.tolist() for k, v in history.items()})

    filtered_run_data.append(record)

# Table with one row per retained run
orig_df = pd.DataFrame(filtered_run_data)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mymetz[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
%load_ext autoreload
%autoreload 2
import os
import pickle
from scipy.stats import pearsonr

# Expert evaluation scores; normalize_rewards reads its 'env' and 'eval_score' columns.
eval_df = pd.read_csv("../../main/gt_agents/collected_results.csv")

# Final reward of every run, nested as env -> feedback type -> seed.
# The value comes from the "eval/mean_reward" column of orig_df.
final_rewards = get_final_rewards(orig_df)

# Environments and feedback types to include in the plot
environments = ["HalfCheetah-v5", "Walker2d-v5", "Swimmer-v5", "Ant-v5", "Hopper-v5", "Humanoid-v5"]
feedback_types = ["evaluative", "comparative", "demonstrative", "corrective", "descriptive", "descriptive_preference"]

# Normalize rewards
normalized_rewards = normalize_rewards(final_rewards, eval_df, environments, feedback_types)

# Algorithm used in the correlation file name of each environment
environments_dict = {"ppo": ["HalfCheetah-v5", "Walker2d-v5", "Swimmer-v5"],
                    "sac": ["Ant-v5", "Hopper-v5", "Humanoid-v5"]}
algo = ["ppo", "sac"]
#noise = [0.0, 0.1, 0.25, 0.5, 0.75]
noise = 0.0
seeds = [1687123, 1789, 12]

# Load the pickled reward arrays, keyed as env -> integer seed
all_env_rewards = {}
for alg in algo:
    for env in environments_dict[alg]:
        all_env_rewards[env] = {}
        for seed in seeds:
            file_name = f"corr_{env}_{alg}_noise_{noise}_{seed}.pkl"
            # NOTE: pickle.load can execute arbitrary code; only load files
            # produced by this project.
            with open(os.path.join("../correlation_data", file_name), "rb") as load_file:
                load_rewards = pickle.load(load_file)
            pred_rewards = np.array(load_rewards)
            all_env_rewards[env][seed] = pred_rewards

# Get correlations
correlations = get_correlations(normalized_rewards, all_env_rewards, seeds)

# Flatten everything into parallel per-point lists for plotting
plot_correlations = []
plot_rewards = []
plot_envs = []
plot_fb_types = []
for env in environments:
    for feedback_type in feedback_types:
        rewards_for_combo = normalized_rewards[env][feedback_type]
        for seed in seeds:
            # Rewards are keyed by the seed string parsed from the run name,
            # while correlations are keyed by the integer seed.
            seed_key = str(seed)
            if seed_key not in rewards_for_combo:
                continue
            plot_correlations.append(correlations[env][feedback_type][seed])
            plot_rewards.append(rewards_for_combo[seed_key])
            plot_envs.append(env)
            plot_fb_types.append(feedback_type)

# Create scatter plot
create_scatter_plot(plot_correlations, plot_rewards, plot_envs, plot_fb_types, feedback_types, environments)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
