In [None]:
import wandb
import json
import pandas as pd
import numpy as np
from collections import defaultdict
from itertools import chain

In [None]:
# Connect to the W&B API and fetch the run collections of the three projects
# that the rest of the notebook extracts data from.
api = wandb.Api(timeout=120)
runs, ppo_grid_runs, a2c_grid_runs = (
    api.runs(project)
    for project in ("wilrop/IPRO_runs", "wilrop/IPRO_ppo_grid", "wilrop/IPRO_a2c_grid")
)

# Only runs whose config matches one of these algorithms and environments are kept.
algs = [
    "SN-MO-PPO",
    "SN-MO-DQN",
    "SN-MO-A2C",
]
env_ids = [
    "deep-sea-treasure-concave-v0",
    "minecart-v0",
    "mo-reacher-v4",
]

In [None]:
# Group runs by environment and algorithm, and within each pair by parent run
# id (falling back to the configured group when no parent run id is present).
run_hists = {env_id: {alg: defaultdict(list) for alg in algs} for env_id in env_ids}

for run in chain(runs, ppo_grid_runs, a2c_grid_runs):
    config = run.config
    # Use .get so a run whose config lacks these keys is skipped instead of
    # aborting the whole grouping with a KeyError.
    env_id = config.get('env_id')
    alg = config.get('alg_name')
    if env_id not in env_ids or alg not in algs:
        continue

    group = config['parent_run_id'] if 'parent_run_id' in config else config['group']
    run_hists[env_id][alg][group].append(run)
    print(f'Added run to {env_id} - {alg}')

In [None]:
# Keep only the best runs.
#
# For every (environment, algorithm) pair the group with the highest mean final
# hypervolume among the *complete* groups (exactly NUM_SEEDS runs) is kept in
# `best_data`. Incomplete groups whose mean beats that best are recorded in
# `leftovers` as [alg, env_id, seed, group] entries, one per missing seed.
NUM_SEEDS = 5
best_data = {env_id: {alg: None for alg in algs} for env_id in env_ids}
leftovers = []

for env_id, alg_groups in run_hists.items():
    for alg, groups in alg_groups.items():
        # `group_runs` (not `runs`) so the global run collection fetched from
        # the API is not overwritten by this loop.
        mean_hvs = {
            group: np.mean([run.summary['outer/hypervolume'] for run in group_runs])
            for group, group_runs in groups.items()
        }

        # Pass 1: pick the best complete group. Strict '>' keeps the first
        # group on ties.
        best_hv = -np.inf
        for group, group_runs in groups.items():
            if len(group_runs) == NUM_SEEDS and mean_hvs[group] > best_hv:
                best_hv = mean_hvs[group]
                best_data[env_id][alg] = group_runs

        if best_data[env_id][alg] is None:
            print(f"No group with {NUM_SEEDS} runs found for {alg} - {env_id}")

        # Pass 2: compare the remaining groups against the *final* best, so the
        # outcome does not depend on the order in which groups are visited.
        for group, group_runs in groups.items():
            mean_hv = mean_hvs[group]
            if len(group_runs) == NUM_SEEDS or not mean_hv > best_hv:
                continue

            if len(group_runs) < NUM_SEEDS:
                print(f"Adding {alg} - {env_id} with mean {mean_hv} and {len(group_runs)} runs to leftovers")
                processed_seeds = {run.config['seed'] for run in group_runs}
                leftovers.extend(
                    [alg, env_id, seed, group]
                    for seed in range(NUM_SEEDS)
                    if seed not in processed_seeds
                )
            else:
                print(f"Skipping {alg} - {env_id} - {group} with mean {mean_hv} and {len(group_runs)} runs")

In [None]:
# Save the runs to try and best runs to JSON files.
# The indexed mapping gets its own name so `leftovers` stays a list; rebinding
# it to a dict made a second execution of this cell enumerate the dict keys
# and write a corrupted file.
leftovers_by_idx = {idx: tpl for idx, tpl in enumerate(leftovers, 1)}
print(f"Number of runs leftover: {len(leftovers_by_idx)}")
with open('../experiments/evaluation/leftovers.json', 'w') as f:
    json.dump(leftovers_by_idx, f)

# Pairs without a selected group (value None) are written as an empty list.
best_run_paths = {
    env_id: {
        alg: ['/'.join(run.path) for run in (alg_runs or [])]
        for alg, alg_runs in best_data[env_id].items()
    }
    for env_id in best_data
}
# Context managers close (and flush) the files deterministically.
with open('data/best_runs.json', 'w') as f:
    json.dump(best_run_paths, f)

In [None]:
# Print best results
for env_id, alg_runs in best_data.items():
    for alg, best_runs in alg_runs.items():
        # A pair for which no group was selected holds None; report it instead
        # of failing while iterating over it.
        if not best_runs:
            print(f"No best run for {alg} - {env_id}")
            continue
        hvs = np.array([run.summary['outer/hypervolume'] for run in best_runs])
        print(f"Best run for {alg} - {env_id} | Mean HV: {np.mean(hvs)} - HVs: {hvs}")

In [None]:
# Extract the maximum number of iterations.
# For every (environment, algorithm) pair this stores the largest
# "last logged iteration + 1" over the selected runs (0 when nothing is logged).
max_iterations = {env_id: {alg: 0 for alg in algs} for env_id in env_ids}
for env_id, alg_runs in best_data.items():
    for alg, best_runs in alg_runs.items():
        # `best_runs or []` tolerates pairs for which no group was selected (None).
        for run in best_runs or []:
            # NOTE(review): Run.history() returns sampled records by default;
            # this presumes the final logged row is part of the sample - confirm.
            df = run.history(keys=['iteration'])
            if df.empty:
                continue
            # Index the column first (no row-wise dtype upcasting) and cast to a
            # plain int so the value can later be used as a list repeat count.
            iters = int(df['iteration'].iloc[-1]) + 1
            max_iterations[env_id][alg] = max(max_iterations[env_id][alg], iters)
print(max_iterations)

In [None]:
def fill_iterations(hypervolumes, coverages, max_iter):
    """Pad the hypervolume and coverage lists in place up to max_iter entries.

    Each list is extended independently by repeating its own last value until
    it holds at least max_iter entries. Lists that are already that long are
    left untouched (they are never truncated).

    Args:
        hypervolumes (list): List of hypervolumes.
        coverages (list): List of coverages.
        max_iter (int): Maximum number of iterations.

    Returns:
        None

    Raises:
        IndexError: If a list needs padding but is empty, so there is no last
            value to repeat.
    """
    for name, values in (("hypervolumes", hypervolumes), ("coverages", coverages)):
        if len(values) >= max_iter:
            continue
        if not values:
            raise IndexError(f"Cannot pad empty list '{name}' to {max_iter} entries")
        # Pad each list on its own length so that both end up with max_iter
        # entries even when they started with different lengths.
        last_value = values[-1]
        while len(values) < max_iter:
            values.append(last_value)

In [None]:
# Make dictionaries with the data and save to csv.
# For every (environment, algorithm) pair two long-format tables are written:
# one with the hypervolume and one with the clipped coverage per step and seed.
for env_id, alg_runs in best_data.items():
    for alg, best_runs in alg_runs.items():
        # A pair for which no group was selected holds None; skip it instead
        # of failing while iterating over it.
        if not best_runs:
            print(f"No data to save for {env_id} - {alg}")
            continue

        # Plain int: it is used below as a list repeat count.
        max_iter = int(max_iterations[env_id][alg])
        hv_dict = {alg: [], 'Step': [], 'Seed': []}
        cov_dict = {alg: [], 'Step': [], 'Seed': []}

        # NOTE(review): 'Seed' is the position of the run in the group, not
        # run.config['seed'] - confirm that this labelling is intended.
        for seed, run in enumerate(best_runs):
            # NOTE(review): Run.history() returns sampled records by default;
            # this presumes every logged iteration is included - confirm.
            hist = run.history(keys=['outer/hypervolume', 'outer/coverage'])
            hypervolumes = hist['outer/hypervolume'].values.tolist()
            coverages = list(np.clip(hist['outer/coverage'].values.tolist(), 0, 1))
            step_size = run.config['online_steps']

            # Shorter runs repeat their last value up to max_iter entries.
            fill_iterations(hypervolumes, coverages, max_iter)
            global_steps = (np.arange(max_iter) * step_size).tolist()

            hv_dict[alg].extend(hypervolumes)
            cov_dict[alg].extend(coverages)
            hv_dict['Step'].extend(global_steps)
            cov_dict['Step'].extend(global_steps)
            hv_dict['Seed'].extend([seed] * max_iter)
            cov_dict['Seed'].extend([seed] * max_iter)

        hv_df = pd.DataFrame.from_dict(hv_dict)
        cov_df = pd.DataFrame.from_dict(cov_dict)
        print(f"Saving data for {env_id} - {alg}")
        hv_df.to_csv(f'data/{alg}_{env_id}_hv.csv', index=False)
        cov_df.to_csv(f'data/{alg}_{env_id}_cov.csv', index=False)