In [6]:
import wandb
import json
import pandas as pd
import numpy as np
from collections import defaultdict

In [16]:
# Experiment selection: the algorithm / environment combinations whose runs
# are extracted below.
algs = [
    "SN-MO-PPO",
    "SN-MO-DQN",
    "SN-MO-A2C",
]
env_ids = [
    "deep-sea-treasure-concave-v0",
    "minecart-v0",
    "mo-reacher-v4",
]

# W&B public API client with an explicit request timeout, and the collection of
# runs in the evaluation project.
api = wandb.Api(timeout=120)
runs = api.runs("wilrop/IPRO_runs")

In [17]:
# Group runs based on Parent runs, env_id and algorithm.
# Resulting layout: run_hists[env_id][alg][parent_run_id] -> list of runs.
run_hists = {env_id: {alg: defaultdict(list) for alg in algs} for env_id in env_ids}

for run in runs:
    # .get() returns None for a run whose config lacks these keys; None is in
    # neither selection list, so such a run is skipped by the filter below
    # instead of aborting the whole loop with a KeyError.
    env_id = run.config.get('env_id')
    alg = run.config.get('alg_name')
    if env_id in env_ids and alg in algs:
        parent_run = run.config['parent_run_id']
        run_hists[env_id][alg][parent_run].append(run)
        print(f'Added run to {alg} - {parent_run}')

Added run to SN-MO-DQN - wilrop/IPRO_opt/3th7rtwf
Added run to SN-MO-PPO - wilrop/IPRO_opt/2ovb5l08
Added run to SN-MO-PPO - wilrop/IPRO_opt/15ke2ffr
Added run to SN-MO-A2C - wilrop/IPRO_opt/14i992ci
Added run to SN-MO-PPO - wilrop/IPRO_opt/3jjbpzsj
Added run to SN-MO-PPO - wilrop/IPRO_opt/2dlhuhfi
Added run to SN-MO-DQN - wilrop/IPRO_opt/13tc3uki
Added run to SN-MO-PPO - wilrop/IPRO_opt/1hnv5e6y
Added run to SN-MO-DQN - wilrop/IPRO_opt/3r23ofpd
Added run to SN-MO-A2C - wilrop/IPRO_opt/28auaasv
Added run to SN-MO-DQN - wilrop/IPRO_opt/wo3xajt8
Added run to SN-MO-DQN - wilrop/IPRO_opt/1yy360a7
Added run to SN-MO-DQN - wilrop/IPRO_opt/3th7rtwf
Added run to SN-MO-A2C - wilrop/IPRO_opt/2qfbstvh
Added run to SN-MO-PPO - wilrop/IPRO_opt/1t0sgdjb
Added run to SN-MO-A2C - wilrop/IPRO_opt/319oolp1
Added run to SN-MO-A2C - wilrop/IPRO_opt/19f3hb1m
Added run to SN-MO-PPO - wilrop/IPRO_opt/1fypy5cj
Added run to SN-MO-A2C - wilrop/IPRO_opt/14i992ci
Added run to SN-MO-PPO - wilrop/IPRO_opt/1qgjuxzh


In [20]:
# Keep only the best runs.
# A parent configuration counts as complete once it has exactly this many runs;
# seeds are expected to be 0 .. NUM_SEEDS - 1.
NUM_SEEDS = 5

# best_data[env_id][alg]    -> list of runs of the best complete parent (or None).
# best_parents[env_id][alg] -> (parent id, list of run paths) for that parent (or None).
best_data = {env_id: {alg: None for alg in algs} for env_id in env_ids}
best_parents = {env_id: {alg: None for alg in algs} for env_id in env_ids}
# Entries of the form [alg, env_id, seed, parent] for seeds that still have to be run.
leftovers = []

for env_id in run_hists:
    for alg in run_hists[env_id]:
        best_hv = -1
        # The group is bound to `parent_runs` rather than `runs` so the global
        # `runs` collection fetched from the API is not overwritten.
        for parent, parent_runs in run_hists[env_id][alg].items():
            hvs = [run.summary['outer/hypervolume'] for run in parent_runs]
            mean_hv = np.mean(hvs)
            # Only groups that beat the best complete group seen so far are considered.
            if mean_hv > best_hv:
                if len(parent_runs) == NUM_SEEDS:
                    best_hv = mean_hv
                    best_data[env_id][alg] = parent_runs
                    best_parents[env_id][alg] = (parent, ['/'.join(run.path) for run in parent_runs])
                elif len(parent_runs) < NUM_SEEDS:
                    # Promising but incomplete: record the seeds that are still missing.
                    print(f"Adding {alg} - {env_id} - {parent} with mean {mean_hv} and {len(parent_runs)} runs to leftovers")
                    processed_seeds = {run.config['seed'] for run in parent_runs}
                    for seed in range(NUM_SEEDS):
                        if seed not in processed_seeds:
                            leftovers.append([alg, env_id, seed, parent])
                else:
                    # More runs than expected for one parent; ignored.
                    print(f"Skipping {alg} - {env_id} - {parent} with mean {mean_hv} and {len(parent_runs)} runs")

Adding SN-MO-PPO - deep-sea-treasure-concave-v0 - wilrop/IPRO_opt/1k8onny9 with mean 4002.75 and 4 runs to leftovers
Adding SN-MO-PPO - deep-sea-treasure-concave-v0 - wilrop/IPRO_opt/26o4dtht with mean 4022.0 and 2 runs to leftovers
Adding SN-MO-DQN - deep-sea-treasure-concave-v0 - wilrop/IPRO_opt/2rngyr7i with mean 4216.75 and 4 runs to leftovers
Adding SN-MO-DQN - deep-sea-treasure-concave-v0 - wilrop/IPRO_opt/2ngagpi1 with mean 4221.5 and 4 runs to leftovers
Adding SN-MO-A2C - deep-sea-treasure-concave-v0 - wilrop/IPRO_opt/1uxbj4qd with mean 4216.0 and 2 runs to leftovers
Skipping SN-MO-PPO - minecart-v0 - wilrop/IPRO_opt/1ucq5ti1 with mean 625.6622361028594 and 10 runs


In [22]:
# Save the runs to try and best runs to JSON files.
# The 1-based index is built under a new name so that `leftovers` keeps its
# list form: re-running this cell then writes the same file again instead of
# enumerating the keys of an already-converted dict.
leftovers_by_index = dict(enumerate(leftovers, 1))
print(f"Number of runs leftover: {len(leftovers_by_index)}")

# Context managers close (and therefore flush) each file once it is written.
with open('../experiments/evaluation/leftovers.json', 'w') as leftovers_file:
    json.dump(leftovers_by_index, leftovers_file)
with open('data/best_parents.json', 'w') as best_parents_file:
    json.dump(best_parents, best_parents_file)

Number of runs leftover: 9


In [23]:
# Print best results
for env_id in best_data:
    for alg, best_runs in best_data[env_id].items():
        # Entries are initialised to None and only filled when a complete
        # group of runs was selected, so there may be nothing to report.
        if best_runs is None:
            print(f"No best run for {alg} - {env_id}")
            continue
        parent = best_parents[env_id][alg]
        hvs = np.array([run.summary['outer/hypervolume'] for run in best_runs])
        print(f"Best run for {alg} - {env_id} - {parent}")
        print(f"Mean HV: {np.mean(hvs)} - HVs: {hvs}")

Best run for SN-MO-PPO - deep-sea-treasure-concave-v0 - ('wilrop/IPRO_opt/2b6o3lp6', ['wilrop/IPRO_runs/14jhj2t6', 'wilrop/IPRO_runs/mvs6ivt0', 'wilrop/IPRO_runs/1n9ur3hb', 'wilrop/IPRO_runs/2peqgch9', 'wilrop/IPRO_runs/shm67quo'])
Mean HV: 4038.4 - HVs: [3609 4077 4242 4243 4021]
Best run for SN-MO-DQN - deep-sea-treasure-concave-v0 - ('wilrop/IPRO_opt/3r23ofpd', ['wilrop/IPRO_runs/28mnzit9', 'wilrop/IPRO_runs/3aw7vr5u', 'wilrop/IPRO_runs/2zbjp4b2', 'wilrop/IPRO_runs/ebp3ka59', 'wilrop/IPRO_runs/1ch3652e'])
Mean HV: 3377.2 - HVs: [4196    0 4246 4197 4247]
Best run for SN-MO-A2C - deep-sea-treasure-concave-v0 - ('wilrop/IPRO_opt/3a7qvc13', ['wilrop/IPRO_runs/3ea7yn9i', 'wilrop/IPRO_runs/2l88xw6b', 'wilrop/IPRO_runs/1qpaqxep', 'wilrop/IPRO_runs/2mlxq9oy', 'wilrop/IPRO_runs/1s99x09o'])
Mean HV: 4159.0 - HVs: [4245 4201 4238 3862 4249]
Best run for SN-MO-PPO - minecart-v0 - ('wilrop/IPRO_opt/1yavu2eq', ['wilrop/IPRO_runs/1w9vpxix', 'wilrop/IPRO_runs/363i3xko', 'wilrop/IPRO_runs/1bj4yyap'

In [None]:
# Extract the maximum number of iterations.
# max_iterations[env_id][alg] -> largest (last logged iteration + 1) over the best runs.
max_iterations = {env_id: {alg: 0 for alg in algs} for env_id in env_ids}
for env_id in best_data:
    for alg, best_runs in best_data[env_id].items():
        # No runs were selected for this combination; keep the default of 0.
        if best_runs is None:
            continue
        for run in best_runs:
            # NOTE(review): run.history() may return a sampled subset of rows for
            # long runs — confirm the last row is always the final iteration.
            df = run.history(keys=['iteration'])
            if not df.empty:
                # Cast to a plain int so later list repetition and printing do
                # not depend on the numpy scalar type pandas hands back.
                iters = int(df['iteration'].iloc[-1]) + 1
                max_iterations[env_id][alg] = max(max_iterations[env_id][alg], iters)
print(max_iterations)

In [None]:
def fill_iterations(hypervolumes, coverages, max_iter):
    """Pad the hypervolume and coverage lists in place up to max_iter entries.

    Each list that is shorter than max_iter is extended by repeating its own
    last value until it holds at least max_iter entries. The two lists are
    padded independently, so both reach max_iter even when they start with
    different lengths. A list that is already long enough is left untouched.

    Args:
        hypervolumes (list): List of hypervolumes.
        coverages (list): List of coverages.
        max_iter (int): Maximum number of iterations.

    Returns:
        None

    Raises:
        IndexError: If a list that needs padding is empty, since there is no
            last value to repeat. Raised before either list is modified.
    """
    series_to_pad = [series for series in (hypervolumes, coverages) if len(series) < max_iter]

    # Validate up front so a failure never leaves one list padded and the other not.
    if any(len(series) == 0 for series in series_to_pad):
        raise IndexError("cannot pad an empty list: there is no last value to repeat")

    for series in series_to_pad:
        last_value = series[-1]
        while len(series) < max_iter:
            series.append(last_value)

In [None]:
# Make dictionaries with the data and save to csv.
# For every (env_id, alg) pair this writes two long-format CSV files, one for
# hypervolume and one for coverage, with columns <alg>, 'Step' and 'Seed'.
for env_id in best_data:
    for alg, best_runs in best_data[env_id].items():
        # Entries stay None when no group of runs was selected for this pair.
        if best_runs is None:
            print(f"No best runs for {env_id} - {alg}, nothing to save")
            continue

        max_iter = max_iterations[env_id][alg]
        hv_dict = {alg: [], 'Step': [], 'Seed': []}
        cov_dict = {alg: [], 'Step': [], 'Seed': []}

        # 'Seed' is the position of the run within the group, not run.config['seed'].
        for seed, run in enumerate(best_runs):
            hist = run.history(keys=['outer/hypervolume', 'outer/coverage'])
            if hist.empty:
                # Nothing was logged for this run, so there is no series to pad or save.
                print(f"Skipping run {seed} of {env_id} - {alg}: empty history")
                continue

            hypervolumes = hist['outer/hypervolume'].values.tolist()
            # Coverage values are clamped to the [0, 1] range.
            coverages = list(np.clip(hist['outer/coverage'].values.tolist(), 0, 1))
            step_size = run.config['online_steps']
            fill_iterations(hypervolumes, coverages, max_iter)

            # Use the actual series length for the step and seed columns so all
            # three columns always have the same number of rows.
            last_iter = len(hypervolumes)
            global_steps = (np.arange(last_iter) * step_size).tolist()

            hv_dict[alg].extend(hypervolumes)
            cov_dict[alg].extend(coverages)
            hv_dict['Step'].extend(global_steps)
            cov_dict['Step'].extend(global_steps)
            hv_dict['Seed'].extend([seed] * last_iter)
            cov_dict['Seed'].extend([seed] * last_iter)

        hv_df = pd.DataFrame.from_dict(hv_dict)
        cov_df = pd.DataFrame.from_dict(cov_dict)
        print(f"Saving data for {env_id} - {alg}")
        hv_df.to_csv(f'data/{alg}_{env_id}_hv.csv', index=False)
        cov_df.to_csv(f'data/{alg}_{env_id}_cov.csv', index=False)