In [None]:
import wandb
import pandas as pd
import numpy as np
from collections import defaultdict

In [None]:
# Environment whose runs are collected and exported by the cells below.
env_id = "mo-reacher-v4"

# W&B client (created with timeout=120) and the runs listed under the
# "wilrop/IPRO_runs" path; `runs` is iterated later to read each run's
# config and history.
api = wandb.Api(timeout=120)
runs = api.runs("wilrop/IPRO_runs")

In [None]:
# Algorithm -> environment id -> parent run ids. A run is attributed to an
# algorithm when its config's 'parent_run_id' appears in the list for the
# current `env_id`.
parent_runs = {
        'dqn': {
            'deep-sea-treasure-concave-v0': ['wilrop/IPRO_opt/2tn5owa1',
                                             'wilrop/IPRO_opt/zetw2qex'],
            'minecart-v0': [],
            'mo-reacher-v4': ['wilrop/IPRO_opt/vbxkaso4']
        },
        'a2c': {
            'deep-sea-treasure-concave-v0': ['wilrop/IPRO_opt/g347p7nz',
                                             'wilrop/IPRO_opt/10sl3sct',
                                             'wilrop/IPRO_opt/239dy8eu',
                                             'wilrop/IPRO_opt/37hzrb0t',
                                             'wilrop/IPRO_opt/2ilk1u5q',
                                             'wilrop/IPRO_opt/2s5q3nff'],
            'minecart-v0': [],
            'mo-reacher-v4': ['wilrop/IPRO_opt/2ga1y2rc',
                              'wilrop/IPRO_opt/2i8jgu31',
                              'wilrop/IPRO_opt/34omm1q6',
                              'wilrop/IPRO_opt/y5gcmdof',
                              'wilrop/IPRO_opt/18kxy4la',
                              'wilrop/IPRO_opt/2niygvug',
                              'wilrop/IPRO_opt/1hf09bbk',
                              'wilrop/IPRO_opt/3abwo2bu',
                              'wilrop/IPRO_opt/2cfpvgbm']
        },
        'ppo': {
            'deep-sea-treasure-concave-v0': ['wilrop/IPRO_opt/3gyzdum4',
                                             'wilrop/IPRO_opt/g1dxgkl4'],
            'minecart-v0': ['wilrop/IPRO_opt/14kyvvys',
                            'wilrop/IPRO_opt/1ucq5ti1',
                            'wilrop/IPRO_opt/31wg6d5l'],
            'mo-reacher-v4': ['wilrop/IPRO_opt/3jbowwrd',
                              'wilrop/IPRO_opt/3pzv9rk4',
                              'wilrop/IPRO_opt/14jtu9k9',
                              'wilrop/IPRO_opt/1xox36av',
                              'wilrop/IPRO_opt/2je0ir5m',
                              'wilrop/IPRO_opt/19jh6tg4']
        }
    }

# Algorithm -> parent run id -> list of (history, online_steps) tuples, one
# per matching run. Keys are derived from `parent_runs` so the two mappings
# cannot drift apart.
run_hists = {alg_name: defaultdict(list) for alg_name in parent_runs}

for run in runs:
    config = run.config

    # Runs without an 'env_id' entry, or logged for another environment,
    # are skipped instead of raising KeyError.
    if config.get('env_id') != env_id:
        continue

    # A missing 'parent_run_id' yields None, which matches no listed parent.
    parent_run = config.get('parent_run_id')

    alg = None
    for key, values in parent_runs.items():
        # Tolerate algorithms that have no entry for this environment.
        if parent_run in values.get(env_id, ()):
            alg = key
            break

    if alg is not None:
        history = run.history(keys=['outer/hypervolume', 'outer/coverage'])
        run_hists[alg][parent_run].append((history, config['online_steps']))
        print(f'Added run to {alg} - {parent_run}')

In [None]:
def extract_iter_hist(hist):
    """Pull the hypervolume and coverage series out of a run history.

    Args:
        hist: Table-like object indexable by column name whose
            'outer/hypervolume' and 'outer/coverage' columns expose
            ``.values`` (presumably the DataFrame returned by
            ``run.history`` - confirm against the caller).

    Returns:
        A ``(hypervolumes, coverages)`` tuple of lists. Hypervolumes are
        returned as logged; coverages are clipped to the range [0, 1].
    """
    hv_series = hist['outer/hypervolume']
    cov_series = hist['outer/coverage']

    raw_coverages = cov_series.values.tolist()
    # Coverage values outside [0, 1] are clamped to the nearest bound.
    clipped_coverages = np.clip(raw_coverages, 0, 1)

    return hv_series.values.tolist(), list(clipped_coverages)

In [None]:
# Algorithm -> parent run id -> list of (hypervolumes, coverages, step_size)
# tuples, one per collected run history.
run_data = {alg_name: defaultdict(list) for alg_name in ('dqn', 'a2c', 'ppo')}

for alg, hists_per_arg in run_hists.items():
    print(f"Extracting data for {alg}")
    for arg, seed_entries in hists_per_arg.items():
        print(f"Extracting data for {alg} - {arg}")
        for seed, entry in enumerate(seed_entries):
            print(f'Run {seed}')
            hist, step_size = entry
            # Flatten the history into plain lists and keep the step size
            # alongside them.
            run_data[alg][arg].append((*extract_iter_hist(hist), step_size))

In [None]:
# Number of collected runs an argument must have to be eligible as "best".
NUM_SEEDS = 5

# Algorithm -> list of (hypervolumes, coverages, step_size) tuples belonging
# to the selected argument; stays empty when no argument is eligible.
best_data = {'ppo': [],
             'dqn': [],
             'a2c': []}

# Extract the best argument i.e. the argument with the largest mean final hypervolume
# `max_iter` is the longest hypervolume history among the selected runs of
# all algorithms.
max_iter = 0

for alg in run_data:
    best_arg = None
    # -1 sentinels: assumes mean final hypervolumes are non-negative - TODO confirm.
    best_mean = -1
    best_std = -1
    for arg, seed_runs in run_data[alg].items():
        final_hvs = [seed_hvs[-1] for seed_hvs, _, _ in seed_runs]
        arg_mean = np.mean(final_hvs)
        arg_std = np.std(final_hvs)
        print(f"Mean final hypervolume for {alg} - {arg}: {arg_mean}")

        # Only arguments with a complete set of runs may be selected.
        if arg_mean > best_mean and len(seed_runs) == NUM_SEEDS:
            best_mean = arg_mean
            best_arg = arg
            best_std = arg_std
    print(f"Best argument for {alg} is {best_arg} with final hypervolume {best_mean} +/- {best_std}")
    print('---')

    if best_arg is None:
        # Indexing the defaultdict with None would silently insert a bogus
        # None key into run_data; assign the empty result explicitly.
        best_data[alg] = []
    else:
        best_data[alg] = run_data[alg][best_arg]
        # Take the length from the finally selected runs only, so candidates
        # that were best-so-far but later beaten cannot inflate it.
        max_iter = max(max_iter, max(len(seed_hvs) for seed_hvs, _, _ in best_data[alg]))

In [None]:
def fill_iterations(hypervolumes, coverages, max_iter):
    """Pad both series in place by repeating their last value.

    One element is appended to each list per missing iteration until
    ``hypervolumes`` holds ``max_iter`` entries. Lists that already have at
    least ``max_iter`` hypervolumes are left untouched.

    Args:
        hypervolumes: List of hypervolume values; mutated in place.
        coverages: List of coverage values; mutated in place.
        max_iter: Target number of hypervolume entries.

    Returns:
        None.
    """
    missing = max_iter - len(hypervolumes)
    for _ in range(max(missing, 0)):
        hypervolumes.append(hypervolumes[-1])
        coverages.append(coverages[-1])

In [None]:
# Make dictionaries with the data for all seeds.
# For every algorithm, build one long-format table for hypervolume and one
# for coverage (value column named after the algorithm, plus 'Step' and
# 'Seed') and write both to CSV.
for alg in best_data:
    hv_dict = {alg: [], 'Step': [], 'Seed': []}
    cov_dict = {alg: [], 'Step': [], 'Seed': []}

    for seed, (hypervolumes, coverages, step_size) in enumerate(best_data[alg]):
        # Pad copies: the originals are shared with best_data/run_data and
        # must not be mutated by this cell.
        hypervolumes = list(hypervolumes)
        coverages = list(coverages)
        fill_iterations(hypervolumes, coverages, max_iter)

        # Size every column from the actual padded length so they always
        # agree, even if a history is longer than max_iter.
        num_steps = len(hypervolumes)
        global_steps = (np.arange(num_steps) * step_size).tolist()

        hv_dict[alg].extend(hypervolumes)
        cov_dict[alg].extend(coverages)
        hv_dict['Step'].extend(global_steps)
        cov_dict['Step'].extend(global_steps)
        hv_dict['Seed'].extend([seed] * num_steps)
        cov_dict['Seed'].extend([seed] * num_steps)

    hv_df = pd.DataFrame.from_dict(hv_dict)
    cov_df = pd.DataFrame.from_dict(cov_dict)
    print(f"Saving data for {alg}")
    hv_df.to_csv(f'../utils/results/{alg}_{env_id}_hv.csv', index=False)
    cov_df.to_csv(f'../utils/results/{alg}_{env_id}_cov.csv', index=False)