In [None]:
import os

import numpy as np
import pandas as pd
import wandb

In [None]:
# Only runs whose config reports this environment are analysed further down.
env_id = "deep-sea-treasure-concave-v0"

# Handle on the W&B public API and the run collection of the
# "IPRO_experiments" project.
# NOTE(review): timeout=120 is presumably in seconds - confirm against wandb docs.
api = wandb.Api(timeout=120)
runs = api.runs("IPRO_experiments")

In [None]:
# Raw W&B histories grouped as run_hists[algorithm][argument set]; each leaf
# list receives one history object per matching run.
run_hists = {'ppo': {'arg1': [], 'arg2': [], 'arg3': []},
             'dqn': {'arg1': [], 'arg2': [], 'arg3': []},
             'a2c': {'arg1': [], 'arg2': [], 'arg3': []}}

for run in runs:
    # .get() so that runs logged without an 'env_id' config entry are skipped
    # instead of aborting the whole loop with a KeyError.
    if run.config.get('env_id') != env_id:
        continue

    name = run.name
    # The first '_'-separated token of the run name selects the algorithm and
    # the last token selects the argument set.
    splitted = name.split('_')
    alg = splitted[0]
    arg = splitted[-1]

    # Runs whose name does not map onto a known bucket are reported and
    # skipped rather than raising a KeyError.
    if alg not in run_hists or arg not in run_hists[alg]:
        print(f'Skipped run {name}: unknown bucket {alg} - {arg}')
        continue

    # NOTE(review): run.history() is believed to return a down-sampled history
    # by default - confirm that no run has more logged points than the
    # default sample budget, otherwise iterations are silently dropped.
    run_hists[alg][arg].append(run.history(keys=['outer/hypervolume', 'outer/coverage']))
    print(f'Added run {name} to {alg} - {arg}')

In [None]:
def extract_iter_hist(hist):
    """Split a run history into hypervolume and coverage sequences.

    Args:
        hist: Mapping-like object (e.g. a DataFrame) whose
            'outer/hypervolume' and 'outer/coverage' entries expose a
            ``.values`` sequence.

    Returns:
        A ``(hypervolumes, coverages)`` tuple of lists, with every coverage
        value clipped into the interval [0, 1].
    """
    hv_column = hist['outer/hypervolume']
    cov_column = hist['outer/coverage']
    # Coverage is clamped to [0, 1]; hypervolume is passed through untouched.
    clipped_cov = np.clip(list(cov_column.values), 0, 1)
    return list(hv_column.values), list(clipped_cov)

In [None]:
# Processed series, laid out as run_data[algorithm][argument set]; each leaf
# list receives one (hypervolumes, coverages) tuple per run.
run_data = {alg_name: {arg_name: [] for arg_name in ('arg1', 'arg2', 'arg3')}
            for alg_name in ('ppo', 'dqn', 'a2c')}

for alg, hists_per_arg in run_hists.items():
    print(f"Extracting data for {alg}")
    for arg, arg_hists in hists_per_arg.items():
        print(f"Extracting data for {alg} - {arg}")
        extracted = run_data[alg][arg]
        for seed, hist in enumerate(arg_hists):
            print(f'Run {seed}')
            # extract_iter_hist already returns the (hypervolumes, coverages) pair.
            extracted.append(extract_iter_hist(hist))

In [None]:
# Selected runs per algorithm: best_data[alg] is the list of
# (hypervolumes, coverages) tuples of the winning argument set, or stays
# empty when no argument set is eligible.
best_data = {'ppo': [],
             'dqn': [],
             'a2c': []}

# An argument set is only eligible once exactly this many runs were collected.
expected_runs = 5

# Extract the best argument i.e. the argument with the largest mean final hypervolume
for alg in run_data:
    best_arg = None
    # -inf instead of a fixed sentinel so any finite mean can win.
    best_mean = -np.inf
    for arg in run_data[alg]:
        hypervolumes = [tpl[0] for tpl in run_data[alg][arg]]
        # A run without a single logged iteration has no final value; indexing
        # it with [-1] would raise, so such runs are left out of the mean.
        final_hvs = [hv[-1] for hv in hypervolumes if len(hv) > 0]
        arg_mean = np.mean(final_hvs) if final_hvs else float('nan')
        print(f"Mean final hypervolume for {alg} - {arg}: {arg_mean}")

        # Eligible only if the expected number of runs exists and every one
        # of them contributed a final value.
        is_complete = (len(hypervolumes) == expected_runs
                       and len(final_hvs) == expected_runs)
        if is_complete and arg_mean > best_mean:
            best_mean = arg_mean
            best_arg = arg

    if best_arg is None:
        # Previously this case crashed with `KeyError: None`.
        print(f"No argument for {alg} has {expected_runs} complete runs; leaving it empty")
        print('---')
        continue

    print(f"Best argument for {alg} is {best_arg} with mean final hypervolume {best_mean}")
    print('---')
    best_data[alg] = run_data[alg][best_arg]

# Longest selected run across all algorithms. Computed from the final
# selection only, so argument sets that were merely a temporary best no
# longer inflate the padding target.
max_iter = max((len(tpl[0]) for selected in best_data.values() for tpl in selected),
               default=0)

In [None]:
def fill_iterations(hypervolumes, coverages, max_iter):
    """Pad both series in place by repeating their last value.

    One element is appended to each list for every iteration that
    ``hypervolumes`` is short of ``max_iter``. Lists already at least
    ``max_iter`` long are left untouched.

    Args:
        hypervolumes: List of hypervolume values; its length drives the padding.
        coverages: List of coverage values, padded in lockstep.
        max_iter: Target number of iterations.

    Returns:
        None. Both lists are modified in place.
    """
    for _step in range(len(hypervolumes), max_iter):
        hypervolumes.append(hypervolumes[-1])
        coverages.append(coverages[-1])

In [None]:
print(f"Max iterations: {max_iter}")
# Pad every selected run (in place) up to the common iteration count.
for alg_runs in best_data.values():
    for hypervolumes, coverages in alg_runs:
        fill_iterations(hypervolumes, coverages, max_iter)

In [None]:
# Make dictionaries with the data for all seeds.
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('results', exist_ok=True)

for alg in best_data:
    # Long-format tables: one row per (seed, iteration); the value column is
    # named after the algorithm.
    hv_dict = {alg: [], 'Iteration': [], 'Seed': []}
    cov_dict = {alg: [], 'Iteration': [], 'Seed': []}

    for seed, (hypervolumes, coverages) in enumerate(best_data[alg]):
        # Index columns follow each series' real length instead of the global
        # max_iter, so the columns stay aligned even if a run was not padded.
        num_hv = len(hypervolumes)
        num_cov = len(coverages)
        hv_dict[alg].extend(hypervolumes)
        cov_dict[alg].extend(coverages)
        hv_dict['Iteration'].extend(range(num_hv))
        cov_dict['Iteration'].extend(range(num_cov))
        hv_dict['Seed'].extend([seed] * num_hv)
        cov_dict['Seed'].extend([seed] * num_cov)

    hv_df = pd.DataFrame.from_dict(hv_dict)
    cov_df = pd.DataFrame.from_dict(cov_dict)
    hv_df.to_csv(f'results/{alg}_{env_id}_hv.csv', index=False)
    cov_df.to_csv(f'results/{alg}_{env_id}_cov.csv', index=False)