In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import wandb

In [2]:
# Environment whose runs are analysed; compared against each run's
# config['env_id'] and embedded in the names of the exported CSV files.
env_id = "mo-reacher-v4"

# W&B public-API client (created with timeout=120) and the collection of runs
# found under the "IPRO_experiments_v2" path.
api = wandb.Api(timeout=120)
runs = api.runs("IPRO_experiments_v2")

In [3]:
# Histories grouped as run_hists[algorithm][argument] -> list with one history
# per run. The algorithm is the first '_'-separated token of the run name and
# the argument the last one (e.g. "ppo_..._arg1").
run_hists = {'ppo': {'arg1': [], 'arg2': [], 'arg3': []},
             'dqn': {'arg1': [], 'arg2': [], 'arg3': []},
             'a2c': {'arg1': [], 'arg2': [], 'arg3': []}}

for run in runs:
    # .get() instead of indexing: a run whose config has no 'env_id' entry is
    # simply not part of this environment and must not abort the whole loop.
    if run.config.get('env_id') != env_id:
        continue

    name = run.name
    splitted = name.split('_')
    alg = splitted[0]
    arg = splitted[-1]

    # Runs whose name does not follow the <alg>_..._<arg> scheme are reported
    # and skipped rather than raising a KeyError halfway through the download.
    if alg not in run_hists or arg not in run_hists[alg]:
        print(f'Skipped run {name}: unrecognised algorithm/argument {alg} - {arg}')
        continue

    # Only the two outer-loop metrics are fetched; the result is later indexed
    # by these column names in extract_iter_hist.
    run_hists[alg][arg].append(run.history(keys=['outer/hypervolume', 'outer/coverage']))
    print(f'Added run {name} to {alg} - {arg}')

Added run a2c_dst__2__arg3 to a2c - arg3
Added run dqn_dst__4__arg2 to dqn - arg2
Added run a2c_dst__1__arg1 to a2c - arg1
Added run ppo_dst__4__arg1 to ppo - arg1
Added run a2c_dst__1__arg2 to a2c - arg2
Added run a2c_dst__2__arg1 to a2c - arg1
Added run ppo_dst__1__arg3 to ppo - arg3
Added run dqn_dst__2__arg3 to dqn - arg3
Added run a2c_dst__4__arg2 to a2c - arg2
Added run dqn_dst__0__arg2 to dqn - arg2
Added run a2c_dst__3__arg2 to a2c - arg2
Added run dqn_dst__0__arg3 to dqn - arg3
Added run ppo_dst__3__arg3 to ppo - arg3
Added run ppo_dst__4__arg3 to ppo - arg3
Added run a2c_dst__0__arg3 to a2c - arg3
Added run a2c_dst__3__arg1 to a2c - arg1
Added run ppo_dst__2__arg2 to ppo - arg2
Added run ppo_dst__2__arg1 to ppo - arg1
Added run ppo_dst__0__arg2 to ppo - arg2
Added run ppo_dst__4__arg2 to ppo - arg2
Added run ppo_dst__3__arg2 to ppo - arg2
Added run ppo_dst__0__arg3 to ppo - arg3
Added run dqn_dst__1__arg2 to dqn - arg2
Added run ppo_dst__1__arg2 to ppo - arg2
Added run a2c_ds

In [4]:
def extract_iter_full_hist(hist):
    """Split an iterable of rows into hypervolume and coverage lists.

    Each row is indexed positionally: ``row[0]`` is taken as the hypervolume
    and ``row[1]`` as the coverage. Rows whose hypervolume is ``None`` are
    dropped entirely (their coverage is not read).

    Returns:
        A ``(hypervolumes, coverages)`` tuple of two equally long lists.
    """
    kept_rows = [(row[0], row[1]) for row in hist if row[0] is not None]
    hypervolumes = [hv for hv, _ in kept_rows]
    coverages = [cov for _, cov in kept_rows]
    return hypervolumes, coverages

def extract_iter_hist(hist):
    """Pull the hypervolume and coverage series out of a run history.

    ``hist`` is indexed by the column names 'outer/hypervolume' and
    'outer/coverage'; each column must expose ``.values``. Coverage values
    are clipped into the interval [0, 1]; hypervolumes are returned as-is.

    Returns:
        A ``(hypervolumes, coverages)`` tuple of lists.
    """
    hv_column = hist['outer/hypervolume']
    cov_column = hist['outer/coverage']
    clipped_cov = np.clip(list(cov_column.values), 0, 1)
    return list(hv_column.values), list(clipped_cov)

In [5]:
# run_data[algorithm][argument] -> list of (hypervolumes, coverages) tuples,
# one tuple per downloaded run, in the order the histories were collected.
run_data = {alg_name: {arg_name: [] for arg_name in ('arg1', 'arg2', 'arg3')}
            for alg_name in ('ppo', 'dqn', 'a2c')}

for alg, hists_by_arg in run_hists.items():
    print(f"Extracting data for {alg}")
    for arg, arg_hists in hists_by_arg.items():
        print(f"Extracting data for {alg} - {arg}")
        for seed, hist in enumerate(arg_hists):
            print(f'Run {seed}')
            hv_series, cov_series = extract_iter_hist(hist)
            run_data[alg][arg].append((hv_series, cov_series))

Extracting data for ppo
Extracting data for dqn
Extracting data for a2c


In [6]:
# best_data[algorithm] -> the (hypervolumes, coverages) tuples of the runs that
# belong to that algorithm's best argument.
best_data = {'ppo': [],
             'dqn': [],
             'a2c': []}

# Extract the best argument i.e. the argument with the largest mean final hypervolume.
# Only arguments with more than one run are eligible.
for alg in run_data:
    best_arg = None
    best_mean = float('-inf')
    for arg in run_data[alg]:
        hypervolumes = [tpl[0] for tpl in run_data[alg][arg]]
        # Eligibility is checked before the mean is computed so that an
        # argument without runs never triggers a mean over an empty list.
        if len(hypervolumes) <= 1:
            continue
        arg_mean = np.mean([hv[-1] for hv in hypervolumes])
        if arg_mean > best_mean:
            best_mean = arg_mean
            best_arg = arg

    if best_arg is None:
        # Previously this ended in `run_data[alg][None]` -> KeyError.
        print(f"No argument for {alg} has more than one run; leaving best_data['{alg}'] empty")
        continue

    print(f"Best argument for {alg} is {best_arg} with mean final hypervolume {best_mean}")
    best_data[alg] = run_data[alg][best_arg]

# Longest hypervolume series among the *selected* runs only. Computing it here,
# after the selection, keeps arguments that were only provisionally the best
# from inflating the padding length used by the following cells.
max_iter = max(
    (len(hv_series) for selected_runs in best_data.values() for hv_series, _ in selected_runs),
    default=0,
)

Best argument for ppo is arg1 with mean final hypervolume 4233.4
Best argument for dqn is arg3 with mean final hypervolume 4243.8
Best argument for a2c is arg1 with mean final hypervolume 4249.6


In [7]:
def fill_iterations(hypervolumes, coverages, max_iter):
    """Pad both lists in place until ``hypervolumes`` holds ``max_iter`` items.

    Every padding step repeats the current last element of each list, so a
    run that stopped early is carried forward with its final values. Nothing
    happens when ``hypervolumes`` is already at least ``max_iter`` long.
    The function mutates its arguments and returns ``None``.
    """
    missing = max_iter - len(hypervolumes)
    # range() of a non-positive count is empty, which covers the no-op case.
    for _ in range(missing):
        hypervolumes.append(hypervolumes[-1])
        coverages.append(coverages[-1])

In [8]:
# Bring every selected run to a common length of max_iter entries; the lists
# stored in best_data are extended in place by fill_iterations.
print(f"Max iterations: {max_iter}")
for selected_runs in best_data.values():
    for hypervolumes, coverages in selected_runs:
        fill_iterations(hypervolumes, coverages, max_iter)

Max iterations: 26


In [9]:
# Make dictionaries with the data for all seeds.
for alg in best_data:
    hv_dict = {alg: [], 'Iteration': [], 'Seed': []}
    cov_dict = {alg: [], 'Iteration': [], 'Seed': []}

    for seed, (hypervolumes, coverages) in enumerate(best_data[alg]):
        hv_dict[alg].extend(hypervolumes)
        cov_dict[alg].extend(coverages)
        hv_dict['Iteration'].extend(range(max_iter))
        cov_dict['Iteration'].extend(range(max_iter))
        hv_dict['Seed'].extend([seed] * max_iter)
        cov_dict['Seed'].extend([seed] * max_iter)

    hv_df = pd.DataFrame.from_dict(hv_dict)
    cov_df = pd.DataFrame.from_dict(cov_dict)
    hv_df.to_csv(f'results/{alg}_{env_id}_hv.csv', index=False)
    cov_df.to_csv(f'results/{alg}_{env_id}_cov.csv', index=False)