In [1]:
import pandas as pd
import torch
import os
from matplotlib import pyplot as plt
import numpy as np
import tensorboard as tb
from tensorboard.backend.event_processing import event_accumulator

import seaborn as sns
sns.set_theme()
%matplotlib inline

In [2]:
# Success rate assumed before a novelty is injected.
# NOTE(review): no other cell visible in this notebook reads PREV_SUCC_RATE;
# get_metrics uses the lower-case global `pre_novelty_sr` (same value) instead.
PREV_SUCC_RATE = 1.0

In [3]:
import re

In [4]:
# Event files of the two baseline runs (one per algorithm). Despite the `_DIR`
# suffix, each value is the path of a single TensorBoard event file; both are
# passed to load_ea() to compute the baseline metrics.
# NOTE(review): the ICM baseline lives under eval/eval while the PPO baseline
# (and `results_folder`) use eval/eval2 — confirm this is intentional.
ICM_RUN_DIR = "eval/eval/rs_s/none/lidar_all/icm_ppo/1/events.out.tfevents.1696383185.HAL.1649658.0"
PPO_RUN_DIR = "eval/eval2/rs_s/none/lidar_all/ppo/12/events.out.tfevents.1696721329.Yichens-MacBook-Pro-2.local.57943.0"

In [5]:
# Root directory (relative to the working directory) that the results sweep
# walks as <results_folder>/<env>/<novelty>/<obs_type>/<algo>/<seed>/.
results_folder = "eval/eval2"

In [6]:
# Directory names iterated by the results sweep: one sub-folder per novelty
# and, below it, one per algorithm.
NOVELTIES = ["axe", "chest", "dist_trade", "fence", "fire"]
ALGOS = ["icm_ppo", "ppo"]

In [7]:
# # ea = event_accumulator.EventAccumulator('eval/eval/rs_s/none/lidar_all/icm_ppo/1/events.out.tfevents.1696383185.HAL.1649658.0',
# ea = event_accumulator.EventAccumulator('eval/eval/sa/axe/lidar_all/icm_ppo/3/events.out.tfevents.1696472442.HAL.187936.0',
#   size_guidance={ # see below regarding this argument
#       event_accumulator.COMPRESSED_HISTOGRAMS: 500,
#       event_accumulator.IMAGES: 4,
#       event_accumulator.AUDIO: 4,
#       event_accumulator.SCALARS: 0,
#       event_accumulator.HISTOGRAMS: 1,
#   })
# ea.Reload()

In [8]:
import queue
# Success rate before the novelty is introduced; get_metrics() reads this
# global and subtracts the first post-novelty success rate from it.
pre_novelty_sr = 1.0

In [9]:
def get_baseline_metrics(ea):
    """Summarise a baseline run from its logged test scalars.

    Args:
        ea: object exposing ``Scalars(tag)`` that returns a sequence of
            events carrying a ``.value`` attribute.

    Returns:
        A ``(success_rate, mean_length)`` tuple: the value of the last
        'test/percent_dones' point and the mean of the last five
        'test/length' points (fewer if the run logged fewer than five).
    """
    window = 5
    recent_lengths = [event.value for event in ea.Scalars('test/length')[-window:]]
    final_success_rate = ea.Scalars('test/percent_dones')[-1].value
    return final_success_rate, np.mean(recent_lengths)

In [10]:
def load_ea(path):
    """Read a TensorBoard event file into an EventAccumulator.

    Args:
        path: location handed to ``event_accumulator.EventAccumulator``.

    Returns:
        The accumulator after ``Reload()`` has been called on it.
    """
    # Number of items kept per tag for each category. Per the TensorBoard
    # documentation a size of 0 stores all events, so no scalar points are dropped.
    guidance = {
        event_accumulator.COMPRESSED_HISTOGRAMS: 500,
        event_accumulator.IMAGES: 4,
        event_accumulator.AUDIO: 4,
        event_accumulator.SCALARS: 0,
        event_accumulator.HISTOGRAMS: 1,
    }
    accumulator = event_accumulator.EventAccumulator(path, size_guidance=guidance)
    accumulator.Reload()
    return accumulator

In [11]:
# NOTE(review): PLANNING_LEN is not referenced by any other cell visible in
# this notebook.
PLANNING_LEN = 17
# Load the two baseline event files and reduce each to
# (last 'test/percent_dones' value, mean of the last 5 'test/length' values).
# get_metrics() reads icm_last_5_len / ppo_last_5_len as globals for "sa" runs.
icm_rl_ea = load_ea(ICM_RUN_DIR)
ppo_rl_ea = load_ea(PPO_RUN_DIR)
icm_succ_rate, icm_last_5_len = get_baseline_metrics(icm_rl_ea)
ppo_succ_rate, ppo_last_5_len = get_baseline_metrics(ppo_rl_ea)

In [12]:
# Display the baseline values: (success rate, mean length) for ICM-PPO, then PPO.
icm_succ_rate, icm_last_5_len, ppo_succ_rate, ppo_last_5_len

(1.0, 35.0060001373291, 1.0, 64.5160011291504)

In [13]:
def get_metrics(ea, env_type, algo, pre_novelty_success_rate=None, baseline_lengths=None):
    """Compute the novelty-adaptation metrics of one run.

    The logged 'test/percent_dones', 'test/reward' and 'test/length' series
    are walked in lock-step; the metrics are taken at the first point where
    the run counts as adapted (see the criteria in the loop body).

    Args:
        ea: object exposing ``Scalars(tag)`` that returns a sequence of
            events carrying ``.step`` and ``.value``.
        env_type: environment key; only the value "sa" changes the result
            (the baseline episode length is subtracted from the last metric).
        algo: algorithm key used to pick the baseline length for "sa" runs.
        pre_novelty_success_rate: success rate before the novelty. When
            omitted, the module-level ``pre_novelty_sr`` is used.
        baseline_lengths: optional mapping ``algo -> baseline episode length``
            used for "sa" runs. When omitted, the module-level
            ``icm_last_5_len`` / ``ppo_last_5_len`` are used for
            "icm_ppo" / "ppo". Algorithms without an entry get no correction.

    Returns:
        ``None`` if the run logged no points or never met the criteria,
        otherwise a 4-tuple (named m2..m5 downstream):
            i_novelty                – pre-novelty success rate minus the first
                                       logged success rate,
            time_to_adapt            – ``.step`` of the point where the
                                       criteria were first met,
            post_novelty_performance – success rate at that point,
            avg_last_length          – mean length over the last ``eta`` points
                                       (minus the baseline length for "sa").

    Raises:
        Whatever ``ea.Scalars`` raises for a tag that was not logged.
    """
    eta = 5  # size of the window the thresholds are evaluated on
    min_succ_rate = 0.9
    min_reward = 400
    upsilon = 5  # improvement_compare_window
    upe = upsilon + eta

    if pre_novelty_success_rate is None:
        pre_novelty_success_rate = pre_novelty_sr

    done_events = ea.Scalars('test/percent_dones')
    reward_events = ea.Scalars('test/reward')
    length_events = ea.Scalars('test/length')
    if not done_events:
        # Nothing was logged: treat as "did not finish" instead of raising IndexError.
        return None

    i_novelty = pre_novelty_success_rate - done_events[0].value

    # zip() stops at the shortest series, so the "last point" is the last zipped one.
    history_size = min(len(done_events), len(reward_events), len(length_events))

    pd_hist = []
    rew_hist = []
    len_hist = []

    # `done` (not `pd`) so the pandas alias is not shadowed inside the function.
    for idx, (done, rew, length) in enumerate(zip(done_events, reward_events, length_events)):
        pd_hist.append(done.value)
        rew_hist.append(rew.value)
        len_hist.append(length.value)

        # Criteria 1 and 2: enough history, and the last `eta` points clear
        # both the success-rate and the reward threshold.
        meets_thresholds = (
            len(pd_hist) > eta
            and np.average(pd_hist[-eta:]) > min_succ_rate
            and np.min(rew_hist[-eta:]) > min_reward
        )
        # Criterion 3: no improvement window.
        # NOTE(review): the `upe` window contains the `eta` window, so both
        # comparisons hold for any non-NaN data; comparing against the
        # preceding window (`[-upe:-eta]`) may have been intended. Left as is
        # because changing it would alter previously reported numbers.
        no_improvement = (
            np.max(rew_hist[-upe:]) >= np.max(rew_hist[-eta:])
            and np.max(pd_hist[-upe:]) >= np.max(pd_hist[-eta:])
        )
        is_last_point = idx + 1 == history_size

        if meets_thresholds and (no_improvement or is_last_point):
            time_to_adapt = done.step
            post_novelty_performance = pd_hist[-1]
            avg_last_length = np.mean(len_hist[-eta:])
            if env_type == "sa":
                if baseline_lengths is None:
                    baseline_lengths = {"icm_ppo": icm_last_5_len, "ppo": ppo_last_5_len}
                if algo in baseline_lengths:
                    avg_last_length -= baseline_lengths[algo]
            return i_novelty, time_to_adapt, post_novelty_performance, avg_last_length
    return None

In [14]:
# Sweep <results_folder>/<env>/<novelty>/<obs_type>/<algo>/<seed>/ and aggregate
# the get_metrics() tuples of every event file into per-configuration mean/std.
results = {}
obs_type = "lidar_all"
for env in ["pf", "sa"]:
    for novelty in NOVELTIES:
        for algo in ALGOS:
            path = os.path.join(results_folder, env, novelty, obs_type, algo)
            try:
                seeds = os.listdir(path)
            except (FileNotFoundError, NotADirectoryError):
                # This configuration has no results folder: no entry is recorded.
                continue

            metric_hist = []
            for seed in seeds:
                run_path = os.path.join(path, seed)
                if not os.path.isdir(run_path):
                    continue
                event_files = [filename for filename in os.listdir(run_path) if "events.out.tfevents" in filename]
                for event_file in event_files:
                    # Load the file of THIS iteration; always loading the first
                    # file would count it once per event file in the folder.
                    event_path = os.path.join(run_path, event_file)
                    try:
                        result = get_metrics(load_ea(event_path), env_type=env, algo=algo)
                    except Exception as exc:
                        # Best effort: report the unreadable run and keep going
                        # instead of silently dropping it.
                        print(event_path, "could not be processed:", repr(exc))
                        continue
                    if result is None:
                        print(event_path, "did not finish or did not converge")
                        continue
                    metric_hist.append(result)
                    # break # only count one run under each seed

            key = "/".join([novelty, env, obs_type, algo])
            if metric_hist:
                # zip(*...) turns the list of per-run tuples into one sequence per metric.
                results[key] = {
                    "mean": [np.mean(metric) for metric in zip(*metric_hist)],
                    "std": [np.std(metric) for metric in zip(*metric_hist)],
                    "count": len(metric_hist),
                }
            else:
                results[key] = {"mean": [None] * 4, "std": [None] * 4, "count": 0}

                

eval/eval2/pf/fence/lidar_all/icm_ppo/8/events.out.tfevents.1696832978.Yichens-MacBook-Pro-2.local.20210.0 did not finish or did not converge
eval/eval2/pf/fence/lidar_all/ppo/10/events.out.tfevents.1696828237.mulipstargazer.2127812.0 did not finish or did not converge
eval/eval2/pf/fence/lidar_all/ppo/3/events.out.tfevents.1696848912.HAL.3662321.0 did not finish or did not converge
eval/eval2/sa/axe/lidar_all/icm_ppo/9/events.out.tfevents.1696824430.HAL.3782717.0 did not finish or did not converge
eval/eval2/sa/axe/lidar_all/icm_ppo/2/events.out.tfevents.1696757727.HAL.134152.0 did not finish or did not converge
eval/eval2/sa/fence/lidar_all/icm_ppo/1/events.out.tfevents.1696743726.HAL.2175294.0 did not finish or did not converge
eval/eval2/sa/fence/lidar_all/icm_ppo/8/events.out.tfevents.1696768430.Yichens-MacBook-Pro-2.local.15675.0 did not finish or did not converge
eval/eval2/sa/fence/lidar_all/icm_ppo/3/events.out.tfevents.1696787093.Yichens-MacBook-Pro-2.local.87115.0 did not fi

In [15]:
# Flatten each {"mean": [...], "std": [...], "count": n} entry into a single
# flat row; the metric at list position i is named m{i + 2}.
result_formatted = {}
for run_key, stats in results.items():
    row = {}
    for metric_no, (mean_m, std_m) in enumerate(zip(stats['mean'], stats['std']), start=2):
        row[f"m{metric_no}_mean"] = mean_m
        row[f"m{metric_no}_std"] = std_m
    row["count"] = stats["count"]
    result_formatted[run_key] = row

In [16]:
# One row per configuration key, ordered by key.
df = (
    pd.DataFrame
    .from_dict(result_formatted, orient="index")
    .sort_index()
)
df

Unnamed: 0,m2_mean,m2_std,m3_mean,m3_std,m4_mean,m4_std,m5_mean,m5_std,count
axe/pf/lidar_all/icm_ppo,0.757,0.2046,70560.0,17179.708961,0.999,0.003,129.635,8.798164,10
axe/pf/lidar_all/ppo,0.692,0.206727,68160.0,17934.280025,0.998,0.004,116.920799,9.884981,10
axe/sa/lidar_all/icm_ppo,1.0,0.0,205714.285714,99018.662434,0.977143,0.010302,61.972572,8.674011,7
axe/sa/lidar_all/ppo,1.0,0.0,114240.0,19650.709911,0.973,0.017916,63.121599,8.169276,10
chest/sa/lidar_all/icm_ppo,0.002,0.004,24000.0,0.0,1.0,0.0,-3.0134,0.783633,10
chest/sa/lidar_all/ppo,0.007,0.009,24000.0,0.0,0.982,0.01077,-3.540201,3.12832,10
dist_trade/pf/lidar_all/icm_ppo,0.453,0.069,102720.0,20723.551819,0.953,0.016763,126.923799,9.973476,10
dist_trade/pf/lidar_all/ppo,0.505,0.070178,96480.0,21190.790452,0.963,0.019519,122.785599,8.925823,10
dist_trade/sa/lidar_all/icm_ppo,0.964,0.108,199200.0,90470.76876,0.987,0.011874,92.328999,15.172348,10
dist_trade/sa/lidar_all/ppo,0.928889,0.133037,371200.0,241456.91127,0.96,0.024944,90.509555,7.307709,9


In [17]:
# Number of runs that contributed to each configuration's mean/std.
df['count']

axe/pf/lidar_all/icm_ppo           10
axe/pf/lidar_all/ppo               10
axe/sa/lidar_all/icm_ppo            7
axe/sa/lidar_all/ppo               10
chest/sa/lidar_all/icm_ppo         10
chest/sa/lidar_all/ppo             10
dist_trade/pf/lidar_all/icm_ppo    10
dist_trade/pf/lidar_all/ppo        10
dist_trade/sa/lidar_all/icm_ppo    10
dist_trade/sa/lidar_all/ppo         9
fence/pf/lidar_all/icm_ppo          5
fence/pf/lidar_all/ppo              2
fence/sa/lidar_all/icm_ppo         10
fence/sa/lidar_all/ppo             10
fire/pf/lidar_all/icm_ppo          10
fire/pf/lidar_all/ppo              10
fire/sa/lidar_all/icm_ppo           4
fire/sa/lidar_all/ppo              10
Name: count, dtype: int64

In [21]:
# Decimal places kept per column before the values are turned into text.
# 0 means the rounded value is cast to int first (no trailing ".0").
_REPORT_DECIMALS = {
    'm2_mean': 2, 'm2_std': 3,
    'm3_mean': 0, 'm3_std': 0,
    'm4_mean': 2, 'm4_std': 3,
    'm5_mean': 1, 'm5_std': 2,
}


def _as_report_text(column, decimals):
    """Round a numeric column and return it as strings."""
    rounded = column.round(decimals)
    if decimals == 0:
        rounded = rounded.astype(int)
    # NOTE: str() of a float drops trailing zeros ("1.0", "8.8"), so cells of
    # one column may show a different number of decimals.
    return rounded.astype(str)


# Rows with missing metrics (configurations without a usable run) are dropped.
df_complete = df.dropna()
# Optional stricter filter: df_complete = df_complete[df_complete['count'] >= 10]

# Build new string columns with assign() instead of writing strings into the
# float columns through .loc, which changes the dtype in place and triggers
# pandas' incompatible-dtype / SettingWithCopy warnings.
df_reformat = (
    df_complete
    .assign(**{
        column: _as_report_text(df_complete[column], decimals)
        for column, decimals in _REPORT_DECIMALS.items()
    })
    .drop(columns='count')
)
df_reformat

Unnamed: 0,m2_mean,m2_std,m3_mean,m3_std,m4_mean,m4_std,m5_mean,m5_std
axe/pf/lidar_all/icm_ppo,0.76,0.205,70560,17180,1.0,0.003,129.6,8.8
axe/pf/lidar_all/ppo,0.69,0.207,68160,17934,1.0,0.004,116.9,9.88
axe/sa/lidar_all/icm_ppo,1.0,0.0,205714,99019,0.98,0.01,62.0,8.67
axe/sa/lidar_all/ppo,1.0,0.0,114240,19651,0.97,0.018,63.1,8.17
chest/sa/lidar_all/icm_ppo,0.0,0.004,24000,0,1.0,0.0,-3.0,0.78
chest/sa/lidar_all/ppo,0.01,0.009,24000,0,0.98,0.011,-3.5,3.13
dist_trade/pf/lidar_all/icm_ppo,0.45,0.069,102720,20724,0.95,0.017,126.9,9.97
dist_trade/pf/lidar_all/ppo,0.5,0.07,96480,21191,0.96,0.02,122.8,8.93
dist_trade/sa/lidar_all/icm_ppo,0.96,0.108,199200,90471,0.99,0.012,92.3,15.17
dist_trade/sa/lidar_all/ppo,0.93,0.133,371200,241457,0.96,0.025,90.5,7.31


In [22]:
# Write the formatted table to results.csv in the current working directory.
df_reformat.to_csv("results.csv")

In [23]:
# Write the same formatted table as a LaTeX tabular to results.tex.
df_reformat.to_latex("results.tex")