In [2]:
import pandas as pd
import torch
import os
from matplotlib import pyplot as plt
import numpy as np
import tensorboard as tb
from tensorboard.backend.event_processing import event_accumulator

import seaborn as sns
sns.set_theme()
%matplotlib inline

In [3]:
PREV_SUCC_RATE = 1.0

In [4]:
import re

In [5]:
ICM_RUN_DIR = "eval/eval/rs_s/none/lidar_all/icm_ppo/1/events.out.tfevents.1696383185.HAL.1649658.0"
PPO_RUN_DIR = "eval/eval2/rs_s/none/lidar_all/ppo/12/events.out.tfevents.1696721329.Yichens-MacBook-Pro-2.local.57943.0"

In [6]:
results_folder = "eval/eval2"

In [7]:
NOVELTIES = ["axe", "chest", "dist_trade", "fence", "fire"]
ALGOS = ["icm_ppo", "ppo"]

In [8]:
# # ea = event_accumulator.EventAccumulator('eval/eval/rs_s/none/lidar_all/icm_ppo/1/events.out.tfevents.1696383185.HAL.1649658.0',
# ea = event_accumulator.EventAccumulator('eval/eval/sa/axe/lidar_all/icm_ppo/3/events.out.tfevents.1696472442.HAL.187936.0',
#   size_guidance={ # see below regarding this argument
#       event_accumulator.COMPRESSED_HISTOGRAMS: 500,
#       event_accumulator.IMAGES: 4,
#       event_accumulator.AUDIO: 4,
#       event_accumulator.SCALARS: 0,
#       event_accumulator.HISTOGRAMS: 1,
#   })
# ea.Reload()

In [9]:
import queue
pre_novelty_sr = 1.0

In [12]:
def get_baseline_metrics(ea):
    eta = 5
    hist = []
    hist2 = []

    for item in ea.Scalars('test/length')[-eta:]:
        hist.append(item.value)
    # for item in ea.Scalars('test/percent_dones')[-eta]:
    #     hist2.append(item.value)
    return ea.Scalars('test/percent_dones')[-1].value, np.mean(hist)

In [13]:
def load_ea(path):
    ea = event_accumulator.EventAccumulator(path,
    size_guidance={ # see below regarding this argument
        event_accumulator.COMPRESSED_HISTOGRAMS: 500,
        event_accumulator.IMAGES: 4,
        event_accumulator.AUDIO: 4,
        event_accumulator.SCALARS: 0,
        event_accumulator.HISTOGRAMS: 1,
    })
    ea.Reload()
    return ea

In [14]:
PLANNING_LEN = 17
icm_rl_ea = load_ea(ICM_RUN_DIR)
ppo_rl_ea = load_ea(PPO_RUN_DIR)
icm_succ_rate, icm_last_5_len = get_baseline_metrics(icm_rl_ea)
ppo_succ_rate, ppo_last_5_len = get_baseline_metrics(ppo_rl_ea)

In [16]:
icm_succ_rate, icm_last_5_len, ppo_succ_rate, ppo_last_5_len

(1.0, 35.0060001373291, 1.0, 64.5160011291504)

In [68]:
def get_metrics(ea, env_type, algo):
    eta = 5
    min_succ_rate = 0.9
    min_reward = 900

    pd_hist = []
    rew_hist = []
    len_hist = []

    i_novelty = pre_novelty_sr - ea.Scalars('test/percent_dones')[0].value

    for pd, rew, length in zip(ea.Scalars('test/percent_dones'), ea.Scalars('test/reward'), ea.Scalars('test/length')):
        if pd is None or rew is None or length is None: 
            raise KeyError
        pd_hist.append(pd.value)
        rew_hist.append(rew.value)
        len_hist.append(length.value)
        
        if len(pd_hist) > eta and np.average(pd_hist[-eta:]) > min_succ_rate and np.min(rew_hist[-eta:]) > min_reward:
            time_to_adapt = pd.step
            post_novelty_performance = pd_hist[-1]
            avg_last_length = np.mean(len_hist[-5:])
            if env_type == "sa":
                if algo == "icm_ppo": avg_last_length -= icm_last_5_len
                elif algo == "ppo": avg_last_length -= ppo_last_5_len
            return i_novelty, time_to_adapt, post_novelty_performance, avg_last_length
            # return {
            #     "m2": i_novelty, 
            #     "m3": time_to_adapt, 
            #     "m4": post_novelty_performance, 
            #     "m5": avg_last_length
            # }
    return None

In [69]:
results = {}
for env in ["pf", "sa"]:
    for novelty in NOVELTIES:
        obs_type = "lidar_all"
        for algo in ALGOS:
            path = os.path.join(results_folder, env, novelty, obs_type, algo)
            try:
                seeds = os.listdir(path)
            except (FileNotFoundError, NotADirectoryError):
                continue
            metric_hist = []
            for seed in seeds:
                run_path = os.path.join(path, seed)
                if not os.path.isdir(run_path):
                    continue
                files = [filename for filename in os.listdir(run_path) if "events.out.tfevents" in filename]
                for file in files:
                    result = None
                    try:
                        ea = load_ea(os.path.join(run_path, files[0]))
                        result = get_metrics(ea, env_type=env, algo=algo)
                        if result is None:
                            print(run_path + os.sep + files[0], "did not finish or did not converge")
                    except:
                        pass
                    if result is not None:
                        metric_hist.append(result)
            mean = [np.mean(metric) for metric in zip(*metric_hist)]
            std = [np.std(metric) for metric in zip(*metric_hist)]
            if len(mean) > 0:
                results["_".join([novelty, env, obs_type, algo])] = {"mean": mean, "std": std, "count": len(metric_hist)}
            else:
                results["_".join([novelty, env, obs_type, algo])] = {"mean": [None] * 4, "std": [None] * 4, "count": 0}

                

eval/eval2/sa/axe/lidar_all/icm_ppo/1/events.out.tfevents.1696743675.HAL.2169501.0 did not finish or did not converge
eval/eval2/sa/fence/lidar_all/icm_ppo/1/events.out.tfevents.1696743726.HAL.2175294.0 did not finish or did not converge
eval/eval2/sa/fence/lidar_all/ppo/1/events.out.tfevents.1696734638.mulipstargazer.143489.0 did not finish or did not converge
eval/eval2/sa/fire/lidar_all/icm_ppo/1/events.out.tfevents.1696743774.HAL.2186702.0 did not finish or did not converge
eval/eval2/sa/fire/lidar_all/icm_ppo/2/events.out.tfevents.1696731100.HAL.1323227.0 did not finish or did not converge
eval/eval2/sa/fire/lidar_all/ppo/1/events.out.tfevents.1696734953.mulipstargazer.240773.0 did not finish or did not converge


In [70]:
result_formatted = {}
for key, val in results.items():
    res = {}
    for i, (mean_m, std_m) in enumerate(zip(val['mean'], val['std'])):
        res[f"m{i + 2}_mean"] = mean_m
        res[f"m{i + 2}_std"] = std_m
    res["count"] = val["count"]
    result_formatted[key] = res

In [71]:
df = pd.DataFrame.from_dict(result_formatted, orient="index").sort_index()
df

Unnamed: 0,m2_mean,m2_std,m3_mean,m3_std,m4_mean,m4_std,m5_mean,m5_std,count
axe_pf_lidar_all_icm_ppo,0.757,0.2046,86560.0,17581.08074,1.0,0.0,29.3868,4.227893,10
axe_pf_lidar_all_ppo,0.692,0.206727,82560.0,17751.349244,1.0,0.0,29.8456,5.623814,10
axe_sa_lidar_all_icm_ppo,,,,,,,,,0
axe_sa_lidar_all_ppo,1.0,0.0,230400.0,0.0,0.98,0.0,14.788,0.0,1
chest_sa_lidar_all_icm_ppo,0.002,0.004,24000.0,0.0,1.0,0.0,-3.0134,0.783633,10
chest_sa_lidar_all_ppo,0.01,0.0,57600.0,0.0,0.99,0.0,-6.290001,0.0,1
dist_trade_pf_lidar_all_icm_ppo,0.453,0.069,136800.0,20505.608989,0.994,0.008,45.1664,8.727864,10
dist_trade_pf_lidar_all_ppo,0.505,0.070178,122880.0,20723.551819,0.995,0.005,50.4936,6.211132,10
dist_trade_sa_lidar_all_icm_ppo,1.0,0.0,144000.0,0.0,0.99,0.0,31.811999,0.0,2
dist_trade_sa_lidar_all_ppo,,,,,,,,,0


In [54]:
df = df.dropna()
df.loc[:, 'm2_mean'] = df['m2_mean'].round(2).astype(str)
df.loc[:, 'm2_std'] = df['m2_std'].round(3).astype(str)
df.loc[:, 'm3_mean'] = df['m3_mean'].round(0).astype(int)
df.loc[:, 'm3_std'] = df['m3_std'].round(0).astype(int)
df.loc[:, 'm4_mean'] = df['m4_mean'].round(2).astype(str)
df.loc[:, 'm4_std'] = df['m4_std'].round(3).astype(str)
df.loc[:, 'm5_mean'] = df['m5_mean'].round(1).astype(str)
df.loc[:, 'm5_std'] = df['m5_std'].round(2).astype(str)
del df['count']
df

Unnamed: 0,m2_mean,m2_std,m3_mean,m3_std,m4_mean,m4_std,m5_mean,m5_std
axe_pf_lidar_all_icm_ppo,0.73,0.198,83378.0,15562.0,1.0,0.0,28.9,4.19
axe_pf_lidar_all_ppo,0.69,0.207,82560.0,17751.0,1.0,0.0,29.8,5.62
axe_sa_lidar_all_ppo,1.0,0.0,230400.0,0.0,0.98,0.0,14.8,0.0
chest_sa_lidar_all_icm_ppo,0.0,0.004,24000.0,0.0,1.0,0.0,-3.0,0.23
chest_sa_lidar_all_ppo,0.01,0.0,57600.0,0.0,0.99,0.0,-6.3,0.0
dist_trade_pf_lidar_all_icm_ppo,0.45,0.069,136800.0,20506.0,0.99,0.008,45.2,8.73
dist_trade_pf_lidar_all_ppo,0.5,0.07,122880.0,20724.0,1.0,0.005,50.5,6.21
fence_pf_lidar_all_icm_ppo,0.43,0.0,100800.0,0.0,1.0,0.0,36.2,0.0
fence_pf_lidar_all_ppo,0.34,0.0,86400.0,0.0,1.0,0.0,44.3,0.0


In [55]:
df.to_csv("results.csv")

In [56]:
df.to_latex("results.tex")