In [220]:
import pandas as pd
import torch
import os
from matplotlib import pyplot as plt
import numpy as np
import tensorboard as tb
from tensorboard.backend.event_processing import event_accumulator

import seaborn as sns
sns.set_theme()
%matplotlib inline

In [221]:
PREV_SUCC_RATE = 1.0

In [222]:
import re

In [223]:
ICM_RUN_DIR = "eval/eval/rs_s/none/lidar_all/icm_ppo/1/events.out.tfevents.1696383185.HAL.1649658.0"
PPO_RUN_DIR = "eval/eval2/rs_s/none/lidar_all/ppo/12/events.out.tfevents.1696721329.Yichens-MacBook-Pro-2.local.57943.0"

In [224]:
results_folder = "eval/eval2"

In [225]:
NOVELTIES = ["axe", "chest", "dist_trade", "fence", "fire"]
ALGOS = ["icm_ppo", "ppo"]

In [226]:
# # ea = event_accumulator.EventAccumulator('eval/eval/rs_s/none/lidar_all/icm_ppo/1/events.out.tfevents.1696383185.HAL.1649658.0',
# ea = event_accumulator.EventAccumulator('eval/eval/sa/axe/lidar_all/icm_ppo/3/events.out.tfevents.1696472442.HAL.187936.0',
#   size_guidance={ # see below regarding this argument
#       event_accumulator.COMPRESSED_HISTOGRAMS: 500,
#       event_accumulator.IMAGES: 4,
#       event_accumulator.AUDIO: 4,
#       event_accumulator.SCALARS: 0,
#       event_accumulator.HISTOGRAMS: 1,
#   })
# ea.Reload()

In [227]:
import queue
pre_novelty_sr = 1.0

In [243]:
def get_baseline_metrics(ea):
    eta = 5
    hist = []

    for item in ea.Scalars('test/length')[-eta:]:
        hist.append(item.value)

    return np.mean(hist)

In [249]:
PLANNING_LEN = 17
icm_rl_ea = load_ea(ICM_RUN_DIR)
ppo_rl_ea = load_ea(PPO_RUN_DIR)
icm_last_5_len = get_baseline_metrics(icm_rl_ea)
ppo_last_5_len = get_baseline_metrics(ppo_rl_ea)

In [251]:
def get_metrics(ea, env_type, algo):
    eta = 5
    min_succ_rate = 0.9
    min_reward = 900

    pd_hist = []
    rew_hist = []
    len_hist = []

    i_novelty = pre_novelty_sr - ea.Scalars('test/percent_dones')[0].value

    for pd, rew, length in zip(ea.Scalars('test/percent_dones'), ea.Scalars('test/reward'), ea.Scalars('test/length')):
        if pd is None or rew is None or length is None: 
            raise KeyError
        pd_hist.append(pd.value)
        rew_hist.append(rew.value)
        len_hist.append(length.value)
        
        if len(pd_hist) > eta and np.average(pd_hist[-eta:]) > min_succ_rate and np.min(rew_hist[-eta:]) > min_reward:
            time_to_adapt = pd.step
            post_novelty_performance = pd_hist[-1]
            avg_last_length = np.mean(len_hist[-5:])
            if env_type == "sa":
                if algo == "icm_ppo": avg_last_length -= icm_last_5_len
                elif algo == "ppo": avg_last_length -= ppo_last_5_len
            return i_novelty, time_to_adapt, post_novelty_performance, avg_last_length
            # return {
            #     "m2": i_novelty, 
            #     "m3": time_to_adapt, 
            #     "m4": post_novelty_performance, 
            #     "m5": avg_last_length
            # }
    return None

In [252]:
def load_ea(path):
    ea = event_accumulator.EventAccumulator(path,
    size_guidance={ # see below regarding this argument
        event_accumulator.COMPRESSED_HISTOGRAMS: 500,
        event_accumulator.IMAGES: 4,
        event_accumulator.AUDIO: 4,
        event_accumulator.SCALARS: 0,
        event_accumulator.HISTOGRAMS: 1,
    })
    ea.Reload()
    return ea

In [253]:
icm_last_5_len, ppo_last_5_len

(35.0060001373291, 64.5160011291504)

In [254]:
results = {}
for env in ["pf", "sa"]:
    for novelty in NOVELTIES:
        obs_type = "lidar_all"
        for algo in ALGOS:
            path = os.path.join(results_folder, env, novelty, obs_type, algo)
            try:
                seeds = os.listdir(path)
            except (FileNotFoundError, NotADirectoryError):
                continue
            metric_hist = []
            for seed in seeds:
                run_path = os.path.join(path, seed)
                if not os.path.isdir(run_path):
                    continue
                files = [filename for filename in os.listdir(run_path) if "events.out.tfevents" in filename]
                for file in files:
                    result = None
                    try:
                        ea = load_ea(os.path.join(run_path, files[0]))
                        result = get_metrics(ea, env_type=env, algo=algo)
                        if result is None:
                            print(run_path, "did not finish or did not converge")
                    except:
                        pass
                    if result is not None:
                        metric_hist.append(result)
            mean = [np.mean(metric) for metric in zip(*metric_hist)]
            std = [np.std(metric) for metric in zip(*metric_hist)]
            if len(mean) > 0:
                results["_".join([novelty, env, obs_type, algo])] = {"mean": mean, "std": std, "count": len(metric_hist)}
            else:
                results["_".join([novelty, env, obs_type, algo])] = {"mean": [None] * 4, "std": [None] * 4, "count": 0}

                

eval/eval2/pf/axe/lidar_all/icm_ppo/10 did not finish or did not converge
eval/eval2/sa/axe/lidar_all/icm_ppo/1 did not finish or did not converge
eval/eval2/sa/chest/lidar_all/icm_ppo/6 did not finish or did not converge
eval/eval2/sa/dist_trade/lidar_all/icm_ppo/1 did not finish or did not converge
eval/eval2/sa/dist_trade/lidar_all/icm_ppo/1 did not finish or did not converge
eval/eval2/sa/fence/lidar_all/icm_ppo/1 did not finish or did not converge
eval/eval2/sa/fence/lidar_all/ppo/1 did not finish or did not converge
eval/eval2/sa/fire/lidar_all/icm_ppo/1 did not finish or did not converge
eval/eval2/sa/fire/lidar_all/icm_ppo/2 did not finish or did not converge
eval/eval2/sa/fire/lidar_all/ppo/1 did not finish or did not converge


In [255]:
results

{'axe_pf_lidar_all_icm_ppo': {'mean': [0.7299999992052714,
   83377.77777777778,
   1.0,
   28.903778054979107],
  'std': [0.1980460092633062, 15561.903466676973, 0.0, 4.186657158163059],
  'count': 9},
 'axe_pf_lidar_all_ppo': {'mean': [0.6919999986886978,
   82560.0,
   1.0,
   29.84559995651245],
  'std': [0.2067268748606296, 17751.349244494064, 0.0, 5.623814483885095],
  'count': 10},
 'dist_trade_pf_lidar_all_icm_ppo': {'mean': [0.45300000309944155,
   136800.0,
   0.9940000057220459,
   45.166399765014646],
  'std': [0.06900000598119807,
   20505.608988762076,
   0.007999992370605469,
   8.727863819620929],
  'count': 10},
 'dist_trade_pf_lidar_all_ppo': {'mean': [0.5049999952316284,
   122880.0,
   0.9950000047683716,
   50.49359992980957],
  'std': [0.07017835205193709,
   20723.551819125987,
   0.004999995231628418,
   6.211132490404416],
  'count': 10},
 'fence_pf_lidar_all_icm_ppo': {'mean': [0.4300000071525574,
   100800.0,
   1.0,
   36.24600028991699],
  'std': [0.0, 0.0,

In [256]:
result_formatted = {}
for key, val in results.items():
    res = {}
    for i, (mean_m, std_m) in enumerate(zip(val['mean'], val['std'])):
        res[f"m{i + 2}_mean"] = mean_m
        res[f"m{i + 2}_std"] = std_m
    res["count"] = val["count"]
    result_formatted[key] = res

In [257]:
result_formatted

{'axe_pf_lidar_all_icm_ppo': {'m2_mean': 0.7299999992052714,
  'm2_std': 0.1980460092633062,
  'm3_mean': 83377.77777777778,
  'm3_std': 15561.903466676973,
  'm4_mean': 1.0,
  'm4_std': 0.0,
  'm5_mean': 28.903778054979107,
  'm5_std': 4.186657158163059,
  'count': 9},
 'axe_pf_lidar_all_ppo': {'m2_mean': 0.6919999986886978,
  'm2_std': 0.2067268748606296,
  'm3_mean': 82560.0,
  'm3_std': 17751.349244494064,
  'm4_mean': 1.0,
  'm4_std': 0.0,
  'm5_mean': 29.84559995651245,
  'm5_std': 5.623814483885095,
  'count': 10},
 'dist_trade_pf_lidar_all_icm_ppo': {'m2_mean': 0.45300000309944155,
  'm2_std': 0.06900000598119807,
  'm3_mean': 136800.0,
  'm3_std': 20505.608988762076,
  'm4_mean': 0.9940000057220459,
  'm4_std': 0.007999992370605469,
  'm5_mean': 45.166399765014646,
  'm5_std': 8.727863819620929,
  'count': 10},
 'dist_trade_pf_lidar_all_ppo': {'m2_mean': 0.5049999952316284,
  'm2_std': 0.07017835205193709,
  'm3_mean': 122880.0,
  'm3_std': 20723.551819125987,
  'm4_mean': 0.9

In [258]:
df = pd.DataFrame.from_dict(result_formatted, orient="index").sort_index()
df

Unnamed: 0,m2_mean,m2_std,m3_mean,m3_std,m4_mean,m4_std,m5_mean,m5_std,count
axe_pf_lidar_all_icm_ppo,0.73,0.198046,83377.777778,15561.903467,1.0,0.0,28.903778,4.186657,9
axe_pf_lidar_all_ppo,0.692,0.206727,82560.0,17751.349244,1.0,0.0,29.8456,5.623814,10
axe_sa_lidar_all_icm_ppo,,,,,,,,,0
axe_sa_lidar_all_ppo,1.0,0.0,230400.0,0.0,0.98,0.0,14.788,0.0,1
chest_sa_lidar_all_icm_ppo,0.002,0.004,24000.0,0.0,1.0,0.0,-2.9516,0.227724,5
chest_sa_lidar_all_ppo,0.01,0.0,57600.0,0.0,0.99,0.0,-6.290001,0.0,1
dist_trade_pf_lidar_all_icm_ppo,0.453,0.069,136800.0,20505.608989,0.994,0.008,45.1664,8.727864,10
dist_trade_pf_lidar_all_ppo,0.505,0.070178,122880.0,20723.551819,0.995,0.005,50.4936,6.211132,10
dist_trade_sa_lidar_all_icm_ppo,,,,,,,,,0
dist_trade_sa_lidar_all_ppo,,,,,,,,,0


In [59]:
df = df.dropna()
df['m2_mean'] = df['m2_mean'].round(2).astype(str)
df['m2_std'] = df['m2_std'].round(3).astype(str)
df['m3_mean'] = df['m3_mean'].round(0).astype(int)
df['m3_std'] = df['m3_std'].round(0).astype(int)
df['m4_mean'] = df['m4_mean'].round(2).astype(str)
df['m4_std'] = df['m4_std'].round(3).astype(str)
df['m5_mean'] = df['m5_mean'].round(1).astype(str)
df['m5_std'] = df['m5_std'].round(2).astype(str)
del df['count']
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['m2_mean'] = df['m2_mean'].round(2).astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['m2_std'] = df['m2_std'].round(3).astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['m3_mean'] = df['m3_mean'].round(0).astype(int)
A value is trying to be set on a copy of a slice from 

Unnamed: 0,m2_mean,m2_std,m3_mean,m3_std,m4_mean,m4_std,m5_mean,m5_std
axe_pf_lidar_all_icm_ppo,0.73,0.198,67733,14030,1.0,0.003,287.7,12.23
axe_pf_lidar_all_ppo,0.69,0.207,68160,17934,1.0,0.004,286.6,17.41
axe_sa_lidar_all_ppo,1.0,0.0,129600,0,0.97,0.0,372.8,0.0
chest_sa_lidar_all_icm_ppo,0.0,0.004,24000,0,1.0,0.0,32.9,1.17
chest_sa_lidar_all_ppo,0.01,0.0,24000,0,0.98,0.0,63.9,0.0
dist_trade_pf_lidar_all_icm_ppo,0.45,0.069,102720,20724,0.95,0.017,226.3,6.21
dist_trade_pf_lidar_all_ppo,0.5,0.07,96480,21191,0.96,0.02,226.2,9.6
fence_pf_lidar_all_icm_ppo,0.43,0.0,62400,0,0.95,0.0,242.6,0.0
fence_pf_lidar_all_ppo,0.34,0.0,52800,0,0.94,0.0,246.5,0.0
fire_sa_lidar_all_ppo,1.0,0.0,129600,0,0.95,0.0,348.6,0.0


In [60]:
df.to_csv("results.csv")

In [61]:
df.to_latex("results.tex")