In [29]:
import bokeh
from bokeh.plotting import figure, show, output_notebook
from bokeh.charts import Scatter, Line, color
from bokeh import palettes
from bokeh.layouts import row
import colorcet
output_notebook()

import numpy as np
import pandas as pd
import os
import math
import json
import time
from IPython.display import clear_output

# 12-colour categorical palette from bokeh.
# NOTE(review): not referenced by any of the cells visible here — confirm it is still needed.
colors = palettes.Paired12
# Base directories that load_result_df() searches, in this order, for
# `<basedir>/<experiment>/results.json`. The first entry is relative to the
# notebook's working directory; the second is an absolute, machine-specific path.
basedirs = ['results',
            '/mnt/nyu/baselines/results']
# basedirs = ['/mnt/nyu/structure-experiments/results']

# (label, '@column') pairs handed to the charts as `tooltips=`.
# '@experiment' refers to the column that load_result_df() adds to every frame.
tooltips = [
    ('Experiment', '@experiment'),
#     ('Step', '@step'),
#     ('Loss', '@loss'),
    ('Accuracy', '@EpAccuracy'),
#     ('Transition divergence', '@transdivergence'),
]

In [30]:
def sparsify(steps, values, amount):
    """Thin two parallel sequences by keeping every ``amount``-th element.

    When ``amount`` is below 2 the inputs are returned untouched (same
    objects, no conversion).  Otherwise only the positions ``i`` with
    ``i % amount == 0`` are kept, looking no further than the shorter of the
    two sequences, and the survivors are returned as a pair of numpy arrays
    ``(steps, values)``.
    """
    if amount < 2:
        return steps, values

    usable = min(len(steps), len(values))
    kept = [idx for idx in range(usable) if idx % amount == 0]

    thinned_steps = np.array([steps[idx] for idx in kept])
    thinned_values = np.array([values[idx] for idx in kept])
    return thinned_steps, thinned_values

In [31]:
def load_result_df(experiment, smoothing=1, max_points=500, upto=None, jitter=0, min_difficulty=0, test=False):
    """Load, filter, smooth and thin the results log of a single experiment.

    ``<basedir>/<experiment>/results.json`` is looked up in every entry of the
    module-level ``basedirs`` list, in order; the first file that yields at
    least one record wins.  The file is read as JSON Lines (one JSON object
    per line); blank lines are ignored.

    Parameters
    ----------
    experiment : str
        Name of the experiment directory.
    smoothing : int
        Window of the centered rolling mean applied to every loaded column
        (including ``step``).
    max_points : int
        Target number of rows: every ``int(len(df) / max_points)``-th row
        (at least every row) is kept.
    upto : number or None
        When given, only rows with ``step < upto`` are kept.
    jitter : float
        When non-zero, Gaussian noise scaled by this value is added to every
        loaded column except ``step``.
    min_difficulty : number
        When positive and ``test`` is false, an experiment whose maximum
        ``EpDifficulty`` is below this value is rejected.
    test : bool
        Selects the rows whose ``TrainingMode`` equals ``not test``.

    Returns
    -------
    pandas.DataFrame or None
        The processed frame with a ``step`` column (copy of
        ``TimestepsSoFar``), a lower-cased, whitespace-free alias of every
        column, and an ``experiment`` column.  ``None`` (after printing a
        message) when no usable results file was found or the difficulty
        threshold was not reached.
    """
    df = pd.DataFrame()
    for basedir in basedirs:
        path = os.path.join(basedir, experiment, 'results.json')
        try:
            # The context manager closes the handle even if parsing fails.
            with open(path) as f:
                records = [json.loads(line) for line in f if line.strip()]
            df = pd.DataFrame.from_records(records)
        except Exception:
            # Deliberately best-effort: a missing or unparsable file simply
            # means "try the next base directory".
            df = pd.DataFrame()
            continue
        if len(df) > 0:
            break

    if 'TimestepsSoFar' not in df.columns:
        print("No results found for {}".format(experiment))
        return None
    df['step'] = df['TimestepsSoFar']

    # Every loaded column is coerced to a number; unparsable cells become NaN.
    numeric_columns = list(df.columns)
    df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors="coerce")

    # The threshold is checked on the unfiltered log and only for training plots.
    if min_difficulty > 0 and not test and df['EpDifficulty'].max() < min_difficulty:
        print("Experiment did not reach difficulty threshold: {}".format(experiment))
        return None

    if upto is not None:
        df = df.query("step<{}".format(upto)).copy()

    df = df.query("TrainingMode=={}".format(not test)).copy()

    for col in numeric_columns:
        df[col] = df[col].rolling(smoothing, center=True).mean()
        if jitter != 0 and col != 'step':
            df[col] = df[col] + np.random.randn(len(df[col])) * jitter

    # Snapshot the column list first: the loop adds columns to the frame.
    for col in list(df.columns):
        df["".join(col.split()).lower()] = df[col]
    df['experiment'] = experiment

    every_nth = max(int(len(df) / max_points), 1)
    return df.iloc[::every_nth, :]

def make_dataframe(results, **kwargs):
    """Load several experiments and stack them into one DataFrame.

    Parameters
    ----------
    results : iterable of str
        Experiment names; each is passed to ``load_result_df``.
    **kwargs
        Forwarded unchanged to ``load_result_df``.

    Returns
    -------
    pandas.DataFrame
        The row-wise concatenation (with a fresh 0..n-1 index) of every
        experiment that loaded successfully; experiments for which
        ``load_result_df`` returned ``None`` are skipped.  An empty frame is
        returned when nothing could be loaded.
    """
    frames = []
    for experiment in results:
        single_df = load_result_df(experiment, **kwargs)
        if single_df is not None:
            frames.append(single_df)

    # pd.concat raises on an empty list, so keep the "empty frame" contract explicit.
    if not frames:
        return pd.DataFrame()
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0,
    # and quadratic when used in a loop).
    return pd.concat(frames, ignore_index=True)

In [32]:
# Training-set curves for the runs trained with the curriculum.
title = "training set performance, curriculum"

# Full grid of runs: walldeath x max-kl x lam x seed (2 * 2 * 2 * 3 = 24),
# enumerated in that nesting order.  The hyper-parameter values are kept as
# strings so they appear in the name exactly as written (e.g. "1.0").
networks = [
    "trpo_8x8_test_curriculum1_walldeath{}_max-kl{}_lam{}_seed{}".format(
        walldeath, max_kl, lam, seed)
    for walldeath in (0, 1)
    for max_kl in ("0.001", "0.01")
    for lam in ("0.98", "1.0")
    for seed in (0, 1, 2)
]

data = make_dataframe(networks, smoothing=5, max_points=100, min_difficulty=17, test=False)

# One colour per listed run, sampled from the colorcet rainbow map.
spaced_palette = palettes.linear_palette(colorcet.rainbow, len(networks))

# One chart per metric, each shown in its own responsive row.
plots = []
for facet in ['EpAccuracy', 'EpRewMean', 'EpDifficulty']:
    plot = Line(
        data, x='step', y=facet,
        title="{}, {}".format(title, facet),
        color=color(columns='experiment', palette=spaced_palette),
        tools='pan,wheel_zoom,box_zoom,save,reset',
        active_scroll="wheel_zoom",
        tooltips=tooltips,
        plot_height=300,
        legend='top_right',
    )
    plots.append(plot)
    show(row(children=[plot], responsive=True))

Experiment did not reach difficulty threshold: trpo_8x8_test_curriculum1_walldeath0_max-kl0.001_lam0.98_seed0
Experiment did not reach difficulty threshold: trpo_8x8_test_curriculum1_walldeath0_max-kl0.001_lam0.98_seed1
Experiment did not reach difficulty threshold: trpo_8x8_test_curriculum1_walldeath0_max-kl0.001_lam0.98_seed2
Experiment did not reach difficulty threshold: trpo_8x8_test_curriculum1_walldeath0_max-kl0.001_lam1.0_seed0
No results found for trpo_8x8_test_curriculum1_walldeath0_max-kl0.001_lam1.0_seed1
Experiment did not reach difficulty threshold: trpo_8x8_test_curriculum1_walldeath0_max-kl0.001_lam1.0_seed2
Experiment did not reach difficulty threshold: trpo_8x8_test_curriculum1_walldeath0_max-kl0.01_lam1.0_seed0
Experiment did not reach difficulty threshold: trpo_8x8_test_curriculum1_walldeath0_max-kl0.01_lam1.0_seed1
Experiment did not reach difficulty threshold: trpo_8x8_test_curriculum1_walldeath1_max-kl0.001_lam0.98_seed0
Experiment did not reach difficulty thresho

In [33]:
# Test-set curves for the runs trained with the curriculum.
title = "test set performance, curriculum"

# Full grid of runs: walldeath x max-kl x lam x seed (2 * 2 * 2 * 3 = 24),
# enumerated in that nesting order.  The hyper-parameter values are kept as
# strings so they appear in the name exactly as written (e.g. "1.0").
networks = [
    "trpo_8x8_test_curriculum1_walldeath{}_max-kl{}_lam{}_seed{}".format(
        walldeath, max_kl, lam, seed)
    for walldeath in (0, 1)
    for max_kl in ("0.001", "0.01")
    for lam in ("0.98", "1.0")
    for seed in (0, 1, 2)
]

data = make_dataframe(networks, smoothing=10, max_points=100, min_difficulty=17, test=True)

# One colour per listed run, sampled from the colorcet rainbow map.
spaced_palette = palettes.linear_palette(colorcet.rainbow, len(networks))

# One chart per metric, each shown in its own responsive row.
plots = []
for facet in ['EpAccuracy', 'EpRewMean', 'EpDifficulty']:
    plot = Line(
        data, x='step', y=facet,
        title="{}, {}".format(title, facet),
        color=color(columns='experiment', palette=spaced_palette),
        tools='pan,wheel_zoom,box_zoom,save,reset',
        active_scroll="wheel_zoom",
        tooltips=tooltips,
        plot_height=300,
        legend='top_right',
    )
    plots.append(plot)
    show(row(children=[plot], responsive=True))

No results found for trpo_8x8_test_curriculum1_walldeath0_max-kl0.001_lam1.0_seed1
No results found for trpo_8x8_test_curriculum1_walldeath1_max-kl0.001_lam1.0_seed2


In [23]:
# Training-set curves for the runs trained without the curriculum.
title = "training set performance, no curriculum"

# Full grid of runs: walldeath x max-kl x lam x seed (2 * 2 * 2 * 3 = 24),
# enumerated in that nesting order.  The hyper-parameter values are kept as
# strings so they appear in the name exactly as written (e.g. "1.0").
networks = [
    "trpo_8x8_test_curriculum0_walldeath{}_max-kl{}_lam{}_seed{}".format(
        walldeath, max_kl, lam, seed)
    for walldeath in (0, 1)
    for max_kl in ("0.001", "0.01")
    for lam in ("0.98", "1.0")
    for seed in (0, 1, 2)
]

data = make_dataframe(networks, smoothing=1, max_points=100, min_difficulty=0, test=False)

# One colour per listed run, sampled from the colorcet rainbow map.
spaced_palette = palettes.linear_palette(colorcet.rainbow, len(networks))

# One chart per metric, each shown in its own responsive row.
plots = []
for facet in ['EpAccuracy', 'EpRewMean']:
    plot = Line(
        data, x='step', y=facet,
        title="{}, {}".format(title, facet),
        color=color(columns='experiment', palette=spaced_palette),
        tools='pan,wheel_zoom,box_zoom,save,reset',
        active_scroll="wheel_zoom",
        tooltips=tooltips,
        plot_height=300,
        legend='top_right',
    )
    plots.append(plot)
    show(row(children=[plot], responsive=True))

In [27]:
# Test-set curves for the runs trained without the curriculum.
title = "test set performance, no curriculum"

# Full grid of runs: walldeath x max-kl x lam x seed (2 * 2 * 2 * 3 = 24),
# enumerated in that nesting order.  The hyper-parameter values are kept as
# strings so they appear in the name exactly as written (e.g. "1.0").
networks = [
    "trpo_8x8_test_curriculum0_walldeath{}_max-kl{}_lam{}_seed{}".format(
        walldeath, max_kl, lam, seed)
    for walldeath in (0, 1)
    for max_kl in ("0.001", "0.01")
    for lam in ("0.98", "1.0")
    for seed in (0, 1, 2)
]

data = make_dataframe(networks, smoothing=10, max_points=100, min_difficulty=0, test=True)

# One colour per listed run, sampled from the colorcet rainbow map.
spaced_palette = palettes.linear_palette(colorcet.rainbow, len(networks))

# One chart per metric, each shown in its own responsive row.
plots = []
for facet in ['EpAccuracy', 'EpRewMean']:
    plot = Line(
        data, x='step', y=facet,
        title="{}, {}".format(title, facet),
        color=color(columns='experiment', palette=spaced_palette),
        tools='pan,wheel_zoom,box_zoom,save,reset',
        active_scroll="wheel_zoom",
        tooltips=tooltips,
        plot_height=300,
        legend='top_right',
    )
    plots.append(plot)
    show(row(children=[plot], responsive=True))