In [1]:
# Stretch the notebook's `.container` element to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import pandas as pd
import numpy as np
import altair as alt
import altair_saver
import glob
import os
import copy
# Remove Altair's row limit on chart data (presumably the concatenated job logs
# exceed the default cap — TODO confirm).
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
def personal():
    """Return the Altair theme used for day-to-day exploratory plots.

    Returns:
        dict: a theme spec whose single ``'config'`` entry sets a 400x300 view,
        the ``set2`` colour scheme for categorical and ordinal ranges,
        unlimited legend label length, a white background, clipped marks and
        a line width of 3.
    """
    palette = 'set2'

    config = {}
    config['view'] = {'height': 300, 'width': 400}
    config['range'] = {
        'category': {'scheme': palette},
        'ordinal': {'scheme': palette},
    }
    # A label limit of 0 disables truncation of long legend labels.
    config['legend'] = {'labelLimit': 0}
    config['background'] = 'white'
    config['mark'] = {'clip': True}
    config['line'] = {'size': 3}
    return {'config': config}

def publication():
    """Return the Altair theme used for publication figures.

    Compared with the exploratory theme this uses a larger 600x500 view, bigger
    fonts, thicker lines and axes, no grid, and a legend placed in the top-left
    corner of the plot.

    Returns:
        dict: a theme spec with a single ``'config'`` entry.
    """
    colorscheme = 'set2'
    # Colours must be valid CSS colour strings; hex values need the leading '#'.
    stroke_color = '#333'
    rule_color = '#999'
    title_size = 24
    label_size = 20
    line_width = 5

    return {
        'config': {
            'view': {
                'height': 500,
                'width': 600,
                # No border around the plotting area.
                'strokeWidth': 0,
                'background': 'white',
            },
            'title': {
                'fontSize': title_size,
            },
            'range': {
                'category': {'scheme': colorscheme},
                'ordinal': {'scheme': colorscheme},
            },
            'axis': {
                'titleFontSize': title_size,
                'labelFontSize': label_size,
                'grid': False,
                'domainWidth': 5,
                'domainColor': stroke_color,
                'tickWidth': 3,
                'tickSize': 9,
                'tickCount': 4,
                'tickColor': stroke_color,
                'tickOffset': 0,
            },
            'legend': {
                'titleFontSize': title_size,
                'labelFontSize': label_size,
                # A limit of 0 disables truncation of labels and title.
                'labelLimit': 0,
                'titleLimit': 0,
                'orient': 'top-left',
                'titlePadding': 10,
                # White with an alpha channel, so the legend box is translucent.
                'fillColor': '#ffffff88',
                'cornerRadius': 0,
            },
            'rule': {
                'size': 3,
                'color': rule_color,
            },
            'line': {
                'size': line_width,
            },
        }
    }

# Register both themes with Altair and start with the exploratory one; the
# cells below call `alt.themes.enable(...)` again to switch themes as needed.
alt.themes.register('personal', personal)
alt.themes.register('publication', publication)
alt.themes.enable('personal')

ThemeRegistry.enable('personal')

In [4]:
def load_jobs(pattern, subdir='exploration'):
    """Load the train and test logs of every job directory matching a glob.

    Every path matching ``results/<subdir>/<pattern>`` is treated as a job
    directory containing ``train.csv`` and ``test.csv``. Rows from the two
    files are stacked, with a boolean ``test`` column marking their origin and
    a ``name`` column holding the job directory's base name.

    Args:
        pattern: glob pattern for job directory names inside ``results/<subdir>``.
        subdir: sub-directory of ``results`` to search.

    Returns:
        pandas.DataFrame: rows of all matched jobs with a fresh 0..n-1 index.
        Jobs appear in sorted path order so the result is deterministic.

    Raises:
        ValueError: if no path matches the pattern.
    """
    search = f'results/{subdir}/{pattern}'
    # glob returns matches in arbitrary order; sort for reproducible row order.
    job_dirs = sorted(glob.glob(search))
    if not job_dirs:
        # pd.concat([]) would raise a ValueError too, but without saying which
        # pattern came up empty.
        raise ValueError(f'No job directories match {search!r}')

    frames = []
    for job_dir in job_dirs:
        train_data = pd.read_csv(os.path.join(job_dir, 'train.csv'))
        train_data['test'] = False
        test_data = pd.read_csv(os.path.join(job_dir, 'test.csv'))
        test_data['test'] = True
        job_data = pd.concat([train_data, test_data], sort=False)
        # normpath drops a trailing separator so basename is never empty.
        job_data['name'] = os.path.basename(os.path.normpath(job_dir))
        frames.append(job_data)
    return pd.concat(frames, sort=False).reset_index(drop=True)

In [5]:
def plot_with_bars(base_chart, y_col, test, extent='ci'):
    """Layer a mean line and an error band for one column of one data split.

    Args:
        base_chart: chart providing the data, x/colour encodings and a ``test`` field.
        y_col: name of the field to plot on the y axis.
        test: value of the ``test`` field to keep (True or False).
        extent: ``extent`` argument passed to ``mark_errorband``.

    Returns:
        A layered chart with three layers, in this order: zero-size circles at
        the mean, the error band, and the mean drawn with the base chart's mark.
    """
    def keep_split(chart):
        # Restrict a layer to rows whose `test` flag equals the requested split.
        return chart.transform_filter(alt.datum.test == test)

    mean_of_column = f'mean({y_col}):Q'

    # Layer 0: circles of size 0, so nothing visible is drawn by this layer.
    anchor_layer = keep_split(
        base_chart.mark_circle(size=0, opacity=1).encode(y=mean_of_column)
    )
    # Layer 1: error band computed from the raw column values.
    band_layer = keep_split(base_chart.encode(y=f'{y_col}:Q')).mark_errorband(extent=extent)
    # Layer 2: the mean, using whatever mark the base chart already has.
    mean_layer = keep_split(base_chart.encode(y=mean_of_column))

    return anchor_layer + band_layer + mean_layer

def make_base_chart(data, title, color):
    """Build the shared line chart with derived per-run statistics.

    The chart plots against ``episode`` and adds these fields, each computed
    separately per (``name``, ``test``) group in ascending episode order:

    * ``has_score``: whether ``score`` exceeds 0.1
    * ``sum_novelty``, ``sum_score``, ``count_score``: running totals of
      ``novelty_score``, ``score`` and ``has_score``
    * ``rolling_mean_score``, ``rolling_mean_novelty``: mean over the current
      row and the ten rows before it

    Args:
        data: data source handed to ``alt.Chart``.
        title: chart title.
        color: colour encoding for the lines.

    Returns:
        The chart without a y encoding; callers add one.
    """
    running_total = [None, 0]   # from the first row of the group up to the current row
    trailing_mean = [-10, 0]    # ten preceding rows plus the current one
    derived_fields = (
        ('sum_novelty', 'sum(novelty_score)', running_total),
        ('sum_score', 'sum(score)', running_total),
        ('count_score', 'sum(has_score)', running_total),
        ('rolling_mean_score', 'mean(score)', trailing_mean),
        ('rolling_mean_novelty', 'mean(novelty_score)', trailing_mean),
    )

    chart = alt.Chart(data, title=title).mark_line().encode(
        x=alt.X('episode', title='Episode'),
        color=color,
        tooltip=['Algorithm', 'episode'],
    )
    chart = chart.transform_calculate(has_score=(alt.datum.score > 0.1))
    for output_field, aggregate, frame in derived_fields:
        chart = chart.transform_window(
            frame=list(frame),
            groupby=['name', 'test'],
            sort=[{'field': 'episode', 'order': 'ascending'}],
            **{output_field: aggregate},
        )
    return chart

In [18]:
# Gridworld 40x40 "without reward" comparison (exploratory theme).
alt.themes.enable('personal')
jobs = [
    load_jobs('arxiv2_grid40*noreward*', subdir='intrinsic'),
    load_jobs('arxiv2_grid40*puniform*', subdir='slow'),
    load_jobs('arxiv2_grid40*puniform*', subdir='exploration'),
]
data = pd.concat(jobs, sort=False)
# Label each run from substrings of its job name. Every run starts out labelled
# as the full method; later assignments overwrite earlier ones, so order matters.
data['Algorithm'] = 'Ours: IR + FP + FA + Optimism'
data.loc[data['name'].str.contains('noopt'), 'Algorithm'] = 'IR + FP + Fast adaptation'
data.loc[data['name'].str.contains('slow'), 'Algorithm'] = 'IR + Factored policies'
data.loc[data['name'].str.contains('intrinsic'), 'Algorithm'] = 'Intrinsic reward'
data.loc[data['name'].str.contains('noexplore'), 'Algorithm'] = 'No exploration'
# Explicit domain for the colour scale, so the legend order is fixed.
algorithms = [
    'No exploration',
    'Intrinsic reward',
    'IR + Factored policies',
    'IR + FP + Fast adaptation',
    'Ours: IR + FP + FA + Optimism'
]

subset = data
# Keep episodes up to and including 1000.
subset = subset[(subset['episode'] <= 1000)]
chart = make_base_chart(
    subset, 
    title="Gridworld 40x40 without reward", 
    color=alt.Color('Algorithm', scale=alt.Scale(domain=algorithms)))

# 2x2 grid. Top: rolling mean score on train rows | on test rows.
# Bottom: cumulative novelty | cumulative count of scoring episodes (train rows).
(
    plot_with_bars(chart, 'rolling_mean_score', test=False) | \
    plot_with_bars(chart, 'rolling_mean_score', test=True)
) & (
    plot_with_bars(chart, 'sum_novelty', test=False) | \
    plot_with_bars(chart, 'count_score', test=False)
)

In [20]:
# Gridworld 40x40 "with reward" comparison across the five variants (exploratory theme).
alt.themes.enable('personal')
jobs = [
    load_jobs('arxiv2_grid40_intrinsic_seed*', subdir='intrinsic'),
    load_jobs('arxiv2_grid40_slow_seed*', subdir='slow'),
    load_jobs('arxiv2_grid40_seed*', subdir='exploration'),
    load_jobs('arxiv2_grid40_noopt_seed*', subdir='exploration'),
    load_jobs('arxiv2_grid40_noexplore_seed*', subdir='exploration'),
]
data = pd.concat(jobs, sort=False)
# Label each run from substrings of its job name. Every run starts out labelled
# as the full method; later assignments overwrite earlier ones, so order matters.
data['Algorithm'] = 'Ours: IR + FP + FA + Optimism'
data.loc[data['name'].str.contains('noopt'), 'Algorithm'] = 'IR + FP + Fast adaptation'
data.loc[data['name'].str.contains('slow'), 'Algorithm'] = 'IR + Factored policies'
data.loc[data['name'].str.contains('intrinsic'), 'Algorithm'] = 'Intrinsic reward'
data.loc[data['name'].str.contains('noexplore'), 'Algorithm'] = 'No exploration'
# Explicit domain for the colour scale, so the legend order is fixed.
algorithms = [
    'No exploration',
    'Intrinsic reward',
    'IR + Factored policies',
    'IR + FP + Fast adaptation',
    'Ours: IR + FP + FA + Optimism'
]

subset = data
# Keep episodes up to and including 1000.
subset = subset[(subset['episode'] <= 1000)]
chart = make_base_chart(
    subset, 
    title="Gridworld 40x40 with reward", 
    color=alt.Color('Algorithm', scale=alt.Scale(domain=algorithms)))

# 2x2 grid. Top: rolling mean score on train rows | on test rows.
# Bottom: cumulative novelty | cumulative count of scoring episodes (train rows).
(
    plot_with_bars(chart, 'rolling_mean_score', test=False) | \
    plot_with_bars(chart, 'rolling_mean_score', test=True)
) & (
    plot_with_bars(chart, 'sum_novelty', test=False) | \
    plot_with_bars(chart, 'count_score', test=False)
)

In [19]:
# Point Velocity "without reward" comparison (exploratory theme).
alt.themes.enable('personal')
jobs = [
    load_jobs('arxiv2_pv100*noreward*', subdir='intrinsic'),
    load_jobs('arxiv2_pv100*puniform*', subdir='slow'),
    load_jobs('arxiv2_pv100*puniform*', subdir='exploration'),
]
data = pd.concat(jobs, sort=False)
# Label each run from substrings of its job name. Every run starts out labelled
# as the full method; later assignments overwrite earlier ones, so order matters.
data['Algorithm'] = 'Ours: IR + FP + FA + Optimism'
data.loc[data['name'].str.contains('noopt'), 'Algorithm'] = 'IR + FP + Fast adaptation'
data.loc[data['name'].str.contains('slow'), 'Algorithm'] = 'IR + Factored policies'
data.loc[data['name'].str.contains('intrinsic'), 'Algorithm'] = 'Intrinsic reward'
data.loc[data['name'].str.contains('noexplore'), 'Algorithm'] = 'No exploration'
# Explicit domain for the colour scale, so the legend order is fixed.
algorithms = [
    'No exploration',
    'Intrinsic reward',
    'IR + Factored policies',
    'IR + FP + Fast adaptation',
    'Ours: IR + FP + FA + Optimism'
]

subset = data
# Keep episodes up to and including 1000.
subset = subset[(subset['episode'] <= 1000)]
chart = make_base_chart(
    subset, 
    title="Point Velocity without reward", 
    color=alt.Color('Algorithm', scale=alt.Scale(domain=algorithms)))

# 2x2 grid. Top: rolling mean score on train rows | on test rows.
# Bottom: cumulative novelty | cumulative count of scoring episodes (train rows).
(
    plot_with_bars(chart, 'rolling_mean_score', test=False) | \
    plot_with_bars(chart, 'rolling_mean_score', test=True)
) & (
    plot_with_bars(chart, 'sum_novelty', test=False) | \
    plot_with_bars(chart, 'count_score', test=False)
)

In [None]:
# Point Velocity "with reward" comparison across the five variants (exploratory theme).
alt.themes.enable('personal')
jobs = [
    load_jobs('arxiv2_pv100_intrinsic_seed*', subdir='intrinsic'),
    load_jobs('arxiv2_pv100_slow_seed*', subdir='slow'),
    load_jobs('arxiv2_pv100_seed*', subdir='exploration'),
    load_jobs('arxiv2_pv100_noopt_seed*', subdir='exploration'),
    load_jobs('arxiv2_pv100_noexplore_seed*', subdir='exploration'),
]
data = pd.concat(jobs, sort=False)
# Label each run from substrings of its job name. Every run starts out labelled
# as the full method; later assignments overwrite earlier ones, so order matters.
data['Algorithm'] = 'Ours: IR + FP + FA + Optimism'
data.loc[data['name'].str.contains('noopt'), 'Algorithm'] = 'IR + FP + Fast adaptation'
data.loc[data['name'].str.contains('slow'), 'Algorithm'] = 'IR + Factored policies'
data.loc[data['name'].str.contains('intrinsic'), 'Algorithm'] = 'Intrinsic reward'
data.loc[data['name'].str.contains('noexplore'), 'Algorithm'] = 'No exploration'
# Explicit domain for the colour scale, so the legend order is fixed.
algorithms = [
    'No exploration',
    'Intrinsic reward',
    'IR + Factored policies',
    'IR + FP + Fast adaptation',
    'Ours: IR + FP + FA + Optimism'
]

subset = data
# Keep episodes up to and including 1000.
subset = subset[(subset['episode'] <= 1000)]
chart = make_base_chart(
    subset, 
    title="Point Velocity with reward", 
    color=alt.Color('Algorithm', scale=alt.Scale(domain=algorithms)))

# 2x2 grid. Top: rolling mean score on train rows | on test rows.
# Bottom: cumulative novelty | cumulative count of scoring episodes (train rows).
(
    plot_with_bars(chart, 'rolling_mean_score', test=False) | \
    plot_with_bars(chart, 'rolling_mean_score', test=True)
) & (
    plot_with_bars(chart, 'sum_novelty', test=False) | \
    plot_with_bars(chart, 'count_score', test=False)
)

In [None]:
# Publication-style figure: test-time rolling mean score on Gridworld 40x40 with reward.
alt.themes.enable('publication')
jobs = [
    load_jobs('arxiv2_grid40_intrinsic_seed*', subdir='intrinsic'),
    load_jobs('arxiv2_grid40_slow_seed*', subdir='slow'),
    load_jobs('arxiv2_grid40_seed*', subdir='exploration'),
    load_jobs('arxiv2_grid40_noopt_seed*', subdir='exploration'),
    load_jobs('arxiv2_grid40_noexplore_seed*', subdir='exploration'),
]

data = pd.concat(jobs, sort=False)
# Label each run from substrings of its job name. Every run starts out labelled
# as the full method; later assignments overwrite earlier ones, so order matters.
data['Algorithm'] = 'Ours: IR + FP + FA + Optimism'
data.loc[data['name'].str.contains('noopt'), 'Algorithm'] = 'IR + FP + Fast adaptation'
data.loc[data['name'].str.contains('slow'), 'Algorithm'] = 'IR + Factored policies'
data.loc[data['name'].str.contains('intrinsic'), 'Algorithm'] = 'Intrinsic reward'
data.loc[data['name'].str.contains('noexplore'), 'Algorithm'] = 'No exploration'
# Explicit domain for the colour scale, so the legend order is fixed.
algorithms = [
    'No exploration',
    'Intrinsic reward',
    'IR + Factored policies',
    'IR + FP + Fast adaptation',
    'Ours: IR + FP + FA + Optimism'
]

subset = data
# Keep episodes up to and including 1000.
subset = subset[(subset['episode'] <= 1000)]
chart = make_base_chart(
    subset, 
    title="Gridworld 40x40 with reward", 
    color=alt.Color('Algorithm', scale=alt.Scale(domain=algorithms)))

chart = plot_with_bars(chart, 'rolling_mean_score', test=True)
# Layer 0 is the zero-size circle layer from plot_with_bars; the fixed y range
# and the legend override are attached to it.
chart.layer[0].encoding.y['scale'] = alt.Scale(domain=[-0.5, 25], nice=False)
# NOTE(review): Vega-Lite documents legendX/legendY as applying to orient='none';
# with orient='bottom' they are presumably ignored — confirm the intended placement.
chart.layer[0].encoding.color['legend'] = alt.Legend(orient='bottom', legendX=300, legendY=100)
# NOTE(review): the file name below says pv100 although this chart shows grid40 data.
# altair_saver.save(chart, 'pv100_reward.pdf', method='node')
chart

In [None]:
# Publication-style figure: test-time rolling mean score on Point Velocity with reward.
alt.themes.enable('publication')
jobs = [
    load_jobs('arxiv2_pv100_intrinsic_seed*', subdir='intrinsic'),
    load_jobs('arxiv2_pv100_slow_seed*', subdir='slow'),
    load_jobs('arxiv2_pv100_seed*', subdir='exploration'),
    load_jobs('arxiv2_pv100_noopt_seed*', subdir='exploration'),
    load_jobs('arxiv2_pv100_noexplore_seed*', subdir='exploration'),
]
data = pd.concat(jobs, sort=False)
# Label each run from substrings of its job name. Every run starts out labelled
# as the full method; later assignments overwrite earlier ones, so order matters.
data['Algorithm'] = 'Ours: IR + FP + FA + Optimism'
data.loc[data['name'].str.contains('noopt'), 'Algorithm'] = 'IR + FP + Fast adaptation'
data.loc[data['name'].str.contains('slow'), 'Algorithm'] = 'IR + Factored policies'
data.loc[data['name'].str.contains('intrinsic'), 'Algorithm'] = 'Intrinsic reward'
data.loc[data['name'].str.contains('noexplore'), 'Algorithm'] = 'No exploration'
# Explicit domain for the colour scale, so the legend order is fixed.
algorithms = [
    'No exploration',
    'Intrinsic reward',
    'IR + Factored policies',
    'IR + FP + Fast adaptation',
    'Ours: IR + FP + FA + Optimism'
]

subset = data
# Keep episodes up to and including 1000.
subset = subset[(subset['episode'] <= 1000)]
chart = make_base_chart(
    subset, 
    title="Point Velocity with reward", 
    color=alt.Color('Algorithm', scale=alt.Scale(domain=algorithms)))

chart = plot_with_bars(chart, 'rolling_mean_score', test=True)
# Layer 0 is the zero-size circle layer from plot_with_bars; pin the y range on it.
chart.layer[0].encoding.y['scale'] = alt.Scale(domain=[-1, 70], nice=False)
chart

In [17]:
# Combined publication figure: test-time rolling mean reward on Gridworld 40x40
# (left) and Point Velocity (right), saved as a single PDF.
alt.themes.enable('publication')

# Explicit domain for the colour scale, so the legend order is fixed.
algorithms = [
    'No exploration',
    'Intrinsic reward',
    'IR + Factored policies',
    'IR + FP + Fast adaptation',
    'Ours: IR + FP + FA + Optimism'
]

# Both panels use the same load / label / plot pipeline; only the job-name
# prefix and the fixed y range differ.
panels = {}
for env, y_domain in (('grid40', [-0.5, 25]), ('pv100', [-1, 70])):
    jobs = [
        load_jobs(f'arxiv2_{env}_intrinsic_seed*', subdir='intrinsic'),
        load_jobs(f'arxiv2_{env}_slow_seed*', subdir='slow'),
        load_jobs(f'arxiv2_{env}_seed*', subdir='exploration'),
        load_jobs(f'arxiv2_{env}_noopt_seed*', subdir='exploration'),
        load_jobs(f'arxiv2_{env}_noexplore_seed*', subdir='exploration'),
    ]
    data = pd.concat(jobs, sort=False)
    # Label each run from substrings of its job name. Every run starts out
    # labelled as the full method; later assignments overwrite earlier ones,
    # so the order of this sequence matters.
    data['Algorithm'] = 'Ours: IR + FP + FA + Optimism'
    for marker, label in (
        ('noopt', 'IR + FP + Fast adaptation'),
        ('slow', 'IR + Factored policies'),
        ('intrinsic', 'Intrinsic reward'),
        ('noexplore', 'No exploration'),
    ):
        data.loc[data['name'].str.contains(marker), 'Algorithm'] = label

    # Keep episodes up to and including 1000.
    subset = data[data['episode'] <= 1000]
    chart = make_base_chart(
        subset,
        title="",
        color=alt.Color('Algorithm', scale=alt.Scale(domain=algorithms)))

    chart = plot_with_bars(chart, 'rolling_mean_score', test=True)
    # Layer 0 is the zero-size circle layer from plot_with_bars; pin the y range on it.
    chart.layer[0].encoding.y['scale'] = alt.Scale(domain=y_domain, nice=False)
    for layer in chart.layer:
        layer.encoding.y['title'] = 'Reward'
    panels[env] = chart

chart_gridworld = panels['grid40']
# Only the gridworld panel gets the legend override.
chart_gridworld.layer[0].encoding.color['legend'] = alt.Legend(orient='bottom', legendX=300, legendY=100)
chart_pv = panels['pv100']

chart = alt.concat(chart_gridworld, chart_pv, spacing=50)
altair_saver.save(chart, 'grid40_pv100_reward.pdf', method='node')
chart

In [None]:
# Gridworld 20x20: per-job curves (coloured by job name) for episodes up to 300.
alt.themes.enable('personal')
jobs = [
    load_jobs('arxiv_grid20*', subdir='intrinsic'),
    load_jobs('arxiv_grid20*', subdir='slow'),
    load_jobs('arxiv_grid20*', subdir='exploration'),
]
data = pd.concat(jobs, sort=False)

subset = data
# subset['replay1M'] = subset['name'].str.contains('replay1M')
# subset = subset[subset['eval'] == False]
subset = subset[(subset['episode'] <= 300)]
# Derived fields are computed per (name, test) group in episode order: running
# totals use frame [None, 0]; rolling means use the current row plus the five before it.
chart = alt.Chart(subset, title="Gridworld 20x20", width=400, height=300).mark_line(size=3).encode(
    x='episode',
    color='name',
#     opacity='test',
    tooltip=['name', 'episode', 'score', 'novelty_score', 'count_score:Q']
).transform_calculate(
    has_score=(alt.datum.score > 0.1),
).transform_window(
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    sum_score='sum(score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    count_score='sum(has_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
)

# 2x2 grid. Top: rolling mean score on train rows | on test rows.
# Bottom: cumulative novelty | cumulative count of scoring episodes (train rows).
(
    chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == False) | \
    chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == True)
) & (
    chart.encode(y='sum_novelty:Q').transform_filter(alt.datum.test == False) | \
    chart.encode(y='count_score:Q').transform_filter(alt.datum.test == False)
)

In [None]:
# Gridworld 20x20: three hand-picked job sets, one from each results sub-directory.
# No theme is enabled here, so the cell uses whichever theme is currently active.
jobs = [
    load_jobs('grid20*', subdir='intrinsic'),
    load_jobs('grid20_slow_real_noflip', subdir='slow'),
    load_jobs('grid_optcheck', subdir='exploration'),
]
data = pd.concat(jobs, sort=False)

subset = data
# subset['replay1M'] = subset['name'].str.contains('replay1M')
# subset = subset[subset['eval'] == False]
# Keep episodes 3 through 300.
subset = subset[(subset['episode'] > 2) & (subset['episode'] <= 300)]
# Derived fields are computed per (name, test) group in episode order: running
# totals use frame [None, 0]; rolling means use the current row plus the five before it.
chart = alt.Chart(subset, title="Gridworld 20x20", width=400, height=300).mark_line(size=3).encode(
    x='episode',
    color='name',
    opacity='test',
    tooltip=['name', 'episode', 'score', 'novelty_score', 'count_score:Q']
).transform_calculate(
    has_score=(alt.datum.score > 0.1),
).transform_window(
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    sum_score='sum(score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    count_score='sum(has_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
)

# 2x2 grid. Top: rolling mean score on train rows | on test rows.
# Bottom: cumulative novelty | cumulative count of scoring episodes (train rows).
(
    chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == False) | \
    chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == True)
) & (
    chart.encode(y='sum_novelty:Q').transform_filter(alt.datum.test == False) | \
    chart.encode(y='count_score:Q').transform_filter(alt.datum.test == False)
)

In [None]:
# Gridworld 40x40: all 'grid40*' jobs from the default 'exploration' sub-directory.
jobs = [
    'grid40*',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

subset = data
# subset['replay1M'] = subset['name'].str.contains('replay1M')
# subset = subset[subset['eval'] == False]
# Keep episodes 3 through 1000.
subset = subset[(subset['episode'] > 2) & (subset['episode'] <= 1000)]
# Derived fields are computed per (name, test) group in episode order: running
# totals use frame [None, 0]; rolling means use the current row plus the five before it.
chart = alt.Chart(subset, title="Gridworld 40x40", width=400, height=300).mark_line(size=3).encode(
    x='episode',
    color='name',
    detail='name',
    tooltip=['name', 'episode', 'score', 'novelty_score', 'count_score:Q']
).transform_calculate(
    has_score=(alt.datum.score > 0.1),
).transform_window(
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    sum_score='sum(score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    count_score='sum(has_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
)
# Row 1: test rolling mean score | train count of scoring episodes.
# Row 2: train cumulative novelty | test policy_entropy. Row 3: train explore_entropy.
# policy_entropy / explore_entropy are read straight from the CSV columns
# (assumed to exist in these logs — TODO confirm).
(chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == True) | \
chart.encode(y='count_score:Q').transform_filter(alt.datum.test == False)) & \
(chart.encode(y='sum_novelty:Q').transform_filter(alt.datum.test == False) | \
chart.encode(y='policy_entropy:Q').transform_filter(alt.datum.test == True)) & \
chart.encode(y='explore_entropy:Q').transform_filter(alt.datum.test == False)

In [None]:
# Point Velocity: only the 'pv100entropy*' jobs are active; the commented
# patterns are other job sets that are currently switched off.
jobs = [
#     'pv100_noexplore',
#     'pv100_sigmoidstretch_clipvalue',
#     'pv100_clipvalue',
#     'pv100_sigmoidstretch_clipvalue_tupdate10',
#     'pv100_clipvalue_tupdate10',
#     'pv100_clipvalue_tupdate1',
#     'pv100_clipvalue_tupdate10_temp0.1',
#     'pv100_testtemp0.3*',
#     'pv100replay1M*',
    'pv100entropy*',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

subset = data
# subset['replay1M'] = subset['name'].str.contains('replay1M')
# subset = subset[subset['eval'] == False]
# Keep episodes 3 through 1000.
subset = subset[(subset['episode'] > 2) & (subset['episode'] <= 1000)]
# Derived fields are computed per (name, test) group in episode order: running
# totals use frame [None, 0]; rolling means use the current row plus the five before it.
chart = alt.Chart(subset, title="Point Velocity", width=400, height=300).mark_line(size=3).encode(
    x='episode',
    color='name',
    detail='name',
    tooltip=['name', 'episode', 'score', 'novelty_score', 'count_score:Q']
).transform_calculate(
    has_score=(alt.datum.score > 0.1),
).transform_window(
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    sum_score='sum(score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    count_score='sum(has_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

)
# Row 1: test rolling mean score | train count of scoring episodes.
# Row 2: train cumulative novelty | test policy_entropy. Row 3: train explore_entropy.
(chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == True) | \
chart.encode(y='count_score:Q').transform_filter(alt.datum.test == False)) & \
(chart.encode(y='sum_novelty:Q').transform_filter(alt.datum.test == False) | \
chart.encode(y='policy_entropy:Q').transform_filter(alt.datum.test == True)) & \
chart.encode(y='explore_entropy:Q').transform_filter(alt.datum.test == False)

In [None]:
# Point Mass: 'pm_temp0.1*' jobs against the 'pm_noexplore*' jobs.
jobs = [
    'pm_temp0.1*',
    'pm_noexplore*',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

subset = data
# subset['replay1M'] = subset['name'].str.contains('replay1M')
# subset = subset[subset['eval'] == False]
# Keep episodes 3 through 300.
subset = subset[(subset['episode'] > 2) & (subset['episode'] <= 300)]
# Derived fields are computed per (name, test) group in episode order: running
# totals use frame [None, 0]; rolling means use the current row plus the five before it.
chart = alt.Chart(subset, title="Point Mass", width=400, height=300).mark_line(size=3).encode(
    x='episode',
    color='name',
    detail='name',
    tooltip=['name', 'episode', 'score', 'novelty_score', 'count_score:Q']
).transform_calculate(
    has_score=(alt.datum.score > 0.1),
).transform_window(
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    sum_score='sum(score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    count_score='sum(has_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

)
# Row 1: test rolling mean score | train count of scoring episodes.
# Row 2: train cumulative novelty | train policy_entropy.
(chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == True) | \
chart.encode(y='count_score:Q').transform_filter(alt.datum.test == False)) & \
(chart.encode(y='sum_novelty:Q').transform_filter(alt.datum.test == False) | \
chart.encode(y='policy_entropy:Q').transform_filter(alt.datum.test == False))

In [None]:
# Swingup Sparse: one no-exploration job against the 'swingup_divergence*' jobs;
# the commented patterns are other job sets that are currently switched off.
jobs = [
#     'pm_temp0.1*',
    'swingup_noexplore_ptemp0.1-0.03',
#     'swingup_temp0.1_ptemp0.1-0.03',
#     'swingup_temp0.1_ptemp0.1-0.03_pddqn',
    'swingup_divergence*',
#     'swingup_divergence_pddqn_plr1e-3*',
#     'swingup_divergence_plr1e-3*',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

subset = data
# subset['replay1M'] = subset['name'].str.contains('replay1M')
# subset = subset[subset['eval'] == False]
# Keep episodes 3 through 10000.
subset = subset[(subset['episode'] > 2) & (subset['episode'] <= 10000)]
# Derived fields are computed per (name, test) group in episode order: running
# totals use frame [None, 0]; rolling means use the current row plus the ten before it.
chart = alt.Chart(subset, title="Swingup Sparse", width=400, height=300).mark_line(size=3).encode(
    x='episode',
    color='name',
    detail='name',
    tooltip=['name', 'episode', 'score', 'novelty_score', 'count_score:Q']
).transform_calculate(
    has_score=(alt.datum.score > 0.1),
).transform_window(
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    sum_score='sum(score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    count_score='sum(has_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-10, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-10, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],

)
# Row 1: test rolling mean score | train count of scoring episodes.
# Row 2: train cumulative novelty | train policy_entropy.
(chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == True) | \
chart.encode(y='count_score:Q').transform_filter(alt.datum.test == False)) & \
(chart.encode(y='sum_novelty:Q').transform_filter(alt.datum.test == False) | \
chart.encode(y='policy_entropy:Q').transform_filter(alt.datum.test == False))

In [None]:
# Point Velocity: job sets compared under the question in the chart title.
jobs = [
    'pv100entropy_temp0.1_ptemp0.1-0.03',
    'pv100_novtemp*',
#     'pv100_derp',
    'pv100_rootcount',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

subset = data
# subset['replay1M'] = subset['name'].str.contains('replay1M')
# subset = subset[subset['eval'] == False]
# Keep episodes 3 through 1000.
subset = subset[(subset['episode'] > 2) & (subset['episode'] <= 1000)]
# Derived fields are computed per (name, test) group in episode order: running
# totals use frame [None, 0]; rolling means use the current row plus the five before it.
chart = alt.Chart(subset, title="Does a harder Qex update improve exploration on PV?", width=400, height=300).mark_line(size=3).encode(
    x='episode',
    color='name',
    detail='name',
    tooltip=['name', 'episode', 'score', 'novelty_score', 'count_score:Q']
).transform_calculate(
    has_score=(alt.datum.score > 0.1),
).transform_window(
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    sum_score='sum(score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    count_score='sum(has_score)',
    frame=[None, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-5, 0],
    groupby=['name', 'test'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
)
# Row 1: test rolling mean score | train count of scoring episodes.
# Row 2: train cumulative novelty | test policy_entropy. Row 3: train explore_entropy.
(chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == True) | \
chart.encode(y='count_score:Q').transform_filter(alt.datum.test == False)) & \
(chart.encode(y='sum_novelty:Q').transform_filter(alt.datum.test == False) | \
chart.encode(y='policy_entropy:Q').transform_filter(alt.datum.test == True)) & \
chart.encode(y='explore_entropy:Q').transform_filter(alt.datum.test == False)

In [None]:
# Point mass: the no-exploration jobs against the exptemp1 and exptemp5 runs.
data = pd.concat([
    load_jobs('point-mass_noexplore*'),
#     load_jobs('point-mass_clipvalue'),
    load_jobs('point-mass_clipvalue_exptemp1'),
    load_jobs('point-mass_clipvalue_exptemp5'),
#     load_jobs('point-mass_sigmoidstretch_clipvalue_exptemp1'),

], sort=False)

subset = data
# Training rows only, episodes up to and including 1000.
subset = subset[subset['test'] == False]
subset = subset[subset['episode'] <= 1000]
# The rolling windows are partitioned by job name and ordered by episode.
# Without `groupby`, the centred 41-row window would average across different
# jobs wherever their rows are adjacent in the concatenated frame.
# NOTE(review): `detail='eval'` assumes an `eval` column in the logs; if runs
# mix eval values, add 'eval' to `groupby` as well — confirm.
chart = alt.Chart(subset, title="Can we learn policies faster than baseline?").mark_line().encode(
    x='episode',
    y='rolling_mean_score:Q',
    color='name',
    detail='eval',
    tooltip=['episode', 'score', 'novelty_score']
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-20, 20],
    groupby=['name'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-20, 20],
    groupby=['name'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
)
# Overlay points on the lines and make the result zoomable/pannable.
chart + chart.mark_circle().encode().interactive()

In [None]:
# Point mass: three exptemp1 runs that differ in the Q-range variant named in the job.
data = pd.concat([
    load_jobs('point-mass_clipvalue_exptemp1'),
    load_jobs('point-mass_sigmoidmargin_clipvalue_exptemp1'),
    load_jobs('point-mass_sigmoidstretch_clipvalue_exptemp1'),
], sort=False)

subset = data
# Training rows only, episodes up to and including 1000.
subset = subset[subset['test'] == False]
subset = subset[subset['episode'] <= 1000]
# The rolling windows are partitioned by job name and ordered by episode.
# Without `groupby`, the trailing 41-row window would average across different
# jobs wherever their rows are adjacent in the concatenated frame.
# NOTE(review): `detail='eval'` assumes an `eval` column in the logs; if runs
# mix eval values, add 'eval' to `groupby` as well — confirm.
chart = alt.Chart(subset, title="Does restricting Q range help?").mark_line().encode(
    x='episode',
    y='rolling_mean_novelty:Q',
    color='name',
    detail='eval',
    tooltip=['episode', 'score', 'novelty_score']
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-40, 0],
    groupby=['name'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-40, 0],
    groupby=['name'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
)
# Overlay points on the lines and make the result zoomable/pannable.
chart + chart.mark_circle().encode().interactive()

In [None]:
# Cartpole swingup: every job matching 'swingupsparse*'.
data = pd.concat([
    load_jobs('swingupsparse*'),

], sort=False)

subset = data
# Training rows only, episodes up to and including 1000.
subset = subset[subset['test'] == False]
subset = subset[subset['episode'] <= 1000]
# The rolling windows are partitioned by job name and ordered by episode.
# The glob can match several jobs, and without `groupby` the trailing 41-row
# window would average across jobs wherever their rows are adjacent.
# NOTE(review): `detail='eval'` assumes an `eval` column in the logs; if runs
# mix eval values, add 'eval' to `groupby` as well — confirm.
chart = alt.Chart(subset, title="Cartpole Swingup").mark_line().encode(
    x='episode',
    y='rolling_mean_score:Q',
    color='name',
    detail='eval',
    tooltip=['episode', 'score', 'novelty_score']
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-40, 0],
    groupby=['name'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-40, 0],
    groupby=['name'],
    sort=[{'field': 'episode', 'order': 'ascending'}],
)
# Overlay points on the lines and make the result zoomable/pannable.
chart + chart.mark_circle().encode().interactive()

In [None]:
# Point Velocity: cumulative novelty for three clipvalue jobs; the job names
# differ in their tupdate suffix.
jobs = [
    'pv100_clipvalue',
    'pv100_clipvalue_tupdate10',
    'pv100_clipvalue_tupdate1',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

subset = data
# Training rows only, episodes up to and including 1000.
subset = subset[subset['test'] == False]
subset = subset[subset['episode'] <= 1000]
# Windows are partitioned per job name. No explicit sort is given, so they
# follow the row order of the data (assumed to be episode order — TODO confirm).
chart = alt.Chart(subset, title="Do faster target updates help exploration? Not really.").mark_line().encode(
    x='episode',
    y='sum_novelty:Q',
    color='name',
    detail='eval',
    tooltip=['episode', 'score', 'novelty_score']
).transform_window(
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name']
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-5, 0],
    groupby=['name']
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-5, 0],
    groupby=['name']
)

# Overlay points on the lines and make the result zoomable/pannable.
chart + chart.mark_circle().encode().interactive()

In [None]:
# Point Velocity: cumulative novelty for sigmoidstretch jobs against their
# plain clipvalue counterparts.
jobs = [
    'pv100_sigmoidstretch_clipvalue',
    'pv100_clipvalue',
    'pv100_sigmoidstretch_clipvalue_tupdate10',
    'pv100_clipvalue_tupdate10',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

subset = data
# Training rows only, episodes up to and including 1000.
subset = subset[subset['test'] == False]
subset = subset[subset['episode'] <= 1000]
# Windows are partitioned per job name. No explicit sort is given, so they
# follow the row order of the data (assumed to be episode order — TODO confirm).
chart = alt.Chart(subset, title="Are sigmoid networks better? Not really, for PV.").mark_line().encode(
    x='episode',
    y='sum_novelty:Q',
    color='name',
    detail='eval',
    tooltip=['episode', 'score', 'novelty_score']
).transform_window(
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name']
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-5, 0],
    groupby=['name']
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-5, 0],
    groupby=['name']
)

# Overlay points on the lines and make the result zoomable/pannable.
chart + chart.mark_circle().encode().interactive()

In [None]:
# Point Velocity: raw per-episode training score for four jobs.
data = pd.concat([
    load_jobs('pv100_sigmoidstretch_clipvalue'),
    load_jobs('pv100_clipvalue'),
    load_jobs('pv100_sigmoidstretch_clipvalue_tupdate10'),
    load_jobs('pv100_noexplore'),

], sort=False)

subset = data
# Training rows only, episodes up to and including 1000.
subset = subset[subset['test'] == False]
subset = subset[subset['episode'] <= 1000]
# The y axis shows the raw `score`; the rolling means below are computed per
# job name but are not encoded in this chart.
chart = alt.Chart(subset, title="Point Velocity").mark_line().encode(
    x='episode',
    y='score:Q',
    color='name',
    detail='eval',
    tooltip=['episode', 'score', 'novelty_score']
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-40, 0],
    groupby=['name']
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-40, 0],
    groupby=['name']
)
# Overlay points on the lines and make the result zoomable/pannable.
chart + chart.mark_circle().encode().interactive()