In [2]:
# Inject a CSS rule forcing elements of class `container` to 100% width
# (presumably the classic-notebook page wrapper, so cells use the full window).
# `display` and `HTML` come from the public `IPython.display` module: importing
# `display` from `IPython.core.display` has been deprecated since IPython 7.14.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
import pandas as pd
import numpy as np
import altair as alt
import altair_saver
import glob
import os
# Turn off Altair's max-rows check on chart data (presumably so the full,
# concatenated result frames loaded below can be passed to alt.Chart).
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [4]:
def personal():
    """Return the Altair theme definition used by this notebook.

    The result is a plain dict with a single top-level ``'config'`` entry that
    sets a 600x400 default view, the ``set1`` colour scheme for both the
    ``category`` and ``ordinal`` ranges, a legend ``labelLimit`` of 0 and a
    white background.
    """
    view = dict(height=400, width=600)
    colour_ranges = {kind: {'scheme': 'set1'} for kind in ('category', 'ordinal')}
    # labelLimit=0 presumably removes the pixel cap on legend labels so long
    # run names are not truncated -- confirm against the Vega-Lite legend docs.
    legend = dict(labelLimit=0)

    # A global {'mark': {'clip': True}} entry is currently disabled and is
    # therefore not part of the returned config.
    config = dict(
        view=view,
        range=colour_ranges,
        legend=legend,
        background='white',
    )
    return dict(config=config)
# Register the `personal` theme function under the name 'personal', then make
# it the active Altair theme. Order matters: the name must be registered
# before it can be enabled.
alt.themes.register('personal', personal)
alt.themes.enable('personal')

ThemeRegistry.enable('personal')

In [5]:
def load_jobs(pattern, subdir='exploration'):
    """Load the train/test result CSVs of every job matching a glob pattern.

    Every path matching ``results/<subdir>/<pattern>`` (relative to the current
    working directory) is treated as a job directory that contains a
    ``train.csv`` and a ``test.csv``.

    Parameters
    ----------
    pattern : str
        Glob pattern for job directory names, e.g. ``'pv100entropy*'``.
    subdir : str, default 'exploration'
        Sub-directory of ``results/`` to search.

    Returns
    -------
    pandas.DataFrame
        Rows of all matched jobs under a fresh 0..n-1 index, with two added
        columns: ``test`` (False for ``train.csv`` rows, True for ``test.csv``
        rows) and ``name`` (base name of the job directory). Jobs appear in
        sorted path order; within a job the train rows precede the test rows.

    Raises
    ------
    ValueError
        If no path matches the pattern.
    """
    search = f'results/{subdir}/{pattern}'
    # glob returns matches in arbitrary order; sort so the row order of the
    # result is the same on every run and every machine.
    jobs = sorted(glob.glob(search))
    if not jobs:
        # Without this guard pd.concat([]) fails with "No objects to
        # concatenate", which says nothing about the pattern at fault.
        raise ValueError(f'No jobs found matching {search!r}')

    results = []
    for job in jobs:
        # normpath strips a trailing separator so basename is never empty.
        name = os.path.basename(os.path.normpath(job))
        train_data = pd.read_csv(os.path.join(job, 'train.csv'))
        train_data['test'] = False
        test_data = pd.read_csv(os.path.join(job, 'test.csv'))
        test_data['test'] = True
        data = pd.concat([train_data, test_data], sort=False)
        data['name'] = name
        results.append(data)
    return pd.concat(results, sort=False, ignore_index=True)

In [536]:
# Point Velocity: multi-panel view of the selected exploration runs.
# Glob patterns (job directory names) handed to load_jobs(); the commented
# entries are alternative runs that can be switched back in.
jobs = [
#     'pv100_noexplore',
#     'pv100_sigmoidstretch_clipvalue',
#     'pv100_clipvalue',
#     'pv100_sigmoidstretch_clipvalue_tupdate10',
#     'pv100_clipvalue_tupdate10',
#     'pv100_clipvalue_tupdate1',
#     'pv100_clipvalue_tupdate10_temp0.1',
#     'pv100_testtemp0.3*',
#     'pv100replay1M*',
    'pv100entropy*',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

# Keep rows with 2 < episode <= 1000.
subset = data[(data['episode'] > 2) & (data['episode'] <= 1000)]

# Base chart shared by every panel; each panel below only adds its y channel
# and a train/test filter.
#
# The windows are partitioned by run AND by the train/test flag. load_jobs()
# stacks each run's train rows ahead of its test rows, and the per-panel
# train/test filter is applied only after these windows. Grouping by 'name'
# alone would let the last train rows leak into the rolling mean (and the
# whole train total into the cumulative sums) of the test rows.
chart = alt.Chart(subset, title="Point Velocity", width=400, height=300).mark_line(size=3).encode(
    x='episode',
    color='name',
    detail='name',
    tooltip=['name', 'episode', 'score', 'novelty_score', 'count_score:Q']
).transform_calculate(
    # Flag rows whose score exceeds 0.1; summed below into count_score.
    has_score=(alt.datum.score > 0.1),
).transform_window(
    # Cumulative novelty: the frame runs from the first row of the group to the current row.
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name', 'test']
).transform_window(
    sum_score='sum(score)',
    frame=[None, 0],
    groupby=['name', 'test']
).transform_window(
    # Running count of rows with score > 0.1.
    count_score='sum(has_score)',
    frame=[None, 0],
    groupby=['name', 'test']
).transform_window(
    # Trailing mean over the current row and the 5 rows before it.
    rolling_mean_score='mean(score)',
    frame=[-5, 0],
    groupby=['name', 'test']
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-5, 0],
    groupby=['name', 'test']
)

# Layout (| = side by side, & = stacked):
#   row 1: test rolling-mean score   | train running count of score > 0.1
#   row 2: train cumulative novelty  | test policy_entropy
#   row 3: train explore_entropy
# policy_entropy / explore_entropy are read as raw columns and are expected to
# come from the result CSVs.
(
    (
        chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == True)
        | chart.encode(y='count_score:Q').transform_filter(alt.datum.test == False)
    ) & (
        chart.encode(y='sum_novelty:Q').transform_filter(alt.datum.test == False)
        | chart.encode(y='policy_entropy:Q').transform_filter(alt.datum.test == True)
    )
    & chart.encode(y='explore_entropy:Q').transform_filter(alt.datum.test == False)
)

In [542]:
# Point Mass: multi-panel comparison of the selected runs.
# Glob patterns (job directory names) handed to load_jobs().
jobs = [
    'pm_temp0.1*',
    'pm_noexplore*',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

# Keep rows with 2 < episode <= 300.
subset = data[(data['episode'] > 2) & (data['episode'] <= 300)]

# Base chart shared by every panel; each panel below only adds its y channel
# and a train/test filter.
#
# The windows are partitioned by run AND by the train/test flag. load_jobs()
# stacks each run's train rows ahead of its test rows, and the per-panel
# train/test filter is applied only after these windows. Grouping by 'name'
# alone would let the last train rows leak into the rolling mean (and the
# whole train total into the cumulative sums) of the test rows.
chart = alt.Chart(subset, title="Point Mass", width=400, height=300).mark_line(size=3).encode(
    x='episode',
    color='name',
    detail='name',
    tooltip=['name', 'episode', 'score', 'novelty_score', 'count_score:Q']
).transform_calculate(
    # Flag rows whose score exceeds 0.1; summed below into count_score.
    has_score=(alt.datum.score > 0.1),
).transform_window(
    # Cumulative novelty: the frame runs from the first row of the group to the current row.
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name', 'test']
).transform_window(
    sum_score='sum(score)',
    frame=[None, 0],
    groupby=['name', 'test']
).transform_window(
    # Running count of rows with score > 0.1.
    count_score='sum(has_score)',
    frame=[None, 0],
    groupby=['name', 'test']
).transform_window(
    # Trailing mean over the current row and the 5 rows before it.
    rolling_mean_score='mean(score)',
    frame=[-5, 0],
    groupby=['name', 'test']
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-5, 0],
    groupby=['name', 'test']
)

# Layout (| = side by side, & = stacked):
#   row 1: test rolling-mean score   | train running count of score > 0.1
#   row 2: train cumulative novelty  | train policy_entropy
# policy_entropy is read as a raw column and is expected to come from the
# result CSVs.
(
    (
        chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == True)
        | chart.encode(y='count_score:Q').transform_filter(alt.datum.test == False)
    ) & (
        chart.encode(y='sum_novelty:Q').transform_filter(alt.datum.test == False)
        | chart.encode(y='policy_entropy:Q').transform_filter(alt.datum.test == False)
    )
)

In [None]:
# Swingup Sparse: multi-panel comparison of the selected runs.
# Glob patterns (job directory names) handed to load_jobs(); the commented
# entries are alternative runs that can be switched back in.
jobs = [
#     'pm_temp0.1*',
    'swingup_noexplore_ptemp0.1-0.03',
#     'swingup_temp0.1_ptemp0.1-0.03',
#     'swingup_temp0.1_ptemp0.1-0.03_pddqn',
    'swingup_divergence*',
#     'swingup_divergence_pddqn_plr1e-3*',
#     'swingup_divergence_plr1e-3*',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

# Keep rows with 2 < episode <= 10000.
subset = data[(data['episode'] > 2) & (data['episode'] <= 10000)]

# Base chart shared by every panel; each panel below only adds its y channel
# and a train/test filter.
#
# The windows are partitioned by run AND by the train/test flag. load_jobs()
# stacks each run's train rows ahead of its test rows, and the per-panel
# train/test filter is applied only after these windows. Grouping by 'name'
# alone would let the last train rows leak into the rolling mean (and the
# whole train total into the cumulative sums) of the test rows.
chart = alt.Chart(subset, title="Swingup Sparse", width=400, height=300).mark_line(size=3).encode(
    x='episode',
    color='name',
    detail='name',
    tooltip=['name', 'episode', 'score', 'novelty_score', 'count_score:Q']
).transform_calculate(
    # Flag rows whose score exceeds 0.1; summed below into count_score.
    has_score=(alt.datum.score > 0.1),
).transform_window(
    # Cumulative novelty: the frame runs from the first row of the group to the current row.
    sum_novelty='sum(novelty_score)',
    frame=[None, 0],
    groupby=['name', 'test']
).transform_window(
    sum_score='sum(score)',
    frame=[None, 0],
    groupby=['name', 'test']
).transform_window(
    # Running count of rows with score > 0.1.
    count_score='sum(has_score)',
    frame=[None, 0],
    groupby=['name', 'test']
).transform_window(
    # Trailing mean over the current row and the 10 rows before it.
    rolling_mean_score='mean(score)',
    frame=[-10, 0],
    groupby=['name', 'test']
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-10, 0],
    groupby=['name', 'test']
)

# Layout (| = side by side, & = stacked):
#   row 1: test rolling-mean score   | train running count of score > 0.1
#   row 2: train cumulative novelty  | train policy_entropy
# policy_entropy is read as a raw column and is expected to come from the
# result CSVs.
(
    (
        chart.encode(y='rolling_mean_score:Q').transform_filter(alt.datum.test == True)
        | chart.encode(y='count_score:Q').transform_filter(alt.datum.test == False)
    ) & (
        chart.encode(y='sum_novelty:Q').transform_filter(alt.datum.test == False)
        | chart.encode(y='policy_entropy:Q').transform_filter(alt.datum.test == False)
    )
)

In [290]:
# Point mass: smoothed training score of exploration runs vs. the no-explore baseline.
# The commented load_jobs() calls are alternative runs that can be switched back in.
data = pd.concat([
    load_jobs('point-mass_noexplore*'),
#     load_jobs('point-mass_clipvalue'),
    load_jobs('point-mass_clipvalue_exptemp1'),
    load_jobs('point-mass_clipvalue_exptemp5'),
#     load_jobs('point-mass_sigmoidstretch_clipvalue_exptemp1'),

], sort=False)

# Training rows only, up to and including episode 1000.
subset = data
subset = subset[subset['test'] == False]
subset = subset[subset['episode'] <= 1000]

# The rolling windows are partitioned by run. `subset` is several runs stacked
# end to end, so an un-grouped centred window (20 rows either side) would
# average the tail of one run into the head of the next.
# NOTE(review): `detail` points at an 'eval' column, whereas load_jobs() only
# adds 'test' and 'name' -- confirm the result CSVs provide 'eval'.
chart = alt.Chart(subset, title="Can we learn policies faster than baseline?").mark_line().encode(
    x='episode',
    y='rolling_mean_score:Q',
    color='name',
    detail='eval',
    tooltip=['episode', 'score', 'novelty_score']
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-20, 20],
    groupby=['name']
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-20, 20],
    groupby=['name']
)
# Lines plus an interactive (pan/zoom) point layer that carries the tooltips.
chart + chart.mark_circle().encode().interactive()

In [117]:
# Point mass: smoothed training novelty for the clipvalue run and its two sigmoid variants.
data = pd.concat([
    load_jobs('point-mass_clipvalue_exptemp1'),
    load_jobs('point-mass_sigmoidmargin_clipvalue_exptemp1'),
    load_jobs('point-mass_sigmoidstretch_clipvalue_exptemp1'),
], sort=False)

# Training rows only, up to and including episode 1000.
subset = data
subset = subset[subset['test'] == False]
subset = subset[subset['episode'] <= 1000]

# The rolling windows are partitioned by run. `subset` is several runs stacked
# end to end, so an un-grouped trailing window (current row plus the 40 before
# it) would average the tail of one run into the first points of the next.
# NOTE(review): `detail` points at an 'eval' column, whereas load_jobs() only
# adds 'test' and 'name' -- confirm the result CSVs provide 'eval'.
chart = alt.Chart(subset, title="Does restricting Q range help?").mark_line().encode(
    x='episode',
    y='rolling_mean_novelty:Q',
    color='name',
    detail='eval',
    tooltip=['episode', 'score', 'novelty_score']
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-40, 0],
    groupby=['name']
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-40, 0],
    groupby=['name']
)
# Lines plus an interactive (pan/zoom) point layer that carries the tooltips.
chart + chart.mark_circle().encode().interactive()

In [128]:
# Cartpole swingup: smoothed training score of every job matching 'swingupsparse*'.
data = pd.concat([
    load_jobs('swingupsparse*'),

], sort=False)

# Training rows only, up to and including episode 1000.
subset = data
subset = subset[subset['test'] == False]
subset = subset[subset['episode'] <= 1000]

# The rolling windows are partitioned by run. The glob may match several jobs,
# which load_jobs() stacks end to end, so an un-grouped trailing window
# (current row plus the 40 before it) would average the tail of one run into
# the first points of the next.
# NOTE(review): `detail` points at an 'eval' column, whereas load_jobs() only
# adds 'test' and 'name' -- confirm the result CSVs provide 'eval'.
chart = alt.Chart(subset, title="Cartpole Swingup").mark_line().encode(
    x='episode',
    y='rolling_mean_score:Q',
    color='name',
    detail='eval',
    tooltip=['episode', 'score', 'novelty_score']
).transform_window(
    rolling_mean_score='mean(score)',
    frame=[-40, 0],
    groupby=['name']
).transform_window(
    rolling_mean_novelty='mean(novelty_score)',
    frame=[-40, 0],
    groupby=['name']
)
# Lines plus an interactive (pan/zoom) point layer that carries the tooltips.
chart + chart.mark_circle().encode().interactive()

In [231]:
# Point velocity: cumulative training novelty for three target-update settings.
jobs = [
    'pv100_clipvalue',
    'pv100_clipvalue_tupdate10',
    'pv100_clipvalue_tupdate1',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

# Training rows only, up to and including episode 1000.
subset = data[(data['test'] == False) & (data['episode'] <= 1000)]

# NOTE(review): `detail` points at an 'eval' column, whereas load_jobs() only
# adds 'test' and 'name' -- confirm the result CSVs provide 'eval'.
chart = (
    alt.Chart(subset, title="Do faster target updates help exploration? Not really.")
    .mark_line()
    .encode(
        x='episode',
        y='sum_novelty:Q',
        color='name',
        detail='eval',
        tooltip=['episode', 'score', 'novelty_score'],
    )
    # Per-run cumulative novelty: the frame runs from the first row of the run to the current row.
    .transform_window(frame=[None, 0], groupby=['name'], sum_novelty='sum(novelty_score)')
    # Per-run trailing means over the current row and the 5 rows before it
    # (computed, but not mapped to a channel in this cell).
    .transform_window(frame=[-5, 0], groupby=['name'], rolling_mean_score='mean(score)')
    .transform_window(frame=[-5, 0], groupby=['name'], rolling_mean_novelty='mean(novelty_score)')
)

# Lines plus an interactive (pan/zoom) point layer that carries the tooltips.
alt.layer(chart, chart.mark_circle().encode().interactive())

In [232]:
# Point velocity: cumulative training novelty, sigmoid-stretch vs. plain clipvalue runs.
jobs = [
    'pv100_sigmoidstretch_clipvalue',
    'pv100_clipvalue',
    'pv100_sigmoidstretch_clipvalue_tupdate10',
    'pv100_clipvalue_tupdate10',
]
data = pd.concat([load_jobs(j) for j in jobs], sort=False)

# Training rows only, up to and including episode 1000.
subset = data[(data['test'] == False) & (data['episode'] <= 1000)]

# NOTE(review): `detail` points at an 'eval' column, whereas load_jobs() only
# adds 'test' and 'name' -- confirm the result CSVs provide 'eval'.
chart = (
    alt.Chart(subset, title="Are sigmoid networks better? Not really, for PV.")
    .mark_line()
    .encode(
        x='episode',
        y='sum_novelty:Q',
        color='name',
        detail='eval',
        tooltip=['episode', 'score', 'novelty_score'],
    )
    # Per-run cumulative novelty: the frame runs from the first row of the run to the current row.
    .transform_window(frame=[None, 0], groupby=['name'], sum_novelty='sum(novelty_score)')
    # Per-run trailing means over the current row and the 5 rows before it
    # (computed, but not mapped to a channel in this cell).
    .transform_window(frame=[-5, 0], groupby=['name'], rolling_mean_score='mean(score)')
    .transform_window(frame=[-5, 0], groupby=['name'], rolling_mean_novelty='mean(novelty_score)')
)

# Lines plus an interactive (pan/zoom) point layer that carries the tooltips.
alt.layer(chart, chart.mark_circle().encode().interactive())

In [230]:
# Point velocity: raw (unsmoothed) training score per episode for four runs.
data = pd.concat(
    [
        load_jobs(pattern)
        for pattern in (
            'pv100_sigmoidstretch_clipvalue',
            'pv100_clipvalue',
            'pv100_sigmoidstretch_clipvalue_tupdate10',
            'pv100_noexplore',
        )
    ],
    sort=False,
)

# Training rows only, up to and including episode 1000.
subset = data[(data['test'] == False) & (data['episode'] <= 1000)]

# NOTE(review): `detail` points at an 'eval' column, whereas load_jobs() only
# adds 'test' and 'name' -- confirm the result CSVs provide 'eval'.
chart = (
    alt.Chart(subset, title="Point Velocity")
    .mark_line()
    .encode(
        x='episode',
        y='score:Q',
        color='name',
        detail='eval',
        tooltip=['episode', 'score', 'novelty_score'],
    )
    # Per-run trailing means over the current row and the 40 rows before it
    # (computed, but not mapped to a channel in this cell, which plots the raw score).
    .transform_window(frame=[-40, 0], groupby=['name'], rolling_mean_score='mean(score)')
    .transform_window(frame=[-40, 0], groupby=['name'], rolling_mean_novelty='mean(novelty_score)')
)

# Lines plus an interactive (pan/zoom) point layer that carries the tooltips.
alt.layer(chart, chart.mark_circle().encode().interactive())