### Soft shielding via inserting differentiable constraints



#### Experiments
- Policy gradient (baseline)
- Policy gradient + hard shielding
- Policy gradient + shielding + object detection

In [1]:
import altair as alt
import os
import pandas as pd

# Log-file stems (the ".pkl" suffix is appended at load time), one per algorithm:
# plain policy gradient, policy gradient + shield without detection,
# and policy gradient + shield with detection.
file_pg = "pg_grid2x2_raw"
file_pg_dpl = "pg_dpl_nodetect_grid2x2_raw"
file_pg_dpl_detect = "pg_dpl_detect_grid2x2_raw"

# Order matters: it must line up with the `keys` passed to make_chart2.
logger_files = [file_pg, file_pg_dpl, file_pg_dpl_detect]


Making a chart from dataframes

In [2]:
def make_chart2(data_series, keys=['pg','pg_dpl','pg_dpl_detect'], window_speed=1000):
    """Build an interactive Altair chart comparing logged series across settings.

    The frames are stacked with ``pd.concat`` and labelled by ``keys``; the
    result is a line chart placed next to a clickable setting/variable grid
    that acts as a legend. Clicking cells in either view selects the matching
    lines; unselected lines are drawn in light gray.

    Parameters
    ----------
    data_series : list of pandas.DataFrame
        One frame per setting. The combined frame must provide the columns
        ``n_steps``, ``variable`` and ``value``, which are read below.
    keys : list of str
        One label per frame, in the same order as ``data_series``. The labels
        become the ``setting`` column. The default list is never mutated.
    window_speed : int
        Only used to render the x-axis title ``Steps (x{window_speed})``.

    Returns
    -------
    The horizontal concatenation ``timeseries | legend`` of the two charts.

    Raises
    ------
    ValueError
        If ``data_series`` is a list/tuple whose length differs from ``keys``.
    """
    # Fail loudly on a label/frame mismatch instead of relying on whatever
    # pd.concat does with it (which could mislabel or drop a series).
    if isinstance(data_series, (list, tuple)) and len(keys) != len(data_series):
        raise ValueError(
            f"make_chart2 needs one entry in keys per dataframe: got {len(keys)} keys "
            f"for {len(data_series)} dataframes"
        )

    combined_data_series = pd.concat(data_series, keys=keys, names=['setting'])
    # Move the outer 'setting' index level into a regular column.
    combined_data_series = combined_data_series.reset_index(0)
    # A single nominal field that uniquely identifies each plotted line.
    combined_data_series['series'] = combined_data_series['setting'] + " " + combined_data_series['variable']

    # Data is prepared, now make a chart.
    # empty='none': nothing is highlighted until the user clicks something.
    # NOTE(review): selection_multi / add_selection appear to be deprecated in
    # Altair 5 (selection_point / add_params) - confirm the pinned version
    # before migrating.
    selection_exp = alt.selection_multi(fields=['setting', 'variable'], empty='none')
    color_exp = alt.condition(selection_exp,
                      alt.Color('series:N', legend=None),
                      alt.value('lightgray'))

    timeseries = alt.Chart(combined_data_series).properties(
        width=500,
        height=250
    ).mark_line(
        opacity=0.5
    ).encode(
        x=alt.X('n_steps:Q',
                axis=alt.Axis(title=f'Steps (x{window_speed})', tickMinStep=1)),
        y=alt.Y('value:Q',
                sort="ascending",
                axis=alt.Axis(title='value', tickMinStep=0.1),
                ),
        color=color_exp
    ).add_selection(
        selection_exp
    )

    # Grid of setting x variable cells that doubles as a clickable legend;
    # it shares the selection with the line chart.
    legend = alt.Chart(combined_data_series).mark_rect().encode(
        x=alt.X('setting:N', axis=alt.Axis(orient='bottom')),
        y='variable',
        color=color_exp
    ).add_selection(
        selection_exp
    )

    chart = timeseries | legend

    return chart

### Env 1

In [3]:
folderpath = "data/env1/"
# os.path.join avoids the doubled separator that folderpath + "/env_spec.txt" produced.
env_file = os.path.join(folderpath, "env_spec.txt")

# Print the environment specification stored in the same folder as the logs.
with open(env_file, "r") as f:
    content = f.readlines()
for line in content:
    # Each line keeps its trailing newline, so print() leaves a blank line between entries.
    print(line)

Layout:           grid2x2

Learning rate:    0.001

Reward goal:      10

Reward crash:     0

Reward food:      0

Reward time:      -1


In [4]:
# Read one pickled dataframe per algorithm, in logger_files order.
# Pickles can execute code on load, so only read files you produced yourself.
data_frames = [
    pd.read_pickle(f"{folderpath}{logger_file}.pkl")
    for logger_file in logger_files
]

chart = make_chart2(data_frames, keys=['pg','pg_dpl','pg_dpl_detect'], window_speed=1000)
chart

### Env 2

In [5]:
folderpath = "data/env2/"
# os.path.join avoids the doubled separator that folderpath + "/env_spec.txt" produced.
env_file = os.path.join(folderpath, "env_spec.txt")

# Print the environment specification stored in the same folder as the logs.
with open(env_file, "r") as f:
    content = f.readlines()
for line in content:
    # Each line keeps its trailing newline, so print() leaves a blank line between entries.
    print(line)

Layout:           grid2x2

Learning rate:    0.001

Reward goal:      10

Reward crash:     -10

Reward food:      0

Reward time:      -1





In [6]:
# Read one pickled dataframe per algorithm, in logger_files order.
# Pickles can execute code on load, so only read files you produced yourself.
data_frames = [
    pd.read_pickle(f"{folderpath}{logger_file}.pkl")
    for logger_file in logger_files
]

chart = make_chart2(data_frames, keys=['pg','pg_dpl','pg_dpl_detect'], window_speed=1000)
chart

### Env 3

In [7]:
folderpath = "data/env3/"
# os.path.join avoids the doubled separator that folderpath + "/env_spec.txt" produced.
env_file = os.path.join(folderpath, "env_spec.txt")

# Print the environment specification stored in the same folder as the logs.
with open(env_file, "r") as f:
    content = f.readlines()
for line in content:
    # Each line keeps its trailing newline, so print() leaves a blank line between entries.
    print(line)

Layout:           grid2x3

Learning rate:    0.001

Reward goal:      10

Reward crash:     0

Reward food:      0

Reward time:      -1


In [8]:
# Env 3 has logs for two algorithms only, stored under grid2x3 file stems.
# These assignments rebind file_pg / file_pg_dpl_detect from the first cell.
file_pg = "pg_grid2x3_raw"
file_pg_dpl_detect = "pg_dpl_detect_grid2x3_raw"

# Read the two pickled dataframes, in the same order as the keys below.
# Pickles can execute code on load, so only read files you produced yourself.
data_frames = [
    pd.read_pickle(f"{folderpath}{logger_file}.pkl")
    for logger_file in (file_pg, file_pg_dpl_detect)
]

chart = make_chart2(data_frames, keys=['pg','pg_dpl_detect'], window_speed=1000)
chart