In [27]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd

In [28]:
# Training progress logs of the two runs compared in this notebook:
# `old` is read from mrl-1 and `new` from mrl-2.
progress_logs = ('mrl-1/progress.csv', 'mrl-2/progress.csv')
old, new = map(pd.read_csv, progress_logs)

In [31]:
# Switch to the PGF backend; the figures below are saved as .pgf files.
matplotlib.use("pgf")

# rc settings applied together with the PGF backend.
pgf_rc = {
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
}
matplotlib.rcParams.update(pgf_rc)

## Reward

In [34]:
def fit_line(x, y, n, linspace=None):
    """Fit a degree-``n`` polynomial to ``(x, y)`` and evaluate it.

    Parameters
    ----------
    x, y : array-like
        Sample coordinates handed to ``np.polyfit``.
    n : int
        Degree of the fitted polynomial.
    linspace : array-like, optional
        Points at which the fitted polynomial is evaluated. When omitted
        (``None``) the polynomial is evaluated at ``x`` itself.

    Returns
    -------
    The fitted polynomial's values at ``linspace`` if given, otherwise at ``x``.
    """
    poly = np.poly1d(np.polyfit(x, y, n))
    # Compare against None explicitly: truth-testing a NumPy array with more
    # than one element raises ValueError, and an empty sequence or a scalar 0
    # would be silently ignored by a plain `if linspace:` check.
    points = x if linspace is None else linspace
    return poly(points)

# Iteration index and average reward of both runs, plus a cubic polynomial
# trend for each. `old_x` / `new_x` are also used by the return plot below.
old_x = old['Itr']
old_y = old['Average reward']
old_fit = fit_line(old_x, old_y, 3)
new_x = new['Itr']
new_y = new['Average reward']
new_fit = fit_line(new_x, new_y, 3)

fig, ax = plt.subplots(figsize=(5, 4))

# Raw per-iteration curves, drawn faintly behind the trend lines.
ax.plot(old_x, old_y, linewidth=1, alpha=0.4, c='tab:blue')
ax.plot(new_x, new_y, linewidth=1, alpha=0.4, c='orange')

# Trend lines. The x values are passed explicitly: without them matplotlib
# plots against the positional index 0..N-1, which only coincides with the
# raw curves when 'Itr' happens to be exactly that sequence.
ax.plot(old_x, old_fit, label='MRLCO data', c='tab:blue')
ax.plot(new_x, new_fit, label='Provided data', c='orange')

ax.legend()
ax.set_xlabel('Iteration')
ax.set_ylabel('Average Reward')
ax.set_title('Progress of average reward during training')

# tight_layout only accounts for artists that exist when it is called, so it
# has to run after labels and title are in place and before saving.
fig.tight_layout()
fig.savefig('reward.pgf')

## Return

In [18]:
# Average / minimum / maximum return columns of both runs.
return_cols = ('AverageReturn', 'MinReturn', 'MaxReturn')
old_avg, old_min, old_max = (old[col] for col in return_cols)
new_avg, new_min, new_max = (new[col] for col in return_cols)


def _draw_return_band(ax, title, x, avg, low, high, color, band_alpha):
    """Draw the avg/min/max curves on ``ax`` and shade the min-max range."""
    ax.set_title(title)
    for series in (avg, low, high):
        ax.plot(x, series, c=color)
    ax.fill_between(x, low, high, alpha=band_alpha, color=color)


# Two side-by-side panels sharing both axes, one panel per run.
fig, (ax1, ax2) = plt.subplots(
    nrows=1,
    ncols=2,
    figsize=(6, 2.5),
    constrained_layout=True,
    sharex=True,
    sharey=True,
)
_draw_return_band(ax1, 'MRLCO data', old_x, old_avg, old_min, old_max, 'tab:blue', 0.5)
_draw_return_band(ax2, 'Provided data', new_x, new_avg, new_min, new_max, 'orange', 0.4)

# Both panels get an x label; only the left one gets a y label.
ax1.set(xlabel='Iteration')
ax2.set(xlabel='Iteration')
ax1.set(ylabel='Return Value')
fig.suptitle('Progress of maximal, average, and minimal return value during training')
fig.savefig('return.pgf')

## Execution Time

In [185]:
# Mean of the two execution-time columns for each run.
exec_time_cols = ['PolicyExecTime', 'EnvExecTime']

old_exec_time = old[exec_time_cols].mean()
old_exec_time.name = 'Their Data'
new_exec_time = new[exec_time_cols].mean()
new_exec_time.name = 'Our Data'

# One row per run, one column per timing metric.
mean_exec_times = old_exec_time.to_frame().join(new_exec_time).transpose()

# Second row minus first row, i.e. 'Our Data' - 'Their Data'.
diff = mean_exec_times.iloc[1] - mean_exec_times.iloc[0]
diff.name = 'Difference'

# DataFrame.append was removed in pandas 2.0; concatenate the difference as a
# one-row frame instead. Left as the last expression so the cell displays it.
pd.concat([mean_exec_times, diff.to_frame().T])

Unnamed: 0,PolicyExecTime,EnvExecTime
Their Data,2.477595,0.953273
Our Data,2.485432,0.953415
Difference,0.007836,0.000142
