# Thompson Sampling for Linearly Constrained Bandits
## Plots for Regret and Violation

In [None]:
import numpy as np
from matplotlib import pyplot as plt

# Load Data

In [None]:
# Directory holding the saved experiment results; the figures are written here too.
results_dir = "results/"
file_ext = ".npy"
image_format = ".png"

# Base name (without extension) of the result file to plot.
filename = "edX_eta0.50_T50000_N16"
# Alternative result set:
# filename = "coupon_purchase_eta0.25_T10000_N16"

# NOTE: allow_pickle=True unpickles arbitrary objects, so only load trusted files.
# The file presumably holds a dict wrapped in a 0-d object array; indexing with an
# empty tuple unwraps it (the keys read below must exist in that object).
archive_path = f"{results_dir}{filename}{file_ext}"
data = np.load(archive_path, allow_pickle=True)[()]

# `filename` is reused as the stem of the saved image names, so the dots inside
# the experiment name are replaced with underscores.
filename = filename.replace(".", "_")

# Experiment settings stored alongside the results.
T, N = data["T"], data["N"]
target_success_prob = data["constraint"]

# The variable name is misspelled ("stationay"); it is kept as-is because the
# following cells refer to it by this name. Original inline note: 0.0139
stationay_opt_reward = data["stationary_opt_reward"]

In [None]:
# Number of rounds played at each column of the cumulative arrays: 1, 2, ..., T.
# np.cumsum(...)[:, t] already contains the outcome of round t + 1, so the
# baselines must be scaled by t + 1 as well. Scaling by 0..T-1 would leave
# every baseline one round behind the observed sums and understate both the
# regret and the violation by one round's worth of baseline.
rounds_played = np.arange(1, T + 1)

# Cumulative baselines, replicated once per run -> shape (N, T).
cum_constraint = np.tile(target_success_prob * rounds_played, [N, 1])
cum_opt_reward = np.tile(stationay_opt_reward * rounds_played, [N, 1])

# Cumulative reward collected by each algorithm, per run.
lincon_kl_ucb_cum_reward = np.cumsum(data['lincon_kl_ucb_reward_values'], axis=1)
lincon_ts_cum_reward = np.cumsum(data['lincon_ts_reward_values'], axis=1)

# Cumulative reward events of each algorithm, per run.
lincon_kl_ucb_cum_events = np.cumsum(data['lincon_kl_ucb_reward_events'], axis=1)
lincon_ts_cum_events = np.cumsum(data['lincon_ts_reward_events'], axis=1)

# Violation: shortfall of the cumulative events below the cumulative target,
# clamped at zero so that exceeding the target does not count as negative violation.
lincon_kl_ucb_cum_violation = np.maximum(0.0, cum_constraint - lincon_kl_ucb_cum_events)
lincon_ts_cum_violation = np.maximum(0.0, cum_constraint - lincon_ts_cum_events)

# Regret: shortfall of the cumulative reward below the stationary optimum,
# clamped at zero in the same way.
lincon_kl_ucb_cum_regret = np.maximum(0.0, cum_opt_reward - lincon_kl_ucb_cum_reward)
lincon_ts_cum_regret = np.maximum(0.0, cum_opt_reward - lincon_ts_cum_reward)

# Plot Results

In [None]:
# Global matplotlib styling shared by the figures below.
plt.rcParams.update({
    'font.size': 30,
    'lines.linewidth': 3,
    'lines.markersize': 20,
})

# Common x axis (one point per round) and legend entries for both figures.
x_ticks = np.arange(0, T)
legend_labels = ['LinCon-KL-UCB', 'LinConTS']

# One entry per figure: (y-axis label, file-name suffix, KL-UCB runs, TS runs).
# Each "runs" array holds one row per run; the curves show the mean over runs.
figure_specs = [
    ('Violation', '_VIOLATION', lincon_kl_ucb_cum_violation, lincon_ts_cum_violation),
    ('Regret', '_REGRET', lincon_kl_ucb_cum_regret, lincon_ts_cum_regret),
]

for y_label, file_suffix, kl_ucb_runs, ts_runs in figure_specs:
    plt.figure(figsize=[8, 6])
    plt.grid(False)

    # Scientific notation on both axes; the x axis is shown in units of 10^3.
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.ticklabel_format(style='sci', axis='x', scilimits=(3, 3))
    plt.xlim([0, T])
    # Uncomment and adapt to fix the y range, e.g. plt.ylim([0, 5000])

    # Average over runs (axis 0) to get one curve per algorithm.
    plt.plot(x_ticks, np.mean(kl_ucb_runs, axis=0))
    plt.plot(x_ticks, np.mean(ts_runs, axis=0))

    plt.legend(legend_labels, loc='upper left', fontsize=20)
    plt.xlabel('T')
    plt.ylabel(y_label)

    # Save next to the result files before showing the figure.
    plt.savefig(results_dir + filename + file_suffix + image_format, bbox_inches='tight')

    plt.show()

In [None]:
# Global matplotlib styling shared by the figures below.
plt.rcParams.update({'font.size': 30,
                     'lines.linewidth': 3,
                     'lines.markersize': 20})


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator element-wise, with NaN where denominator is 0.

    A plain np.divide would emit divide-by-zero warnings and produce inf/NaN
    entries; here those positions are left as NaN without evaluating the division.
    """
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    ratio = np.full_like(numerator, np.nan)
    np.divide(numerator, denominator, out=ratio, where=denominator != 0)
    return ratio


#------------------------------------
# Expected Reward
#------------------------------------

plt.figure(figsize=[8, 6])
plt.grid(False)

# Scientific notation on both axes; the x axis is shown in units of 10^3.
plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
plt.ticklabel_format(style='sci', axis='x', scilimits=(3, 3))
plt.xlim([0, T])
# Uncomment and adapt to fix the y range, e.g. plt.ylim([0, 2000])

x_ticks = np.arange(0, T)

# Average over runs (axis 0) to get one curve per algorithm.
plt.plot(x_ticks, np.mean(lincon_kl_ucb_cum_reward, axis=0))
plt.plot(x_ticks, np.mean(lincon_ts_cum_reward, axis=0))

plt.legend(['LinCon-KL-UCB', 'LinConTS'], loc='upper left', fontsize=20)
plt.xlabel('T')
plt.ylabel('Cumulative Reward ')

plt.savefig(results_dir + filename + '_REWARD' + image_format, bbox_inches='tight')

plt.show()

#------------------------------------
# Expected Reward / Expected Violation
#------------------------------------

plt.figure(figsize=[8, 6])
plt.grid(False)

plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
plt.ticklabel_format(style='sci', axis='x', scilimits=(3, 3))
plt.xlim([0, T])

# First round index to plot. The same offset is applied to the x values and to
# the plotted series so that their lengths always agree.
offset = 0
x_ticks = np.arange(offset, T)

# The cumulative violation is clamped at zero where it is computed, so its mean
# can be exactly zero; those points are plotted as gaps (NaN) instead of inf.
kl_ucb_ratio = _safe_ratio(np.mean(lincon_kl_ucb_cum_reward, axis=0),
                           np.mean(lincon_kl_ucb_cum_violation, axis=0))
ts_ratio = _safe_ratio(np.mean(lincon_ts_cum_reward, axis=0),
                       np.mean(lincon_ts_cum_violation, axis=0))

plt.plot(x_ticks, kl_ucb_ratio[offset:])
plt.plot(x_ticks, ts_ratio[offset:])

plt.legend(['LinCon-KL-UCB', 'LinConTS'], loc='upper left', fontsize=20)
plt.xlabel('T')
plt.ylabel('Cum. Reward / Violation')

plt.savefig(results_dir + filename + '_REWARD_VIO' + image_format, bbox_inches='tight')

plt.show()