In [1]:
import numpy as np
import matplotlib.pyplot as plt

from Simulators import BanditSimulator, ConfoundingBanditSimulator
from Samplers import StandardThomspon, CausalThomspon
from Environment import Environment

In [2]:
# Total Regret after n pulls:
def regret(probabilities, choices):
    """Return the cumulative regret after each pull.

    The regret of one pull is the gap between the largest entry of
    ``probabilities`` and the entry belonging to the arm that was chosen.
    The returned array is the running sum of those per-pull gaps.

    Parameters
    ----------
    probabilities : array-like of float
        One value per arm, indexed by arm number.
    choices : array-like
        Arm index chosen at each pull, in order. Values are cast to ``int``
        before being used as indices, so float-typed indices are accepted.

    Returns
    -------
    numpy.ndarray
        Running total of regret; entry ``k`` covers pulls ``0..k``.
        Empty when ``choices`` is empty.
    """
    # np.asarray lets callers pass plain lists/tuples as well as ndarrays;
    # ndarray inputs are passed through without copying.
    probabilities = np.asarray(probabilities)
    arm_indices = np.asarray(choices).astype(int)
    w_opt = probabilities.max()
    return (w_opt - probabilities[arm_indices]).cumsum()

In [3]:
# Probability assigned to each bandit arm (array position = arm index)
True_bandit_probs = np.array([0.3, 0.60, 0.75])

# Bandit simulator built from those probabilities
bandits = BanditSimulator(True_bandit_probs)

# Sampling algorithm under test: standard Thompson sampling,
# paired with an Environment constructed from an empty dict
empty_env = Environment({})
bandit_algo = StandardThomspon(bandits, empty_env)

In [None]:
# Cumulative regret experiment settings
n_pulls = 1000  # pulls per trial
n_trials = 10   # how many times the experiment is repeated to estimate E[regret]

# Holds the SUM of the per-trial cumulative-regret curves;
# divide by n_trials to obtain the average curve.
expected_total_regret = np.zeros(n_pulls)
for trial in range(n_trials):
    # Reset the sampler to its blank state so trials do not share history
    bandit_algo.initialize()
    bandit_algo.sample_bandits(n_pulls)
    choices_this_trial = np.array(bandit_algo.choices)
    regret_this_trial = regret(True_bandit_probs, choices_this_trial)
    expected_total_regret += regret_this_trial

In [None]:
# expected_total_regret is a sum over trials; average it before plotting
mean_total_regret = expected_total_regret / n_trials
plt.plot(mean_total_regret, lw=3, label="Standard Thompson")

# Labels so the figure stands on its own
plt.title("Expected total regret of different multi-armed bandit strategies")
plt.xlabel("Number of pulls")
plt.ylabel("Expected total regret \n after $n$ pulls")
plt.legend(loc="upper left")
plt.show()