In [None]:
import numpy as np
from math import*
import matplotlib.pyplot as plt
import random
import scipy
import scipy.stats

plt.rcParams["figure.figsize"] = (8,8)

In [None]:
class PoissonBandit :
    ''' Multi-armed bandit whose arms return Poisson-distributed rewards.

    The instance records, for every call to play(), the instant pseudo-regret
    (gap between the best arm's mean and the mean of the arm that was played).
    '''

    def __init__(self, means , randomstate = None ):
        ''' Accept an array (means) of K >= 2 positive floats (parameter lambda of each arm)
            and ( optionally ) a seed for a random number generator .

            means may be any array-like (list, tuple, ndarray); it is converted to a
            float ndarray so that the gap computation below is always element-wise. '''

        self.means = np.asarray(means, dtype=float)

        # Private generator: rewards drawn by this bandit depend only on `randomstate`.
        self.random = np.random.RandomState(randomstate)

        # Instant pseudo-regrets, one entry appended per call to play().
        self.ps_regrets = []

        # Index and mean of the best arm.
        self.kstar = np.argmax(self.means)
        self.mustar = self.means[self.kstar]

        # gaps[k] = mean of the best arm - mean of arm k (0 for the best arm).
        self.gaps = self.mustar - self.means

    def get_K(self):
        ''' Return the number of actions . '''

        return len(self.means)

    def play(self,action):
        ''' Accept a parameter 0 <= k < K, logs the instant pseudo - regret ,
        and return the realization of Poisson random variable with lambda
        being the mean of the given action. '''

        # Log the instant pseudo-regret of the chosen action.
        self.ps_regrets.append(self.gaps[action])

        # One Poisson draw is generated for every arm and only the entry of the
        # chosen arm is returned, so the draws themselves do not depend on `action`.
        samples = self.random.poisson(self.means)

        return samples[action]

    def get_cumulative_regret(self):
        ''' Return an array of the cumulative sum of pseudo - regret per round . '''
        return np.cumsum(self.ps_regrets)

In [None]:
#Algorithme Thompson Sampling for Poisson Bandit
#Référence au numéro 3-a) du travail

def ts_poisson(bandit, T, alpha, beta, seed=None, stock_alpha_beta=False):
    '''Play the given bandit for T rounds with Thompson Sampling for Poisson rewards.

    Every arm starts from the same Gamma prior with shape alpha (real > 0) and
    rate beta (real > 0). At each round one value is sampled from every arm's
    current Gamma distribution, the arm with the largest sample is played through
    bandit.play(), and only that arm's parameters are updated
    (shape += reward, rate += 1).

    seed (optional) seeds the generator used for the Gamma samples.

    When stock_alpha_beta == True, returns (alpha_stock, beta_stock): two lists
    with one inner list per arm, holding that arm's parameter before the first
    round and after each of the T rounds (T + 1 values). Otherwise returns None.
    '''

    sampler = np.random.RandomState(seed)

    # Number of available actions.
    n_arms = bandit.get_K()

    # Current parameters of each arm, all initialised to the prior.
    shape_post = np.repeat(alpha, n_arms)
    rate_post = np.repeat(beta, n_arms)

    keep_history = (stock_alpha_beta == True)
    if keep_history:
        alpha_stock = [[alpha] for _ in range(n_arms)]
        beta_stock = [[beta] for _ in range(n_arms)]

    for _ in range(T):

        # RandomState.gamma is parameterised by a scale, i.e. the inverse of the rate.
        draws = sampler.gamma(shape=shape_post, scale=1/rate_post)

        # Play the arm whose sampled value is the largest.
        chosen = np.argmax(draws)
        reward = bandit.play(chosen)

        # Only the played arm is updated; the other arms keep their parameters.
        shape_post[chosen] = shape_post[chosen] + reward
        rate_post[chosen] = rate_post[chosen] + 1

        if keep_history:
            for arm, (a_hist, b_hist) in enumerate(zip(alpha_stock, beta_stock)):
                if arm == chosen:
                    a_hist.append(shape_post[arm])
                    b_hist.append(rate_post[arm])
                else:
                    # Unplayed arm: repeat its previous value.
                    a_hist.append(a_hist[-1])
                    b_hist.append(b_hist[-1])

    if keep_history:
        return (alpha_stock, beta_stock)


In [None]:
# Test of the Thompson Sampling algorithm on a bandit with Poisson rewards.
# The bandit has 2 arms with means lambda_1=6 and lambda_2=7.
# The two posterior densities are displayed for the prior and after each of the 100 rounds.

banditest=PoissonBandit(np.array([6,7]),10)
alpha, beta = 5/2, 1/2

T=100

# al[k][i] / be[k][i]: parameters of arm k after i rounds (index 0 is the prior).
al,be = ts_poisson(banditest,T,alpha,beta,seed=9,stock_alpha_beta=True)

x=np.linspace(0,10,100)
for i in range(T+1):
    # scipy's gamma takes a scale, i.e. the inverse of the rate stored in `be`.
    plt.plot(x,scipy.stats.gamma.pdf(x, al[0][i], scale=1/be[0][i]), color ='red',
             label="posterior de l'action 1")
    plt.plot(x,scipy.stats.gamma.pdf(x, al[1][i], scale=1/be[1][i]), color ='blue',
             label="posterior de l'action 2")
    
    # Raw string: '\l' is not a valid escape sequence in a normal string literal.
    plt.xlabel(r'Valeurs de $\lambda$ possibles')
    plt.ylabel('Valeurs de la fonction de densité')
    plt.title("Courbes des densités postriors\n pour l'action 1 et l'action 2 après {} tour(s)".format(i))
    
    plt.legend()
    plt.show()


In [None]:
# Generate N two-armed Poisson bandit instances; each arm mean is drawn
# uniformly from the interval [0, 10). T is the number of rounds per run.
N = 1000
T = 1000

generation = np.random.RandomState(10)

# One row per bandit instance, one column per arm.
means = generation.rand(N, 2) * 10

In [None]:
# Experiment: priors whose variance is too small.
#
# Every prior has mean 5 (the centre of the interval); only the prior variance
# changes from one curve to the next.

moyennes = np.array([5,5,5,5,5])
variances = np.array([10,5,1,1/2,1/4])

# A Gamma distribution with shape alpha and rate beta has mean alpha/beta and
# variance alpha/beta**2, hence the conversion below.
alpha = (moyennes**2)/(variances)
beta = (moyennes)/(variances)

# One colour per prior, in the same order as `moyennes` / `variances`.
couleurs = ['red', 'blue', 'green', 'yellow', 'purple']

for j, couleur in enumerate(couleurs):
    # Cumulative pseudo-regret of each of the N bandits under prior j.
    # Bandit i and its Thompson Sampling run are both seeded with i for every prior.
    cum_regrets = []
    for i in range(N):
        banditest = PoissonBandit(means[i], randomstate=i)
        ts_poisson(banditest, T, alpha=alpha[j], beta=beta[j], seed=i)
        cum_regrets.append(banditest.get_cumulative_regret())

    # Shape (N, T): average / standard deviation over the bandits at each round.
    cum_regrets = np.asarray(cum_regrets)
    cum_regrets_moy = cum_regrets.mean(axis=0)
    cum_regrets_sd = cum_regrets.std(axis=0)

    # Solid line: mean; dashed line: mean + one standard deviation.
    plt.plot(range(T), cum_regrets_moy, color=couleur,
             label = r'$\alpha_0$ = {}, $\beta_0$ = {},  $\mu_0$ = {}, $\sigma^2_0$ = {}'.format(alpha[j],beta[j],moyennes[j],variances[j]))
    plt.plot(range(T), cum_regrets_moy + cum_regrets_sd, color=couleur, linestyle='dashed')

plt.xlabel('Pas de temps')
plt.ylabel('Pseudo regret cumulatif moyen')
plt.title('Test pour Thompson Sampling, N={}, T={},\n impact de la variance des priors'.format(N,T))
plt.legend(bbox_to_anchor=(1.05, 1))


In [None]:
# Generate N two-armed Poisson bandit instances; each arm mean is drawn
# uniformly from the interval [0, 10). T is the number of rounds per run.
N = 1000
T = 1000

generation = np.random.RandomState(10)

# One row per bandit instance, one column per arm.
means = generation.rand(N, 2) * 10

In [None]:
# Experiment: priors whose variance is too large.
#
# Every prior has mean 5 (the centre of the interval); only the prior variance
# changes from one curve to the next.

moyennes = np.array([5,5,5,5,5])
variances = np.array([10,20,50,100,1000])

# A Gamma distribution with shape alpha and rate beta has mean alpha/beta and
# variance alpha/beta**2, hence the conversion below.
alpha = (moyennes**2)/(variances)
beta = (moyennes)/(variances)

# One colour per prior, in the same order as `moyennes` / `variances`.
couleurs = ['red', 'blue', 'green', 'yellow', 'purple']

for j, couleur in enumerate(couleurs):
    # Cumulative pseudo-regret of each of the N bandits under prior j.
    # Bandit i and its Thompson Sampling run are both seeded with i for every prior.
    cum_regrets = []
    for i in range(N):
        banditest = PoissonBandit(means[i], randomstate=i)
        ts_poisson(banditest, T, alpha=alpha[j], beta=beta[j], seed=i)
        cum_regrets.append(banditest.get_cumulative_regret())

    # Shape (N, T): average / standard deviation over the bandits at each round.
    cum_regrets = np.asarray(cum_regrets)
    cum_regrets_moy = cum_regrets.mean(axis=0)
    cum_regrets_sd = cum_regrets.std(axis=0)

    # Solid line: mean; dashed line: mean + one standard deviation.
    plt.plot(range(T), cum_regrets_moy, color=couleur,
             label = r'$\alpha_0$ = {}, $\beta_0$ = {},  $\mu_0$ = {}, $\sigma^2_0$ = {}'.format(alpha[j],beta[j],moyennes[j],variances[j]))
    plt.plot(range(T), cum_regrets_moy + cum_regrets_sd, color=couleur, linestyle='dashed')

plt.xlabel('Pas de temps')
plt.ylabel('Pseudo regret cumulatif moyen')
plt.title('Test pour Thompson Sampling, N={}, T={},\n impact de la variance des priors'.format(N,T))
plt.legend(bbox_to_anchor=(1.05, 1))


In [None]:
# Generate N two-armed Poisson bandit instances; each arm mean is drawn
# uniformly from the interval [10, 20). T is the number of rounds per run.
N = 1000
T = 1000

generation = np.random.RandomState(5)

# One row per bandit instance, one column per arm; uniform [0, 1) draws
# are scaled by 10 and shifted by 10.
means = generation.rand(N, 2) * 10 + 10


In [None]:
# Experiment: effect of the prior mean, with a "good" prior variance.
#
# The prior variance is fixed to the length of the interval (10) and the prior
# mean takes the values 5, 12, 15, 17 and 25.

moyennes = np.array([5,12,15,17,25])
variances = np.array([10,10,10,10,10])

# A Gamma distribution with shape alpha and rate beta has mean alpha/beta and
# variance alpha/beta**2, hence the conversion below.
alpha = (moyennes**2)/(variances)
beta = (moyennes)/(variances)

# One colour per prior, in the same order as `moyennes` / `variances`.
couleurs = ['red', 'blue', 'green', 'yellow', 'purple']

for j, couleur in enumerate(couleurs):
    # Cumulative pseudo-regret of each of the N bandits under prior j.
    # Bandit i and its Thompson Sampling run are both seeded with i for every prior.
    cum_regrets = []
    for i in range(N):
        banditest = PoissonBandit(means[i], randomstate=i)
        ts_poisson(banditest, T, alpha=alpha[j], beta=beta[j], seed=i)
        cum_regrets.append(banditest.get_cumulative_regret())

    # Shape (N, T): average / standard deviation over the bandits at each round.
    cum_regrets = np.asarray(cum_regrets)
    cum_regrets_moy = cum_regrets.mean(axis=0)
    cum_regrets_sd = cum_regrets.std(axis=0)

    # Solid line: mean; dashed line: mean + one standard deviation.
    plt.plot(range(T), cum_regrets_moy, color=couleur,
             label = r'$\alpha_0$ = {}, $\beta_0$ = {},  $\mu_0$ = {}, $\sigma^2_0$ = {}'.format(alpha[j],beta[j],moyennes[j],variances[j]))
    plt.plot(range(T), cum_regrets_moy + cum_regrets_sd, color=couleur, linestyle='dashed')

plt.xlabel('Pas de temps')
plt.ylabel('Pseudo regret cumulatif moyen')
# This experiment varies the prior mean, so the title names the mean rather than the variance.
plt.title('Test pour Thompson Sampling, N={}, T={},\n impact de la moyenne des priors'.format(N,T))
plt.legend(bbox_to_anchor=(1.05, 1))


In [None]:
# Generate N two-armed Poisson bandit instances; each arm mean is drawn
# uniformly from the interval [10, 20). T is the number of rounds per run.
N = 1000
T = 1000

generation = np.random.RandomState(5)

# One row per bandit instance, one column per arm; uniform [0, 1) draws
# are scaled by 10 and shifted by 10.
means = generation.rand(N, 2) * 10 + 10

In [None]:
# Experiment: effect of the prior mean, with a smaller prior variance.
#
# The prior variance is fixed to 3 and the prior mean takes the values
# 9, 12, 15, 17 and 21.

moyennes = np.array([9,12,15,17,21])
variances = np.array([3,3,3,3,3])

# A Gamma distribution with shape alpha and rate beta has mean alpha/beta and
# variance alpha/beta**2, hence the conversion below.
alpha = (moyennes**2)/(variances)
beta = (moyennes)/(variances)

# One colour per prior, in the same order as `moyennes` / `variances`.
couleurs = ['red', 'blue', 'green', 'yellow', 'purple']

for j, couleur in enumerate(couleurs):
    # Cumulative pseudo-regret of each of the N bandits under prior j.
    # Bandit i and its Thompson Sampling run are both seeded with i for every prior.
    cum_regrets = []
    for i in range(N):
        banditest = PoissonBandit(means[i], randomstate=i)
        ts_poisson(banditest, T, alpha=alpha[j], beta=beta[j], seed=i)
        cum_regrets.append(banditest.get_cumulative_regret())

    # Shape (N, T): average / standard deviation over the bandits at each round.
    cum_regrets = np.asarray(cum_regrets)
    cum_regrets_moy = cum_regrets.mean(axis=0)
    cum_regrets_sd = cum_regrets.std(axis=0)

    # Solid line: mean; dashed line: mean + one standard deviation.
    plt.plot(range(T), cum_regrets_moy, color=couleur,
             label = r'$\alpha_0$ = {}, $\beta_0$ = {},  $\mu_0$ = {}, $\sigma^2_0$ = {}'.format(alpha[j],beta[j],moyennes[j],variances[j]))
    plt.plot(range(T), cum_regrets_moy + cum_regrets_sd, color=couleur, linestyle='dashed')

plt.xlabel('Pas de temps')
plt.ylabel('Pseudo regret cumulatif moyen')
# This experiment varies the prior mean, so the title names the mean rather than the variance.
plt.title('Test pour Thompson Sampling, N={}, T={},\n impact de la moyenne des priors'.format(N,T))
plt.legend(bbox_to_anchor=(1.05, 1))
