In [1]:
import numpy as np
import pandas as pd
import time
from functools import partial

from scipy.stats import randint, uniform

from utils import validate_policy

# Turn off pandas' chained-assignment check (SettingWithCopyWarning) for the
# whole session. NOTE(review): nothing in this notebook visibly triggers it;
# presumably it is needed by code inside `utils.validate_policy` — confirm.
pd.options.mode.chained_assignment = None

In [2]:
def eps_greedy(history: pd.DataFrame, eps: float, smoothing: float = 10):
    """Select a row of ``history`` with an epsilon-greedy rule on smoothed CTR.

    With probability ``eps`` a row is picked uniformly at random (exploration).
    Otherwise the row with the largest smoothed click-through rate,
    ``clicks / (impressions + smoothing)``, is picked (exploitation); ties go
    to the first such row.

    Parameters
    ----------
    history : pd.DataFrame
        One row per candidate. Must contain ``'clicks'`` and ``'impressions'``
        columns; the row labels (``history.index``) are what gets returned.
    eps : float
        Exploration probability. ``eps <= 0`` never explores, ``eps >= 1``
        always explores.
    smoothing : float, default 10
        Constant added to the impressions denominator. It pulls the CTR
        estimate of rarely shown rows toward zero, so a single early click
        cannot dominate the greedy choice (and 0 impressions never divides
        by zero).

    Returns
    -------
    The index label of the selected row.

    Raises
    ------
    ValueError
        If ``history`` has no rows.
    """
    n_rows = history.shape[0]
    if n_rows == 0:
        # Both branches below would fail on an empty frame anyway, just with
        # an opaque message; fail early with a clear one instead.
        raise ValueError("history must contain at least one row")

    # Exploration: uniform.rvs() is in [0, 1), randint.rvs(0, n) is in [0, n).
    if uniform.rvs() < eps:
        return history.index[randint.rvs(0, n_rows)]

    # Exploitation: idxmax yields the row label directly, so there is no
    # position-vs-label ambiguity when the index is not 0..n-1.
    ctr = history['clicks'] / (history['impressions'] + smoothing)
    return ctr.idxmax()

# Freeze the exploration rate at 6% so `policy` only needs the history frame.
# NOTE(review): presumably `validate_policy` calls it as `policy(history)` — confirm.
policy = partial(eps_greedy, eps=0.06)

In [3]:
# Run the policy through the project's validation helper with a fixed seed;
# the returned mapping is inspected in the following cells.
output = validate_policy(policy=policy, n=200_000, seed=18475)

Impressions simulated: 1 |   Regret per round: 0.114
Impressions simulated: 10001 |   Regret per round: 0.0327
Impressions simulated: 20001 |   Regret per round: 0.0207
Impressions simulated: 30001 |   Regret per round: 0.0162
Impressions simulated: 40001 |   Regret per round: 0.0136
Impressions simulated: 50001 |   Regret per round: 0.0121
Impressions simulated: 60001 |   Regret per round: 0.0112
Impressions simulated: 70001 |   Regret per round: 0.0106
Impressions simulated: 80001 |   Regret per round: 0.01
Impressions simulated: 90001 |   Regret per round: 0.0095
Impressions simulated: 100001 |   Regret per round: 0.0093
Impressions simulated: 110001 |   Regret per round: 0.009
Impressions simulated: 120001 |   Regret per round: 0.0087
Impressions simulated: 130001 |   Regret per round: 0.0085
Impressions simulated: 140001 |   Regret per round: 0.0084
Impressions simulated: 150001 |   Regret per round: 0.0083
Impressions simulated: 160001 |   Regret per round: 0.0081
Impressions sim

In [4]:
# Baseline summary: total regret, regret averaged over rounds, and the
# `total_banners` entry reported by the run.
(
    output['regret'],
    output['regret'] / output['rounds'],
    output['total_banners'],
)

(1540.7609683932544, 0.007703804841966272, 184)

In [5]:
# Display the `history` table returned by the run (per-row impressions,
# clicks, lifetime and p, as shown in the output below).
output['history']

Unnamed: 0,impressions,clicks,lifetime,p
153,18970.0,4199.0,18003.025431,0.220134
162,228.0,26.0,1537.166719,0.11378
172,163.0,32.0,19648.592394,0.219968
173,170.0,18.0,12771.47499,0.122694
180,26.0,3.0,4655.819793,0.020061
182,6.0,0.0,889.624649,0.004621
183,1.0,0.0,15187.163761,0.073886
