In [2]:
import numpy as np
import pandas as pd
import time
from functools import partial

from scipy.stats import randint, uniform
from sim_lib import simulation

pd.options.mode.chained_assignment = None


In [2]:
def eps_greedy(history: pd.DataFrame, eps: float):
    if uniform.rvs() < eps:
        n = history.shape[0]
        return history.index[randint.rvs(0, n)]

    ctr = history['clicks'] / (history['impressions'] + 10)
    n = np.argmax(ctr)
    return history.index[n]

policy = partial(eps_greedy, eps=0.06)

In [3]:
# seed for homework
seed = 18475
np.random.seed(seed=seed)

start = time.time()
output = simulation(policy, n=200000, seed=seed)
end = time.time()
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


197.36377096176147

In [4]:
# baseline regret
output['regret'], output['regret']/output['rounds'],  output['total_banners']

(1540.7609683932544, 0.007703804841966272, 184)

In [5]:
output['history']

Unnamed: 0,impressions,clicks,lifetime,p
153,18970.0,4199.0,18003.025431,0.220134
162,228.0,26.0,1537.166719,0.11378
172,163.0,32.0,19648.592394,0.219968
173,170.0,18.0,12771.47499,0.122694
180,26.0,3.0,4655.819793,0.020061
182,6.0,0.0,889.624649,0.004621
183,1.0,0.0,15187.163761,0.073886


In [3]:
# Будем использовать softmax
def softmax(x):
  return np.exp(x)/np.sum(np.exp(x))

def soft_max(history: pd.DataFrame, eps: float):
    ctr = history['clicks'] / (history['impressions'] + 10)
    probs = softmax(ctr/eps)
    n = history.shape[0]
    return history.index[np.random.choice(n, p=probs)]

policy_soft_max = partial(soft_max, eps=0.02)

In [16]:
# seed for homework
seed = 18475
np.random.seed(seed=seed)

start = time.time()
output = simulation(policy_soft_max, n=200000, seed=seed)
end = time.time()
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


223.2105848789215

In [17]:
# Результат
output['regret'], output['regret']/output['rounds'],  output['total_banners']

(534.0452256127945, 0.0026702261280639726, 184)

In [18]:
# Получили regret меньше -- бейзлайн побит)

In [4]:
# Теперь подберём лучший параметр
epsilons = [0.02, 0.01, 0.005, 0.001, 0.1, 0.5, 1.0, 1.5, 0.0001]
for eps in epsilons:
    policy_soft_max_with_eps = partial(soft_max, eps=eps)
    seed = 18475
    np.random.seed(seed=seed)

    start = time.time()
    output = simulation(policy_soft_max_with_eps, n=200000, seed=seed)
    print('current eps=', eps)
    output['regret'], output['regret']/output['rounds'],  output['total_banners']
    print(output['regret'], output['regret']/output['rounds'],  output['total_banners'])
    end = time.time()
    print(end - start)

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
current eps= 0.02
534.0452256127945 0.0026702261280639726 184
225.76462507247925
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001

  result = getattr(ufunc, method)(*inputs, **kwargs)


ValueError: probabilities contain NaN

In [6]:
# Много значений лишних (особенно те, что больше 1). Попробую ещё некоторые, около 0.02

In [7]:
epsilons = [0.03, 0.04, 0.025, 0.015]
for eps in epsilons:
    policy_soft_max_with_eps = partial(soft_max, eps=eps)
    seed = 18475
    np.random.seed(seed=seed)

    start = time.time()
    output = simulation(policy_soft_max_with_eps, n=200000, seed=seed)
    print('current eps=', eps)
    output['regret'], output['regret']/output['rounds'],  output['total_banners']
    print(output['regret'], output['regret']/output['rounds'],  output['total_banners'])
    end = time.time()
    print(end - start)

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
current eps= 0.03
889.6636396919081 0.00444831819845954 184
225.60396790504456
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 i

In [8]:
# Ничего лучше не нашли(
# Итог: лучший eps=0.02; результат для него:
# (534.0452256127945, 0.0026702261280639726, 184)