In [1]:
import numpy as np
import pandas as pd
import time
from functools import partial

from scipy.stats import randint, uniform
from sim_lib import simulation

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
# NOTE(review): presumably silences warnings raised inside sim_lib — confirm it is still needed.
pd.options.mode.chained_assignment = None


In [2]:
def eps_greedy(history: pd.DataFrame, eps: float):
    """Choose a banner with an epsilon-greedy rule.

    Parameters
    ----------
    history : pd.DataFrame
        One row per candidate; must contain the ``clicks`` and
        ``impressions`` columns. The returned value is one of its index labels.
    eps : float
        Probability of exploring, i.e. of returning a uniformly random row
        label instead of the current best one.

    Returns
    -------
    The index label of the selected row.
    """
    # Exploration branch: uniform draw in [0, 1) compared against eps.
    if uniform.rvs() < eps:
        n = history.shape[0]
        return history.index[randint.rvs(0, n)]

    # Smoothed click-through rate: the +10 in the denominator pulls the estimate
    # of rarely shown rows towards zero so one lucky click cannot dominate.
    ctr = history['clicks'] / (history['impressions'] + 10)

    # idxmax() returns the *label* of the first maximum on every pandas version.
    # np.argmax(Series) returned a label on pandas < 1.0 and a position on >= 1.0,
    # which made the former `history.index[np.argmax(ctr)]` version-dependent.
    return ctr.idxmax()


# Baseline policy used for the reference run: explore 6% of the time.
policy = partial(eps_greedy, eps=0.06)

In [3]:
%%time
# Baseline run: evaluate the epsilon-greedy `policy` in the simulator.
# seed for homework
seed = 18475
# Seed NumPy's global RNG; the same seed value is also handed to `simulation` below.
np.random.seed(seed=seed)

# Manual wall-clock timing around the run (in addition to the %%time report).
start = time.time()
output = simulation(policy, n=200000, seed=seed)
end = time.time()
# Last expression of the cell: elapsed seconds, displayed as the cell result.
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
CPU times: user 8min 42s, sys: 1.47 s, total: 8min 43s
Wall time: 8min 50s


530.339742898941

In [4]:
# baseline regret: (total regret, regret per round, total_banners) for the eps-greedy run
output['regret'], output['regret']/output['rounds'],  output['total_banners']

(1540.7609683932544, 0.007703804841966272, 184)

In [5]:
# Display the 'history' table from the simulation output of the baseline run.
output['history']

Unnamed: 0,impressions,clicks,lifetime,p
153,18970.0,4199.0,18003.025431,0.220134
162,228.0,26.0,1537.166719,0.11378
172,163.0,32.0,19648.592394,0.219968
173,170.0,18.0,12771.47499,0.122694
180,26.0,3.0,4655.819793,0.020061
182,6.0,0.0,889.624649,0.004621
183,1.0,0.0,15187.163761,0.073886


# Thompson Sampling


Будем использовать Thompson sampling как policy

In [6]:
def ts(history: pd.DataFrame, alpha: float, beta: float):
    """Choose a banner by Thompson sampling with a Beta(alpha, beta) prior.

    For every row of ``history`` one value is drawn from
    Beta(clicks + alpha, impressions + beta - clicks); the index label of the
    row with the largest draw is returned.

    Parameters
    ----------
    history : pd.DataFrame
        One row per candidate; must contain ``clicks`` and ``impressions``.
    alpha, beta : float
        Prior pseudo-counts added to the click and non-click counts.

    Returns
    -------
    The index label of the selected row.
    """
    clicks = history['clicks']
    # Posterior parameters; the order of operations is kept as
    # (impressions + beta) - clicks so the floating-point values are unchanged.
    successes = clicks + alpha
    failures = history['impressions'] + beta - clicks

    # One vectorised draw per row from NumPy's global RNG.
    draws = np.random.beta(successes, failures)
    best_position = np.argmax(draws)
    return history.index[best_position]


# Thompson sampling with a uniform Beta(1, 1) prior.
policy_ts = partial(ts, alpha=1.0, beta=1.0)

In [7]:
%%time
# Thompson-sampling run: same seed and number of rounds as the baseline cell,
# only the policy differs (`policy_ts`).
# seed for homework
seed = 18475
# Seed NumPy's global RNG; the same seed value is also handed to `simulation` below.
np.random.seed(seed=seed)

# Manual wall-clock timing around the run (in addition to the %%time report).
start = time.time()
output = simulation(policy_ts, n=200000, seed=seed)
end = time.time()
# Last expression of the cell: elapsed seconds, displayed as the cell result.
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
CPU times: user 9min 56s, sys: 2.9 s, total: 9min 59s
Wall time: 10min 39s


639.320315361023

In [8]:
# ts regret: (total regret, regret per round, total_banners) for the Thompson-sampling run
output['regret'], output['regret']/output['rounds'],  output['total_banners']

(1223.6103636831504, 0.006118051818415752, 184)

Ура, бейзлайн побит.

Теперь будем подбирать параметры alpha и beta, чтобы уменьшить регрет.

In [None]:
# Search grid for the Thompson-sampling prior.
alpha_value = [0.01, 0.1, 0.5, 1, 4]
# Multipliers, not absolute values: the search loop uses beta = alpha * multiplier.
beta_value_scaled = [0.1, 1, 10]

# Best configuration seen so far; 1e9 is a "worse than anything" starting regret.
min_regret, min_alpha, min_beta = 1e9, 1, 1

In [9]:
%%time


for alpha in alpha_value:
    for beta_scaled in beta_value_scaled:
        beta = alpha * beta_scaled
        
        
        policy_ts = partial(ts, alpha = alpha, beta = beta)
        seed = 18475
        np.random.seed(seed=seed)

        start = time.time()
        output = simulation(policy_ts, n=200000, seed=seed)
        end = time.time()
        
        if output['regret'] < min_regret:
            min_regret = output['regret']
            min_alpha = alpha
            min_beta = beta
        
        print('-------------START CASE-----------')
        print('time: ', end - start)
        print('ALPHA: ', alpha, '     BETA: ', beta)
        print(output['regret'], output['regret']/output['rounds'],  output['total_banners'])
        print('-------------END CASE-----------')
        

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
-------------START CASE-----------
time:  568.9773700237274
ALPHA:  0.01      BETA:  0.001
666.7858784483843 0.0033339293922419215 184
-------------END CASE-----------
1 impressions have been simulated
10001 impressions have been simu

190001 impressions have been simulated
-------------START CASE-----------
time:  578.8882503509521
ALPHA:  0.5      BETA:  5.0
901.9163165037505 0.004509581582518752 184
-------------END CASE-----------
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
-------------START CASE--------

In [10]:
# Report the best (lowest-regret) configuration found by the grid search.
print('regret: ', min_regret, '    ALPHA: ', min_alpha, '   BETA: ', min_beta)

regret:  577.0104984857528     ALPHA:  0.01    BETA:  0.1


Имеет смысл попробовать значения alpha и beta за пределами сетки: либо ещё меньше, либо ещё больше.

In [11]:
# Follow-up search outside the original grid: one smaller and one larger prior.
# (0.01, 0.1) repeats the best case of the grid search as a reference point.
for alpha, beta in [
    (0.001, 0.01),
    (0.01, 0.1),
    (8, 80),
]:
    # Same seed for every case so that the runs are comparable.
    seed = 18475
    np.random.seed(seed=seed)
    policy_ts = partial(ts, alpha=alpha, beta=beta)

    start = time.time()
    output = simulation(policy_ts, n=200000, seed=seed)
    end = time.time()

    # Keep updating the best-so-far values carried over from the grid search.
    if output['regret'] < min_regret:
        min_regret, min_alpha, min_beta = output['regret'], alpha, beta

    # Per-case report (printed after the run, below the simulator's progress log).
    print('-------------START CASE-----------')
    print('time: ', end - start)
    print('ALPHA: ', alpha, '     BETA: ', beta)
    print(output['regret'], output['regret']/output['rounds'],  output['total_banners'])
    print('-------------END CASE-----------')


1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
-------------START CASE-----------
time:  520.8428318500519
ALPHA:  0.001      BETA:  0.01
725.6413961669447 0.0036282069808347235 184
-------------END CASE-----------
1 impressions have been simulated
10001 impressions have been simu

In [12]:
# Report the best configuration after the follow-up search.
print('regret: ', min_regret, '    ALPHA: ', min_alpha, '   BETA: ', min_beta)

regret:  284.74812933458077     ALPHA:  8    BETA:  80


Подбор параметров улучшил результаты, но до конца я его не довёл из-за длительности расчётов.