In [95]:
import numpy as np
import pandas as pd
import time
from functools import partial

from scipy.stats import randint, uniform
from sim_lib import simulation

import optuna

pd.options.mode.chained_assignment = None
seed = 18475

Для решения задачи хочу попробывать реализовать Upper Confidence Bound.

In [83]:
def ucb(history: pd.DataFrame,alpha: float, b: float):
    ucb.t +=1
    n_i = (history['impressions'] + b)
    explonation = history['clicks'] / n_i
    exploration = np.sqrt(2 * np.log(ucb.t) / n_i)
    n = np.argmax(explonation + alpha * exploration)
    return history.index[n]

Сделаем удобный метод чтобы подобрать оптимальные гиперпараметры

In [84]:
def testing(alpha: float, b: float):
    print(f"alpha = {alpha}; bias={b}")
    np.random.seed(seed)
    start = time.time()
    ucb.t = 0
    policy = partial(ucb,alpha=alpha,b=b)
    output = simulation(policy, n=200000, seed=seed)
    end = time.time()
    print(end - start)
    print(f"{output['regret']}| {output['regret']/output['rounds']} | {output['total_banners']}")
    print("🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰")
    return output['regret'],output['rounds'],output['total_banners']


Поиск лучших гиперпарамертов. print imrepssions в коде sim_lib.py закомментирован, чтобы не переполнять ноутбук логами.

In [86]:
best = float("inf")
s = None
params = None
for i in [0.001,0.01,0.1,0.5,0.7,0.9,1]:
    for j in [0.1,0.5,0.7,0.9,1,10]:
        regret, r, total = testing(i,j)
        if regret < best:
            best = regret
            s = f"{regret}| {regret/r} | {total}"
            params = f"alpha = {i}; bias={j}"

alpha = 0.001; bias=0.1
143.23270201683044
7570.250349143192| 0.03785125174571596 | 184
🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰
alpha = 0.001; bias=0.5
144.68812918663025
7570.250349143192| 0.03785125174571596 | 184
🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰
alpha = 0.001; bias=0.7
143.61075592041016
7570.250349143192| 0.03785125174571596 | 184
🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰
alpha = 0.001; bias=0.9
774.8987948894501
7570.250349143192| 0.03785125174571596 | 184
🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰
alpha = 0.001; bias=1
141.61712002754211
7570.250349143192| 0.03785125174571596 | 184
🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰
alpha = 0.001; bias=10
142.3335211277008
7570.250349143192| 0.03785125174571596 | 184
🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰
alpha = 0.01; bias=0.1
142.8481740951538
9714.3448342918| 0.048571724171459 | 184
🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰
alpha = 0.01; bias=0.5
143.63303422927856
8430.46331871147| 0.04215231659355735 | 184
🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰
alpha = 0.01; bias=0.7
144.01512503623962
8430.46331871147| 0.04215231659355735 | 184
🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰🌟🍰
alpha = 0.01; bias=0.9
142.6859438419342
84

In [88]:
print(s)
print(params)

228.75722434605944| 0.0011437861217302972 | 184
alpha = 0.1; bias=0.9


Побили бейзлайн, но хочеться еще посмотреть значения в окретсности 0.1 для alpha и поварировать значения сдвига. Можно применить Баесовскую оптимизацию, чтобы подобрать наилучший гиперпареметр вблизи 0.1 и 0.9.

In [108]:
def objective(trail):
  param={
      'alpha':trail.suggest_float('alpha' , 0.07,0.1),
      'b' :trail.suggest_float('b' , 0.7 , 1),
      }
  np.random.seed(seed)
  start = time.time()
  ucb.t = 0
  policy = partial(ucb,**param)
  output = simulation(policy, n=200000, seed=seed)
  end = time.time()
  print(end - start)
  return output['regret']

In [109]:
find_param=optuna.create_study(direction="minimize")
find_param.optimize(objective,n_trials=10, show_progress_bar=1)

[I 2023-12-07 16:56:23,660] A new study created in memory with name: no-name-a0d200ba-e53f-462d-916e-5f6b830649bf


  0%|          | 0/10 [00:00<?, ?it/s]

148.96102285385132
[I 2023-12-07 16:58:52,629] Trial 0 finished with value: 177.93991807630655 and parameters: {'alpha': 0.07342882079998947, 'b': 0.8130319169237833}. Best is trial 0 with value: 177.93991807630655.
147.59166193008423
[I 2023-12-07 17:01:20,221] Trial 1 finished with value: 245.11785482905324 and parameters: {'alpha': 0.09148471870393264, 'b': 0.7549926178201269}. Best is trial 0 with value: 177.93991807630655.
146.6683280467987
[I 2023-12-07 17:03:46,891] Trial 2 finished with value: 138.26318839822264 and parameters: {'alpha': 0.07255732437833178, 'b': 0.7672465927542269}. Best is trial 2 with value: 138.26318839822264.
145.96706581115723
[I 2023-12-07 17:06:12,859] Trial 3 finished with value: 194.1976466975662 and parameters: {'alpha': 0.08077712172831075, 'b': 0.8804039021487556}. Best is trial 2 with value: 138.26318839822264.
145.08413004875183
[I 2023-12-07 17:08:37,945] Trial 4 finished with value: 175.84765900928002 and parameters: {'alpha': 0.085963210140851

Брать значения выше 0.7 не дало лучшего результата, но смогли достичь 138.263. Побробуем посмотреть значения меньше.

In [113]:
def objective(trail):
  param={
      'alpha':trail.suggest_float('alpha' , 0.06,0.07),
      'b' :trail.suggest_float('b' , 0.001 , 1),
      }
  np.random.seed(seed)
  start = time.time()
  ucb.t = 0
  policy = partial(ucb,**param)
  output = simulation(policy, n=200000, seed=seed)
  end = time.time()
  print(end - start)
  return output['regret']

In [114]:
find_param=optuna.create_study(direction="minimize")
find_param.optimize(objective,n_trials=10, show_progress_bar=1)

[I 2023-12-07 21:24:14,915] A new study created in memory with name: no-name-833c0c30-8756-4468-b5f4-f6c67e4de49e


  0%|          | 0/10 [00:00<?, ?it/s]

145.76109790802002
[I 2023-12-07 21:26:40,684] Trial 0 finished with value: 238.66036029094235 and parameters: {'alpha': 0.06890170851238549, 'b': 0.12140007282560375}. Best is trial 0 with value: 238.66036029094235.
144.26362299919128
[I 2023-12-07 21:29:04,949] Trial 1 finished with value: 182.29488041121277 and parameters: {'alpha': 0.06409830302734475, 'b': 0.6024611677281895}. Best is trial 1 with value: 182.29488041121277.
144.04168605804443
[I 2023-12-07 21:31:28,992] Trial 2 finished with value: 172.27101883811247 and parameters: {'alpha': 0.06668505943542122, 'b': 0.7927339726973242}. Best is trial 2 with value: 172.27101883811247.
144.60155606269836
[I 2023-12-07 21:33:53,595] Trial 3 finished with value: 182.7635188942302 and parameters: {'alpha': 0.0686627175471308, 'b': 0.3731882335216777}. Best is trial 2 with value: 172.27101883811247.
144.84742999076843
[I 2023-12-07 21:36:18,443] Trial 4 finished with value: 131.46047758172622 and parameters: {'alpha': 0.06427250050733

Еще раз побили бейзлайн. Последняя попытка попробывать зафиксировать bias = 1

In [115]:
def objective(trail):
  param={
      'alpha':trail.suggest_float('alpha' , 0.06,0.07),
      'b' :trail.suggest_float('b' , 1 , 1),
      }
  np.random.seed(seed)
  start = time.time()
  ucb.t = 0
  policy = partial(ucb,**param)
  output = simulation(policy, n=200000, seed=seed)
  end = time.time()
  print(end - start)
  return output['regret']

In [116]:
find_param=optuna.create_study(direction="minimize")
find_param.optimize(objective,n_trials=10, show_progress_bar=1)

[I 2023-12-07 21:48:58,674] A new study created in memory with name: no-name-d827554e-b8ae-4a33-8ee8-74ecbf38272e


  0%|          | 0/10 [00:00<?, ?it/s]

144.9676411151886
[I 2023-12-07 21:51:23,647] Trial 0 finished with value: 112.81225115860292 and parameters: {'alpha': 0.06003024869652497, 'b': 1.0}. Best is trial 0 with value: 112.81225115860292.
146.50388312339783
[I 2023-12-07 21:53:50,152] Trial 1 finished with value: 99.35845293033039 and parameters: {'alpha': 0.06149784492511529, 'b': 1.0}. Best is trial 1 with value: 99.35845293033039.
146.04919815063477
[I 2023-12-07 21:56:16,202] Trial 2 finished with value: 134.29522854827914 and parameters: {'alpha': 0.06498237991666139, 'b': 1.0}. Best is trial 1 with value: 99.35845293033039.
143.71588921546936
[I 2023-12-07 21:58:39,919] Trial 3 finished with value: 99.81292684737755 and parameters: {'alpha': 0.061866999277234436, 'b': 1.0}. Best is trial 1 with value: 99.35845293033039.
143.8247196674347
[I 2023-12-07 22:01:03,745] Trial 4 finished with value: 158.48988935719385 and parameters: {'alpha': 0.06683486127711089, 'b': 1.0}. Best is trial 1 with value: 99.35845293033039.
14

Лучший результат 99.3585  {'alpha': 0.06149784492511529, 'b': 1.0} 