In [None]:
import numpy as np
import pandas as pd
import time
from functools import partial

from scipy.stats import randint, uniform
from sim_lib import simulation

pd.options.mode.chained_assignment = None

In [None]:
def eps_greedy(history: pd.DataFrame, eps: float):
    """Epsilon-greedy banner selection.

    With probability ``eps`` a row of ``history`` is picked at random
    (exploration); otherwise the row with the highest smoothed
    click-through rate ``clicks / (impressions + 10)`` is picked
    (exploitation).

    Args:
        history: frame with ``'clicks'`` and ``'impressions'`` columns,
            one row per candidate.
        eps: exploration probability.

    Returns:
        The index label of the selected row.
    """
    explore = uniform.rvs() < eps
    if not explore:
        # The +10 in the denominator damps the CTR of rarely shown rows.
        smoothed_ctr = history['clicks'] / (history['impressions'] + 10)
        return history.index[np.argmax(smoothed_ctr)]

    # Exploration branch: random row position in [0, number of rows).
    random_position = randint.rvs(0, history.shape[0])
    return history.index[random_position]

# Baseline policy: explore on 6% of the calls.
policy = partial(eps_greedy, eps=0.06)

In [None]:
# Baseline run: simulate the epsilon-greedy `policy` and time it.
# seed for homework
seed = 18475
# Seed NumPy's global RNG before the run.
np.random.seed(seed=seed)

# Wall-clock timing around the simulation call.
start = time.time()
# The same seed is also handed to the simulator itself.
output = simulation(policy, n=200000, seed=seed)
end = time.time()
# Elapsed seconds; shown as the cell's output.
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


639.7598621845245

In [None]:
# baseline regret
# Tuple shown as the cell output: total regret, regret divided by the
# number of rounds, and the 'total_banners' entry of the simulation output.
output['regret'], output['regret']/output['rounds'],  output['total_banners']

(1540.7609683932544, 0.007703804841966272, 184)

In [None]:
# Display the 'history' table from the baseline simulation output.
output['history']

Unnamed: 0,impressions,clicks,lifetime,p
153,18970.0,4199.0,18003.025431,0.220134
162,228.0,26.0,1537.166719,0.11378
172,163.0,32.0,19648.592394,0.219968
173,170.0,18.0,12771.47499,0.122694
180,26.0,3.0,4655.819793,0.020061
182,6.0,0.0,889.624649,0.004621
183,1.0,0.0,15187.163761,0.073886


Реализуем Upper Confidence Bound (UCB)

In [None]:
def ucb_policy(history: pd.DataFrame, C: float):
    """Upper Confidence Bound (UCB) selection.

    Every row of ``history`` is scored as

        clicks / (impressions + 1) + C * sqrt(2 * ln(t) / (impressions + 1))

    where ``t`` is the total number of impressions plus one, and the index
    label of the highest-scoring row is returned.

    The score is computed with column-wise (vectorized) arithmetic instead
    of a per-row ``DataFrame.apply``: the formula is unchanged, but the
    Python-level loop over rows on every call is avoided.

    Args:
        history: frame with ``'clicks'`` and ``'impressions'`` columns,
            one row per candidate.
        C: weight of the exploration term; ``0`` gives a purely greedy
            choice on the smoothed click rate.

    Returns:
        The index label of the row with the highest UCB score.
    """
    total_rounds = history['impressions'].sum() + 1
    # +1 keeps the denominators non-zero for rows with no impressions yet.
    smoothed_impressions = history['impressions'] + 1

    exploitation = history['clicks'] / smoothed_impressions
    exploration = C * np.sqrt(2 * np.log(total_rounds) / smoothed_impressions)

    return history.index[np.argmax(exploitation + exploration)]

In [None]:
def upper_confidence_bound(history: pd.DataFrame, balance: float = 1.0):
    """Pick the row with the highest upper-confidence-bound score.

    The score is the smoothed click rate ``clicks / (impressions + 1)``
    plus ``balance`` times ``sqrt(2 * ln(t + 1) / (impressions + 1))``,
    where ``t`` is the sum of all impressions in ``history``.

    Args:
        history: frame with ``'clicks'`` and ``'impressions'`` columns.
        balance: multiplier of the exploration term (default ``1.0``).

    Returns:
        The index label of the best-scoring row.
    """
    shown = history['impressions']
    smoothed_shown = shown + 1
    log_rounds = np.log(shown.sum() + 1)

    scores = (
        history['clicks'] / smoothed_shown
        + balance * np.sqrt(2 * log_rounds / smoothed_shown)
    )
    return history.index[np.argmax(scores)]

In [None]:
# Coarse grid search over the UCB exploration coefficient C.
seed = 18475  # same seed as the baseline run
np.random.seed(seed=seed)
n_iterations = 200000

# Values of C to test
C_values = [0.001, 0.01, 0.1, 0.5, 1, 1.5]

# One record per tested C; the DataFrame is built once after the loop
# instead of being re-created with pd.concat on every iteration.
result_rows = []
for C in C_values:
    print(f"Testing with C = {C}")
    # The keyword has to be the Latin letter `C`, matching the parameter
    # name of ucb_policy; a look-alike Cyrillic letter raises TypeError.
    policy = partial(ucb_policy, C=C)
    # Pass the seed explicitly, as the baseline cell does, so every run is
    # simulated with the same seed rather than the simulator's default.
    output = simulation(policy, n=n_iterations, seed=seed)
    avg_regret = output['regret'] / output['rounds']
    print("Regret:", output['regret'])
    print("Average Regret per Round:", avg_regret)

    result_rows.append({
        'C_value': C,
        'Regret': output['regret'],
        'Avg_Regret_per_Round': avg_regret,
    })
    print("------------------------------------------------------")

# Results table for this grid
results_df = pd.DataFrame(
    result_rows, columns=['C_value', 'Regret', 'Avg_Regret_per_Round']
)


Testing with C = 0.001
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
Regret: 10991.08166216789
Average Regret per Round: 0.05495540831083945
------------------------------------------------------
Testing with C = 0.01
1 impressions have been simulated
10001 impressions have been 

In [None]:
results_df

Unnamed: 0,C_value,Regret,Avg_Regret_per_Round
0,0.001,10991.081662,0.054955
1,0.01,9180.312403,0.045902
2,0.1,894.174543,0.004471
3,0.5,4910.05596,0.02455
4,1.0,9240.582095,0.046203
5,1.5,8914.231413,0.044571


Только C = 0.1 бьёт бейзлайн. Посмотрим окрестность 0.1 — возможно, там найдутся значения лучше.

In [None]:
# Fine grid search over the UCB exploration coefficient C around 0.1.
seed = 18475  # same seed as the baseline run
np.random.seed(seed=seed)
n_iterations = 200000

# Values of C to test
C_values = [0.08, 0.09, 0.1, 0.11, 0.12]

# One record per tested C; the DataFrame is built once after the loop
# instead of being re-created with pd.concat on every iteration.
result_rows_2 = []
for C in C_values:
    print(f"Testing with C = {C}")
    # The keyword has to be the Latin letter `C`, matching the parameter
    # name of ucb_policy; a look-alike Cyrillic letter raises TypeError.
    policy = partial(ucb_policy, C=C)
    # Pass the seed explicitly, as the baseline cell does, so every run is
    # simulated with the same seed rather than the simulator's default.
    output = simulation(policy, n=n_iterations, seed=seed)
    avg_regret = output['regret'] / output['rounds']
    print("Regret:", output['regret'])
    print("Average Regret per Round:", avg_regret)

    result_rows_2.append({
        'C_value': C,
        'Regret': output['regret'],
        'Avg_Regret_per_Round': avg_regret,
    })
    print("------------------------------------------------------")

# Results table for this grid
results_df_2 = pd.DataFrame(
    result_rows_2, columns=['C_value', 'Regret', 'Avg_Regret_per_Round']
)


Testing with C = 0.08
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
Regret: 1463.2741292503472
Average Regret per Round: 0.007316370646251736
------------------------------------------------------
Testing with C = 0.09
1 impressions have been simulated
10001 impressions have been

In [None]:
results_df_2

Unnamed: 0,C_value,Regret,Avg_Regret_per_Round
0,0.08,1463.274129,0.007316
1,0.09,1403.322428,0.007017
2,0.1,894.174543,0.004471
3,0.11,790.430449,0.003952
4,0.12,575.545874,0.002878


Продолжим и посмотрим значения C после 0.12.

In [None]:
# Extended grid search over the UCB exploration coefficient C above 0.12.
seed = 18475  # same seed as the baseline run
np.random.seed(seed=seed)
n_iterations = 200000

# Values of C to test
C_values = [0.13, 0.14, 0.15]

# One record per tested C; the DataFrame is built once after the loop
# instead of being re-created with pd.concat on every iteration.
result_rows_3 = []
for C in C_values:
    print(f"Testing with C = {C}")
    # The keyword has to be the Latin letter `C`, matching the parameter
    # name of ucb_policy; a look-alike Cyrillic letter raises TypeError.
    policy = partial(ucb_policy, C=C)
    # Pass the seed explicitly, as the baseline cell does, so every run is
    # simulated with the same seed rather than the simulator's default.
    output = simulation(policy, n=n_iterations, seed=seed)
    avg_regret = output['regret'] / output['rounds']
    print("Regret:", output['regret'])
    print("Average Regret per Round:", avg_regret)

    result_rows_3.append({
        'C_value': C,
        'Regret': output['regret'],
        'Avg_Regret_per_Round': avg_regret,
    })
    print("------------------------------------------------------")

# Results table for this grid
results_df_3 = pd.DataFrame(
    result_rows_3, columns=['C_value', 'Regret', 'Avg_Regret_per_Round']
)


Testing with C = 0.13
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
Regret: 853.3820805268102
Average Regret per Round: 0.004266910402634051
------------------------------------------------------
Testing with C = 0.14
1 impressions have been simulated
10001 impressions have been 

In [None]:
results_df_3

Unnamed: 0,C_value,Regret,Avg_Regret_per_Round
0,0.13,853.382081,0.004267
1,0.14,874.935961,0.004375
2,0.15,1114.057758,0.00557


После C = 0.12 regret снова растёт (0.13 — 853.4, 0.14 — 874.9, 0.15 — 1114.1).

Итого:

C = 0.12 побил бейзлайн

Regret: 575.546 против 1540.761
