# Шахвалиева Юлиана

### Импорт необходимых библиотек

In [1]:
import numpy as np
import pandas as pd
import time
from functools import partial

from scipy.stats import randint, uniform
from sim_lib import simulation
from sklearn.model_selection import ParameterGrid

pd.options.mode.chained_assignment = None

### Baseline

In [2]:
def eps_greedy(history: pd.DataFrame, eps: float):
    if uniform.rvs() < eps:
        n = history.shape[0]
        return history.index[randint.rvs(0, n)]

    ctr = history['clicks'] / (history['impressions'] + 10)
    n = np.argmax(ctr)
    return history.index[n]

policy = partial(eps_greedy, eps=0.06)

In [3]:
# seed for homework
seed = 18475
np.random.seed(seed=seed)

start = time.time()
output = simulation(policy, n=200000, seed=seed)
end = time.time()
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


371.93202924728394

In [4]:
# baseline regret
output['regret'], output['regret']/output['rounds'],  output['total_banners']

(1540.7609683932544, 0.007703804841966272, 184)

In [5]:
output['history']

Unnamed: 0,impressions,clicks,lifetime,p
153,18970.0,4199.0,18003.025431,0.220134
162,228.0,26.0,1537.166719,0.11378
172,163.0,32.0,19648.592394,0.219968
173,170.0,18.0,12771.47499,0.122694
180,26.0,3.0,4655.819793,0.020061
182,6.0,0.0,889.624649,0.004621
183,1.0,0.0,15187.163761,0.073886


### Моя модель

In [33]:
# Функция Tompson Sampling, удовлетворяющая policy спецификации
def tompson_sampling(history: pd.DataFrame, alpha: float, beta: float):
    
    # Инициализация параметров для дальнейшего семплирования
    a = history['clicks'] + alpha
    b = history['impressions'] + beta
    
    # Сэмплирование из бета распределения 
    sample = np.random.beta(a, b)
    
    # Индекс максимального числа
    n = np.argmax(sample)
    
    return history.index[n]

In [34]:
# Функция для тестирования политики с конкретными гиперпараметрами альфа и бета
def get_mean_regret(alpha, beta):
    # Инициализация политики
    policy = partial(tompson_sampling, alpha=alpha, beta=beta)
    
    # Фиксация seed
    seed = 18475
    np.random.seed(seed=seed)

    # Запуск симуляции
    start = time.time()
    output = simulation(policy, n=200000, seed=seed)
    end = time.time()
    
    # Вывод результатов в консоль 
    regret = output['regret']
    print(f'\nRegret: \t\t{regret}')
    mean_regret = output['regret']/output['rounds']
    print(f'Mean regret: \t\t{mean_regret}')
    print(f'Spent time (min): \t{(end - start) / 60}')
    
    return regret, mean_regret

In [31]:
# Функция для форматирования текста
def get_pretty_title(text):
    return ('\033[1m' + text + '\033[0m').center(55)

# Функция для форматирования вывода в консоль
def pretty_print(text, values):
    print(f'\n\n{get_pretty_title(text)}', values, sep='\n\n')

In [32]:
# Фунуция для подбора гиперпараметров
def tuning(parameters):
    # Включение таймера
    start = time.time()
    
    # Инициализация grid search   
    grid_search = ParameterGrid(parameters)
    
    # Установка начальных значений для последующего перебора
    best_mean_regret = np.inf
    best_alpha = None
    best_beta = None
    best_regret = None

    # Подбор гиперпараметров
    for i, parameter in enumerate(grid_search):
        alpha = parameter['alpha']
        beta = alpha * parameter['beta_decay']
    
        params = f'Alpha: \t{alpha}\nBeta: \t{beta}\n'
        pretty_print(f'Round ({i + 1}/{len(grid_search)}):', params)
        regret, mean_regret = get_mean_regret(alpha, beta)
    
        if mean_regret < best_mean_regret:
            best_mean_regret = mean_regret
            best_alpha = alpha
            best_beta = beta
            best_regret = regret
    
    # Остановка таймера
    end = time.time()
    spent_time = round((end - start) / 60, 2)
    
    # Вывод подобранных параметров
    best_parameter = f'Alpha: \t{best_alpha}\nBeta: \t{best_beta}'
    pretty_print('Best parameters:', best_parameter)
    
    # Вывод результатов
    best_result = f'Regret: \t{best_regret}\nMean regret: \t{best_mean_regret}'
    pretty_print('Best result:', best_result)
    
    # Затраченное время
    pretty_print('Spent time (min):', spent_time)  

In [36]:
# Инициализация cетки по перебору гиперпараметров 
parameters = {'alpha': [0.001, 0.01, 0.1, 0.3, 0.5, 0.8], 
              'beta_decay': [0.001, 0.01, 0.1, 0.3, 0.5, 0.8]}

# Подбор гиперпараметров
tuning(parameters)



                 [1mRound (1/36):[0m                 

Alpha: 	0.001
Beta: 	1e-06

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated

Regret: 		862.8312853805198
Mean regret: 		0.004314156426902599
Spent time (min): 	5.60490061044693


                 [1mRound (2/36):[0m      

130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated

Regret: 		825.6078179057134
Mean regret: 		0.004128039089528567
Spent time (min): 	5.580027723312378


                 [1mRound (10/36):[0m                

Alpha: 	0.01
Beta: 	0.003

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 

20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated

Regret: 		1080.3939128356974
Mean regret: 		0.005401969564178487
Spent time (min): 	5.559167591730754


                 [1mRound (19/36):[0m                

Alpha: 	0.3
Beta: 	0.0003

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 i

170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated

Regret: 		1333.4832516233396
Mean regret: 		0.006667416258116698
Spent time (min): 	5.564127782980601


                 [1mRound (27/36):[0m                

Alpha: 	0.5
Beta: 	0.05

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 i

70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated

Regret: 		1436.2774413377767
Mean regret: 		0.007181387206688884
Spent time (min): 	5.679717739423116


                 [1mRound (36/36):[0m                

Alpha: 	0.8
Beta: 	0.6400000000000001

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simul

Получилось уменьшить regret в сравнение с бейзлайном