In [5]:
from scipy.stats import beta
import numpy as np
from np.
import collections
import time

def I(a, b):
    return beta.cdf(1/2, a, b)

def h(x):
    return max(x, 1 - x)

def R1(a, b):
    return h(I(a + 1, b)) - h(I(a, b))

def R2(a, b):
    return h(I(a, b + 1)) - h(I(a, b))

def KG_score(a, b):
    return a / (a + b) * R1(a, b) + b / (a + b) * R2(a, b)

def KG_policy(T, alpha):
    return Priority_policy(T=T, score=KG_score, alpha=alpha, initial=collections.defaultdict(float, {(0, 0): 1.0}))


In [25]:
def reward(a, b):
    if a < b:
        return beta.cdf(1/2, a + 1, b + 1)
    return 1 - beta.cdf(1/2, a + 1, b + 1)

def simulate(T, alpha, N, M, score):
    state_score = {(i, j): score(i + 1, j + 1) for i in range(T + 1) for j in range(T + 1)}
    start = time.time()
    rewards = []
    for _ in range(M):
        curr_state = {(0, 0): N}
        for __ in range(T):
            next_state = collections.defaultdict(int)
            budget = int(alpha * N)
            sorted_state = sorted(curr_state.keys(), key=lambda x: state_score[x], reverse=True)
            for s in sorted_state:
                pulled = min(curr_state[s], budget)
                budget = budget - pulled
                a, b = s
                tmp = np.random.binomial(pulled, (a + 1) / (a + b + 2))
                next_state[(a + 1, b)] += tmp
                next_state[(a, b + 1)] += pulled - tmp
                next_state[(a, b)] += curr_state[s] - pulled
            curr_state = next_state
        rewards.append(sum([curr_state[s] * reward(s[0], s[1]) for s in curr_state]))
    end = time.time()
    mean, std = np.mean(rewards), np.std(rewards) / np.sqrt(M)
    return mean, std


def simulate_benchmark(T, alpha, N, M):
    start = time.time()
    rewards = []
    for _ in range(M):
        curr_state = {(0, 0): N}
        for __ in range(T):
            next_state = collections.defaultdict(int)
            budget = int(alpha * N)
            sorted_state = curr_state.keys()
            for s in sorted_state:
                pulled = min(curr_state[s], budget)
                budget = budget - pulled
                a, b = s
                tmp = np.random.binomial(pulled, (a + 1) / (a + b + 2))
                next_state[(a + 1, b)] += tmp
                next_state[(a, b + 1)] += pulled - tmp
                next_state[(a, b)] += curr_state[s] - pulled
            curr_state = next_state
        rewards.append(sum([curr_state[s] * reward(s[0], s[1]) for s in curr_state]))
    end = time.time()
    mean, std = np.mean(rewards), np.std(rewards) / np.sqrt(M)
    return mean, std


In [26]:
cProfile.run("simulate_benchmark(T=6, alpha=1/4, N=100, M=400)")

         2478842 function calls (2344442 primitive calls) in 3.774 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    11200    0.011    0.000    0.215    0.000 <__array_function__ internals>:2(any)
    11200    0.011    0.000    0.174    0.000 <__array_function__ internals>:2(atleast_1d)
    33600    0.038    0.000    1.027    0.000 <__array_function__ internals>:2(extract)
        1    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(mean)
    33600    0.026    0.000    0.143    0.000 <__array_function__ internals>:2(nonzero)
    33600    0.034    0.000    0.249    0.000 <__array_function__ internals>:2(place)
    67200    0.052    0.000    0.428    0.000 <__array_function__ internals>:2(ravel)
    11200    0.014    0.000    0.051    0.000 <__array_function__ internals>:2(shape)
        1    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(std)
    33600    0.028    0.000    0.215    0.00

In [27]:
cProfile.run("simulate(T=6, alpha=1/4, N=100, M=400, score=KG_score)")

         2546714 function calls (2409962 primitive calls) in 3.736 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    11396    0.010    0.000    0.210    0.000 <__array_function__ internals>:2(any)
    11396    0.011    0.000    0.167    0.000 <__array_function__ internals>:2(atleast_1d)
    34188    0.036    0.000    1.035    0.000 <__array_function__ internals>:2(extract)
        1    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(mean)
    34188    0.025    0.000    0.139    0.000 <__array_function__ internals>:2(nonzero)
    34188    0.033    0.000    0.275    0.000 <__array_function__ internals>:2(place)
    68376    0.051    0.000    0.415    0.000 <__array_function__ internals>:2(ravel)
    11396    0.014    0.000    0.048    0.000 <__array_function__ internals>:2(shape)
        1    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(std)
    34188    0.027    0.000    0.247    0.00