In [1]:
import numpy as np
from collections import defaultdict
import gurobipy as gp
from gurobipy import GRB
import pandas as pd

from fluid_model import fluid_model
import json

In [2]:
def read_params(file_name):
    """Load model parameters from a JSON file.

    Every top-level value that is a JSON array is converted to a NumPy array.
    The ``"budgets"`` entry is then replaced by a Python list that repeats the
    loaded budget value ``params["T"]`` times.

    Args:
        file_name: Path to the JSON parameter file. The file must hold a JSON
            object containing at least the keys ``"budgets"`` and ``"T"``
            (``"T"`` must be an integer, since it is used as a repeat count).

    Returns:
        dict: The parsed parameters, with list values replaced by
        ``numpy.ndarray`` and ``"budgets"`` replaced by a list of length ``T``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``"budgets"`` or ``"T"`` is missing.
    """
    # A context manager closes the handle deterministically, even when JSON
    # parsing fails; the bare ``json.load(open(...))`` form left that to the
    # garbage collector.
    with open(file_name) as param_file:
        params = json.load(param_file)

    # Only values of existing keys are reassigned, so the dict size does not
    # change while it is being iterated.
    for key, value in params.items():
        if isinstance(value, list):
            params[key] = np.array(value)

    # Expand the single budget entry into one entry per index in range(T).
    params["budgets"] = [params["budgets"]] * params["T"]
    return params

In [3]:
# Parameter file for this problem instance (a relative path, so it is resolved
# against the notebook's working directory).
file_name = "fu2019.json"
# Parse the file; list values become NumPy arrays and "budgets" is expanded
# to a list of length T by read_params.
params = read_params(file_name)
# Bare expression: shows the loaded parameter dict as the cell output.
params

{'num_actions': 2,
 'num_states': 4,
 'T': 100,
 'gamma': 0.5,
 'r': array([[-1, -1],
        [ 0,  0],
        [ 0,  0],
        [ 1,  1]]),
 'init_occupation': array([0.16666667, 0.33333333, 0.5       , 0.        ]),
 'P1': array([[0.5, 0.5, 0. , 0. ],
        [0. , 0.5, 0.5, 0. ],
        [0. , 0. , 0.5, 0.5],
        [0.5, 0. , 0. , 0.5]]),
 'P0': array([[0.5, 0. , 0. , 0.5],
        [0.5, 0.5, 0. , 0. ],
        [0. , 0.5, 0.5, 0. ],
        [0. , 0. , 0.5, 0.5]]),
 'budgets': [0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0

In [4]:
# Build the fluid model from the loaded parameters and solve it.
# NOTE(review): solve() presumably runs the Gurobi optimization whose log is
# recorded below and leaves the solved model on `fluid.model` (a later cell
# reads `fluid.model.objVal`) — confirm in fluid_model.py.
fluid = fluid_model(params)
fluid.solve()

Using license file /Users/xiangyuzhang/gurobi.lic
Academic license - for non-commercial use only
Gurobi Optimizer version 9.0.2 build v9.0.2rc0 (mac64)
Optimize a model with 500 rows, 800 columns and 2784 nonzeros
Model fingerprint: 0xc7af7caa
Coefficient statistics:
  Matrix range     [5e-01, 1e+00]
  Objective range  [2e-30, 1e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [2e-01, 5e-01]
Presolve removed 5 rows and 10 columns
Presolve time: 0.01s
Presolved: 495 rows, 790 columns, 2758 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0107583e+00   4.992855e+02   0.000000e+00      0s
Extra 33 simplex iterations after uncrush
     426    5.5555079e-02   0.000000e+00   0.000000e+00      0s

Solved in 426 iterations and 0.02 seconds
Optimal objective  5.555507872e-02


In [5]:
# Evaluate the index policy with priority tuple (2, 1, 0, 3) deterministically.
# NOTE(review): the tuple is presumably the Whittle-index ordering of the four
# states — confirm against fluid_model.deterministic_simulate_index.
whittle = fluid.deterministic_simulate_index((2, 1, 0, 3))

# Optimal objective of the solved fluid model, read once for reuse below.
fluid_reward = fluid.model.objVal
# Relative improvement of the fluid objective over the index policy, in percent.
improvement_pct = (fluid_reward - whittle) / whittle * 100

print(
    "\n"
    f"Fluid-balance reward per arm: {fluid_reward}\n"
    f"Whittle reward per arm: {whittle},\n"
    f"Fluid-balance beats Whittle by {improvement_pct}%.\n"
)


Fluid-balance reward per arm: 0.05555507872379466
Whittle reward per arm: 0.04166666667224994,
Fluid-balance beats Whittle by 33.33218891924085%.



In [6]:
# Arguments for each fluid.is_feasible call, in the order they are reported.
# NOTE(review): each pair looks like (per-state boolean mask, state index);
# the exact meaning is defined in fluid_model.is_feasible — confirm there.
indexability_checks = (
    ([False, False, True, False], 2),
    ([False, True, True, False], 1),
    ([True, True, True, False], 0),
    ([True, True, True, True], 3),
)

# Run every check first, then print one result per line under the heading.
feasibility = [
    fluid.is_feasible(mask, state) for mask, state in indexability_checks
]

print(
    "Check whether this problem is indexable\n",
    " \n ".join(str(result) for result in feasibility),
)

Check whether this problem is indexable
 (True, 0.39999999999999997) 
 (True, 0.25) 
 (True, -0.25) 
 (True, -0.4)


In [9]:
# Simulation sizes passed to simulate_index. The next cell divides the reward
# by n and scales the standard deviation by sqrt(m).
# NOTE(review): presumably n is the number of arms and m the number of
# replications — confirm against fluid_model.simulate_index.
n, m = 1200, 1200
# Simulate the index policy with the same priority tuple (2, 1, 0, 3) used in
# the deterministic evaluation.
# NOTE(review): no random seed is set in the visible cells; if simulate_index
# draws random numbers, this result will differ from run to run.
reward = fluid.simulate_index((2, 1, 0, 3), n, m)

100%|██████████| 1200/1200 [00:30<00:00, 39.48it/s]


In [10]:
# Normalise the simulated reward by n once and reuse it for both statistics.
per_arm_reward = reward / n
# Displayed tuple: (mean, std / sqrt(m)). np.std uses its default ddof=0.
# NOTE(review): the second value is a standard error only if `reward` holds m
# independent replications — confirm against simulate_index.
np.mean(per_arm_reward), np.std(per_arm_reward) / np.sqrt(m)

(0.04013724111193085, 0.00036360062385718174)