In [1]:
using OPCSPs
using POMCP
using POMDPToolbox
using POMDPs

In [2]:
# Number of generated problem instances evaluated by the experiment loop.
N = 100

# One slot per generated problem; each slot is assigned inside the experiment loop.
problems = Any[nothing for j in 1:N]

# Per-problem reward obtained by each strategy. The buffers start out as NaN
# instead of uninitialised memory, so a partially completed run produces a NaN
# mean rather than a plausible-looking number computed from garbage.
naive_rewards = fill(NaN, N)
feedback_rewards = fill(NaN, N)
cheat_rewards = fill(NaN, N)
mcts_rewards = fill(NaN, N);

In [3]:
# Fixed inputs handed to `gen_opcsp` in the experiment loop; each has six entries/rows.
# NOTE(review): judging by the names these are per-node rewards, their covariance,
# and 2-D node coordinates -- confirm against the OPCSPs package.
r = Float64[0, 10.1, 5, 5, 5, 0]

cov = Float64[
    0  0  0  0  0  0
    0  0  0  0  0  0
    0  0  2  2 -2  0
    0  0  2  4  0  0
    0  0 -2  0  4  0
    0  0  0  0  0  0
]

positions = Vector{Float64}[
    [0.0, 0.0],
    [0.0, -1.71],
    [0.0, 1.0],
    [1.0, 1.0],
    [-1.0, 1.0],
    [0.0, 0.0],
]

# Shared generator for problem creation, seeded so the problem set is reproducible.
rng = MersenneTwister(1);

In [4]:
# Generate N problems and, for each one, record the reward obtained by the
# cheat, naive, feedback and POMCP strategies.
for j in 1:N
    # All problems are drawn from the same shared generator `rng`.
    prob = gen_opcsp(r, positions, cov, 3.43, 1, 6, rng=rng)
    problems[j] = prob

    # Baseline strategies, each scored with `reward` on the same problem.
    cheat_rewards[j] = reward(prob, cheat(prob))
    naive_rewards[j] = reward(prob, solve_op(prob))
    feedback_rewards[j] = reward(prob, solve_opcsp_feedback(prob))

    # POMCP solver built per problem; its RNG is seeded with the problem index.
    rollout = SolveMeanFeedback(prob)
    reward_sum = sum(prob.r)
    planner = POMCPSolver(rollout_policy=rollout,
                          c=reward_sum,
                          tree_queries=50,
                          rng=MersenneTwister(j),
                          updater=OPCSPUpdater(prob))
    mcts_policy = solve(planner, prob)

    # Run one recorded simulation; the recorder is re-seeded with 1 every iteration.
    recorder = HistoryRecorder(rng=MersenneTwister(1))
    belief_updater = updater(mcts_policy)
    start_belief = convert_belief(belief_updater, initial_belief(prob))
    simulate(recorder, prob, mcts_policy, belief_updater, start_belief)

    # Score the sequence of `i` fields taken from the recorded states.
    mcts_rewards[j] = reward(prob, Int[st.i for st in recorder.state_hist])
end

In [6]:
# Print the mean reward of each strategy as `mean(<name>) = <value>`,
# in the order cheat, naive, feedback, mcts.
for (label, rewards) in (("cheat_rewards", cheat_rewards),
                         ("naive_rewards", naive_rewards),
                         ("feedback_rewards", feedback_rewards),
                         ("mcts_rewards", mcts_rewards))
    println("mean(", label, ") = ", repr(mean(rewards)))
end

mean(cheat_rewards) = 11.438143114708593
mean(naive_rewards) = 10.099999999999998
mean(feedback_rewards) = 10.099999999999998
mean(mcts_rewards) = 11.438143114708593
