In [1]:
include("../../MCVI_Julia/src/Planner.jl")
using Plots
using RockSample

## 1. First prepare the POMDP and some basic parameters

In [2]:
# Problem instance. NOTE(review): the arguments (7, 8) are presumably the map size
# and the number of rocks — confirm against the RockSample constructor.
pomdp = RockSamplePOMDP(7, 8)

# Action and observation spaces as exposed by the POMDP.
action_space = actions(pomdp)
obs_space = observations(pomdp)

# Sizes used below: number of particles drawn from the initial belief, and the
# node budget later passed to InitFSC.
nb_particles_b0 = 500
max_node_size = 10000

# Particle approximation of the initial belief: nb_particles_b0 draws from b0.
b0 = initialstate(pomdp)
b0_particles = []
foreach(_ -> push!(b0_particles, rand(b0)), 1:nb_particles_b0)

## 2. Prepare $V_{mdp}$ heuristic

In [3]:
# Tabular Q-learning setup. The table maps an untyped key to an Int64 => Float64
# dictionary (presumably state => (action => Q-value); verify against Qlearning).
learning_rate = 0.9
explore_rate = 0.65
Q_table = Dict{Any, Dict{Int64, Float64}}()

# Redraw the initial-belief particles with a larger sample; this overwrites the
# nb_particles_b0 and b0_particles defined in the previous cell.
nb_particles_b0 = 10000
b0_particles = Any[rand(b0) for _ in 1:nb_particles_b0]

Q_learning_policy = Qlearning(Q_table, learning_rate, explore_rate, action_space)
RL = FindRLower(pomdp, b0, action_space)

# Training schedule handed to Training below.
nb_episode_size, nb_max_episode, nb_sim = 5, 5, 20
epsilon_Q_learning = 0.01
Training(
    Q_learning_policy,
    nb_episode_size,
    nb_max_episode,
    nb_sim,
    epsilon_Q_learning,
    b0_particles,
    pomdp,
)

------ Episode: 0 ------
Avg Value: 30.37761804170949
------ Episode: 1 ------
Avg Value: 31.747580182056407
------ Episode: 2 ------
Avg Value: 31.753876747761627


## 3. Create the initial FSC and belief tree node

In [4]:
# Evaluate the Q-learning heuristic on the initial particle set; the call returns
# a pair that is unpacked into `a` and `U`.
a, U = EvaluateUpperBound(b0_particles, Q_learning_policy)

# Root node of the belief tree, built from the initial particles.
# NOTE(review): the field meanings are not visible here — presumably the two empty
# dictionaries are children and per-key values, U / RL are the upper / lower bounds,
# and -1 marks "no FSC node yet". Confirm against the BeliefTreeNode definition.
root_b_tree_node = BeliefTreeNode(
    b0_particles,
    Dict{Pair{Any, Any}, BeliefTreeNode}(),
    a,
    Dict{Any, Float64}(),
    U,
    RL,
    -1,
)

# Finite-state controller sized by max_node_size over the action/observation spaces.
fsc = InitFSC(max_node_size, action_space, obs_space)

FSC(Dict{Pair{Any, Any}, Int64}[Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict()  …  Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict()], FscNode[], 1:13, 1:3, 1)

## 4. MCVI planning

In [None]:
# Run the MCVI planner starting from the root belief node, using the Q-learning
# policy and RL prepared above. Presumably this populates `fsc` in place — verify.
# NOTE(review): the positional literals (30, 200, 0.1, 30) are unnamed here; confirm
# their meaning against the MCVIPlanning signature in Planner.jl.
MCVIPlanning(b0_particles, fsc, pomdp, RL, 30, 200, 0.1, 30, Q_learning_policy, root_b_tree_node)

--- Iter 1 ---
Tr_root upper bound:31.76204866367206
Tr_root lower bound:0.0
Belief Expand Process
BackUp Process
--- Iter 2 ---
Tr_root upper bound:31.76204866367206
Tr_root lower bound:8.061863745304231
Belief Expand Process
BackUp Process
--- Iter 3 ---
Tr_root upper bound:31.76204866367206
Tr_root lower bound:7.701389207614878
Belief Expand Process
BackUp Process
--- Iter 4 ---
Tr_root upper bound:31.76204866367206
Tr_root lower bound:7.643257963315218
Belief Expand Process
BackUp Process
--- Iter 5 ---
Tr_root upper bound:31.76204866367206
Tr_root lower bound:7.729216772485968
Belief Expand Process
BackUp Process
--- Iter 6 ---
Tr_root upper bound:31.76204866367206
Tr_root lower bound:9.625840249923861
Belief Expand Process
BackUp Process
--- Iter 7 ---
Tr_root upper bound:31.76204866367206
Tr_root lower bound:9.887235045841415
Belief Expand Process
BackUp Process
--- Iter 8 ---
Tr_root upper bound:31.76204866367206
Tr_root lower bound:12.841460083405593
Belief Expand Process
Back

## 5. Evaluation

In [None]:
# Run a single simulation of the controller `fsc` on `pomdp`, starting from b0.
# NOTE(review): 20 is presumably the maximum number of steps — confirm.
SimulationWithFSC(b0, pomdp, fsc, 20) # Simulate One Time

In [None]:
# Evaluate the controller by simulation, passing the POMDP's own discount factor.
# NOTE(review): 10000 is presumably the number of simulations — confirm.
EvaluationWithSimulationFSC(b0, pomdp, fsc, discount(pomdp), 10000)

In [None]:
# Same evaluation with a smaller count and one extra positional argument.
# NOTE(review): 1000 / 50 are presumably the number of simulations and a per-run
# step limit — confirm against the EvaluationWithSimulationFSC methods.
EvaluationWithSimulationFSC(b0, pomdp, fsc, discount(pomdp), 1000, 50)

In [None]:
# Inspect the first entry of the controller's `_nodes` collection.
fsc._nodes[1]

In [None]:
# Inspect the controller's `_eta` field (presumably its transition tables — verify).
fsc._eta