In [1]:
include("../../MCVI_Julia/src/Planner.jl")
using Plots
using RockSample

## 1. First prepare the POMDP and some basic parameters

In [8]:
# Build the RockSample problem instance with constructor arguments (7, 8).
pomdp = RockSamplePOMDP(7, 8)

# Action and observation spaces are queried from the problem itself.
action_space = actions(pomdp)
obs_space = observations(pomdp)

# Sizing parameters: how many samples to draw from the initial belief, and
# the node budget for the finite-state controller.
nb_particles_b0 = 500
max_node_size = 10000

# Represent the initial belief by independent samples from the initial
# state distribution. `Any[...]` keeps the Vector{Any} container type that
# an untyped `[]` literal filled by `push!` would have.
b0 = initialstate(pomdp)
b0_particles = Any[rand(b0) for _ in 1:nb_particles_b0]

## 2. Prepare $V_{mdp}$ heuristic

In [3]:
# --- Q-learning setup ---
# Hyper-parameters handed to the Qlearning constructor.
learning_rate = 0.9
explore_rate = 0.65
# Q-value storage: arbitrary key -> (Int64 -> Float64) table.
Q_table = Dict{Any, Dict{Int64, Float64}}()

# Draw a fresh, larger sample of the initial belief. This rebinds
# `nb_particles_b0` and `b0_particles`, replacing any values they held before.
nb_particles_b0 = 10000
b0_particles = Any[rand(b0) for _ in 1:nb_particles_b0]

# Policy object and the lower bound used by the later planning cells.
Q_learning_policy = Qlearning(Q_table, learning_rate, explore_rate, action_space)
RL = FindRLower(pomdp, b0, action_space)

# --- Training schedule ---
nb_episode_size = 10
nb_max_episode = 10
nb_sim = 20
epsilon_Q_learning = 0.01

# Train on the sampled initial-belief particles; prints one
# "Avg Value" line per episode.
Training(
    Q_learning_policy,
    nb_episode_size,
    nb_max_episode,
    nb_sim,
    epsilon_Q_learning,
    b0_particles,
    pomdp,
)

------ Episode: 0 ------
Avg Value: 27.93918380177924
------ Episode: 1 ------
Avg Value: 29.41656854234094
------ Episode: 2 ------
Avg Value: 29.449905618492956
------ Episode: 3 ------
Avg Value: 29.481641523734474
------ Episode: 4 ------
Avg Value: 29.573154720548256
------ Episode: 5 ------


LoadError: InterruptException:

## 3. Create the initial FSC and belief tree node

In [4]:
# Query the learned Q-policy on the initial particles: `a` and `U` are the
# two values returned by EvaluateUpperBound (presumably the greedy action
# and its upper-bound value — TODO confirm against Planner.jl).
a, U = EvaluateUpperBound(b0_particles, Q_learning_policy)

# Root of the belief tree, starting with empty child and per-key value maps.
# NOTE(review): the meaning of the trailing -1 is defined by BeliefTreeNode
# in Planner.jl — confirm before changing it.
root_b_tree_node = BeliefTreeNode(
    b0_particles,
    Dict{Pair{Any, Any}, BeliefTreeNode}(),
    a,
    Dict{Any, Float64}(),
    U,
    RL,
    -1,
)

# Controller initialised from the node budget and the two spaces; kept as
# the last expression so the notebook displays the resulting FSC.
fsc = InitFSC(max_node_size, action_space, obs_space)

FSC(Dict{Pair{Any, Int64}, Int64}[Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict()  …  Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict()], FscNode[], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], 1:3)

## 4. MCVI planning

In [5]:
# Run the MCVI planner starting from the root belief node, using the
# Q-learning policy and the lower bound RL computed earlier. Each iteration
# prints a "Belief Expand Process" / "BackUp Process" trace.
# NOTE(review): the positional literals 20, 50, 0.1 and 100 are tuning
# parameters whose meaning is defined by MCVIPlanning in Planner.jl —
# confirm there and consider binding them to named variables.
# Presumably `fsc` is filled in place, since later cells simulate with it —
# TODO confirm.
MCVIPlanning(b0_particles, fsc, pomdp, RL, 20, 50, 0.1, 100, Q_learning_policy, root_b_tree_node)

--- Iter 1 ---
Belief Expand Process
BackUp Process
nI_new 2, V 0.0
nI_new 3, V 0.0
nI_new 4, V 0.0
nI_new 5, V 0.0
nI_new 6, V 0.0
nI_new 7, V 0.0
nI_new 8, V 0.0
nI_new 9, V 0.0
nI_new 10, V 0.0
nI_new 11, V 0.0
nI_new 12, V 0.0
nI_new 13, V 0.0
nI_new 14, V 0.0
nI_new 15, V 0.0
nI_new 16, V 0.0
nI_new 17, V 0.0
nI_new 18, V 0.0
nI_new 19, V 0.0
nI_new 20, V 0.0
nI_new 1, V 0.0
--- Iter 2 ---
Belief Expand Process
BackUp Process
nI_new 21, V 0.0
nI_new 22, V 0.0
nI_new 23, V 0.0
nI_new 24, V 0.0
nI_new 25, V 0.0
nI_new 26, V 0.0
nI_new 27, V 0.0
nI_new 28, V 0.0
nI_new 29, V 0.0
nI_new 30, V 0.0
nI_new 31, V 0.0
nI_new 32, V 0.0
nI_new 33, V 0.0
nI_new 34, V 0.0
nI_new 35, V 0.0
nI_new 36, V 0.0
nI_new 37, V 0.0
nI_new 38, V 0.0
nI_new 39, V 0.0
nI_new 1, V 0.25083909253383707
--- Iter 3 ---
Belief Expand Process
BackUp Process
nI_new 40, V 0.0
nI_new 41, V 0.0
nI_new 42, V 0.0
nI_new 43, V 0.0
nI_new 44, V 0.0
nI_new 45, V 0.0
nI_new 46, V 0.0
nI_new 47, V 0.0
nI_new 48, V 0.0
nI_ne

LoadError: InterruptException:

## 5. Evaluation

In [6]:
# Run one simulated episode that follows the FSC from the initial belief and
# prints a per-step trace (state, action, observation, node, reward).
# NOTE(review): 40 is presumably the maximum number of steps — confirm
# against SimulationWithFSC in Planner.jl.
SimulationWithFSC(b0, pomdp, fsc, 40) # single simulation run

---------
step: 1
state:RSState{8}([1, 1], Bool[1, 0, 1, 1, 0, 0, 1, 0])
perform action:8
recieve obs:1
nI:1
nI value:15.227888176704287
reward:0.0
---------
step: 2
state:RSState{8}([1, 1], Bool[1, 0, 1, 1, 0, 0, 1, 0])
perform action:2
recieve obs:3
nI:76
nI value:14.054724631841173
reward:0.0
---------
step: 3
state:RSState{8}([1, 2], Bool[1, 0, 1, 1, 0, 0, 1, 0])
perform action:3
recieve obs:3
nI:177
nI value:16.166591671356695
reward:0.0
---------
step: 4
state:RSState{8}([2, 2], Bool[1, 0, 1, 1, 0, 0, 1, 0])
perform action:3
recieve obs:3
nI:165
nI value:15.869346294486713
reward:0.0
---------
step: 5
state:RSState{8}([3, 2], Bool[1, 0, 1, 1, 0, 0, 1, 0])
perform action:8
recieve obs:1
nI:147
nI value:14.463179583781244
reward:0.0
---------
step: 6
state:RSState{8}([3, 2], Bool[1, 0, 1, 1, 0, 0, 1, 0])
perform action:1
recieve obs:3
nI:143
nI value:14.439771223906241
reward:10.0
---------
step: 7
state:RSState{8}([3, 2], Bool[1, 0, 0, 1, 0, 0, 1, 0])
perform action:3
recieve obs:

In [9]:
# Evaluate the FSC by simulation, passing the POMDP's own discount factor.
# NOTE(review): 10000 is presumably the number of simulation runs — confirm
# against EvaluationWithSimulationFSC in Planner.jl.
EvaluationWithSimulationFSC(b0, pomdp, fsc, discount(pomdp), 10000)

sum_r:0.0013425810297764595
