In [1]:
include("../../MCVI_Julia/src/Planner.jl")
using Plots
using RockSample

## 1. Prepare the POMDP and the basic parameters

In [2]:
# Build the RockSample problem instance from the constructor arguments (7, 8).
# NOTE(review): presumably map size 7 with 8 rocks — confirm against RockSample.jl.
pomdp = RockSamplePOMDP(7, 8)

# Action and observation spaces as reported by the problem itself.
action_space = actions(pomdp)
obs_space = observations(pomdp)

# Sizing parameters used later in the notebook.
max_node_size = 10000    # handed to InitFSC when the controller is created
nb_particles_b0 = 500    # number of samples drawn from the initial belief

# Approximate the initial belief by sampling it `nb_particles_b0` times.
# The container is deliberately left untyped (Vector{Any}), as in the
# original cell, so downstream calls receive the same container type.
b0 = initialstate(pomdp)
b0_particles = []
foreach(_ -> push!(b0_particles, rand(b0)), 1:nb_particles_b0)

## 2. Prepare $V_{mdp}$ heuristic

In [3]:
# Tabular Q-learning setup; the trained policy is the heuristic used by the
# later upper-bound evaluation and by the planner.

# Redraw the initial-belief particle set with a larger sample count. This
# overwrites any earlier `nb_particles_b0` / `b0_particles` values.
nb_particles_b0 = 10000
b0_particles = []
foreach(_ -> push!(b0_particles, rand(b0)), 1:nb_particles_b0)

# Q-learning policy: an empty table plus the learning and exploration rates.
learning_rate = 0.9
explore_rate = 0.65
Q_table = Dict{Any, Dict{Int64, Float64}}()
Q_learning_policy = Qlearning(Q_table, learning_rate, explore_rate, action_space)

# `RL` is later passed to the belief tree root and to MCVIPlanning.
# NOTE(review): presumably a lower bound on the return — confirm in Planner.jl.
RL = FindRLower(pomdp, b0, action_space)

# Training settings, forwarded positionally to `Training` below.
epsilon_Q_learning = 0.01
nb_sim = 20
nb_max_episode = 5
nb_episode_size = 5
Training(
    Q_learning_policy,
    nb_episode_size,
    nb_max_episode,
    nb_sim,
    epsilon_Q_learning,
    b0_particles,
    pomdp,
)

------ Episode: 0 ------
Avg Value: 28.438347968333765
------ Episode: 1 ------
Avg Value: 29.9857801540695
------ Episode: 2 ------
Avg Value: 29.985780159126296


## 3. Create the initial FSC and the belief tree root node

In [4]:
# Query the trained Q-learning policy on the particle set; the call yields an
# action `a` and a value `U`.
# NOTE(review): presumably `U` is the upper-bound value of b0 — confirm.
a, U = EvaluateUpperBound(b0_particles, Q_learning_policy)

# Root of the belief tree, built from the particles, an empty child map,
# the action `a`, an empty Dict{Any, Float64}, the values `U` and `RL`, and -1.
# NOTE(review): the role of each positional field is defined by the
# BeliefTreeNode struct in Planner.jl — confirm there.
root_b_tree_node = BeliefTreeNode(
    b0_particles,
    Dict{Pair{Any, Any}, BeliefTreeNode}(),
    a,
    Dict{Any, Float64}(),
    U,
    RL,
    -1,
)

# Finite-state controller initialised from the node budget and both spaces.
fsc = InitFSC(max_node_size, action_space, obs_space)

FSC(Dict{Pair{Any, Int64}, Int64}[Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict()  …  Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict()], FscNode[], 1:13, 1:3)

## 4. MCVI planning

In [5]:
# Run the MCVI planner on the particle belief, updating `fsc`.
# NOTE(review): the four numeric arguments are positional settings whose
# meaning is defined by MCVIPlanning in Planner.jl — confirm there before
# changing them.
MCVIPlanning(
    b0_particles,
    fsc,
    pomdp,
    RL,
    20,
    200,
    0.1,
    100,
    Q_learning_policy,
    root_b_tree_node,
)

--- Iter 1 ---
Belief Expand Process
BackUp Process
nI_new 2, V 0.0
nI_new 3, V 0.0
nI_new 4, V 0.0
nI_new 5, V 0.0
nI_new 6, V 0.0
nI_new 7, V 0.0
nI_new 8, V 0.0
nI_new 9, V 0.0
nI_new 10, V 0.0
nI_new 11, V 0.0
nI_new 12, V 0.0
nI_new 13, V 0.0
nI_new 14, V 0.0
nI_new 15, V 0.0
nI_new 16, V 0.0
nI_new 17, V 0.0
nI_new 18, V 0.0
nI_new 19, V 0.0
nI_new 20, V 0.0
nI_new 1, V 0.0
nI_new 2, V 0.1315903044541932
nI_new 3, V 0.8346872129460298
nI_new 4, V 0.9911230726117672
nI_new 5, V 1.52
nI_new 6, V 1.1996786735533098
nI_new 7, V 0.9555599236469081
nI_new 8, V 1.52
nI_new 9, V 0.9598194484046978
nI_new 10, V 0.8345828976133454
nI_new 11, V 1.6048562902457588
nI_new 12, V 0.9978484175009621
nI_new 13, V 0.9689836474758076
nI_new 14, V 7.350918906250022
nI_new 15, V 1.106163940276183
nI_new 16, V 1.0636428546890944
nI_new 17, V 1.111611542279709
nI_new 18, V 2.185
nI_new 19, V 1.218812284448911
nI_new 20, V 1.1298947260579026
nI_new 1, V 5.099125
nI_new 2, V 0.0
nI_new 3, V 1.0925
nI_new

LoadError: InterruptException:

## 5. Evaluation

In [6]:
SimulationWithFSC(b0, pomdp, fsc, 40) # Run one simulated episode with the FSC policy, printing each step; NOTE(review): 40 is presumably the step limit — confirm in Planner.jl

---------
step: 1
state:RSState{8}([1, 1], Bool[1, 0, 0, 0, 1, 1, 1, 0])
perform action:1
recieve obs:3
nI:1
nI value:5.969817673562494
reward:0.0
---------
step: 2
state:RSState{8}([1, 1], Bool[1, 0, 0, 0, 1, 1, 1, 0])
perform action:2
recieve obs:3
nI:194
nI value:6.8351991037207185
reward:0.0
---------
step: 3
state:RSState{8}([1, 2], Bool[1, 0, 0, 0, 1, 1, 1, 0])
perform action:8
recieve obs:2
nI:193
nI value:7.378708245084101
reward:0.0
---------
step: 4
state:RSState{8}([1, 2], Bool[1, 0, 0, 0, 1, 1, 1, 0])
perform action:10
recieve obs:1
nI:19
nI value:7.171293543326482
reward:0.0
---------
step: 5
state:RSState{8}([1, 2], Bool[1, 0, 0, 0, 1, 1, 1, 0])
perform action:1
recieve obs:3
nI:101
nI value:10.0
reward:10.0
---------
step: 6
state:RSState{8}([1, 2], Bool[1, 0, 0, 0, 0, 1, 1, 0])
perform action:1
recieve obs:3
nI:1
nI value:5.969817673562494
reward:-10.0
---------
step: 7
state:RSState{8}([1, 2], Bool[1, 0, 0, 0, 0, 1, 1, 0])
perform action:2
recieve obs:3
nI:194
nI value

In [7]:
# Evaluate the FSC policy by simulation, using the POMDP's own discount factor.
# NOTE(review): 10000 is presumably the number of simulated runs — confirm in Planner.jl.
EvaluationWithSimulationFSC(b0, pomdp, fsc, discount(pomdp), 10000)

sum_r:0.4324435747528143
