In [1]:
# activate project environment
# include these lines of code in any future scripts/notebooks
#---
import Pkg
# Only switch environments when AA228FinalProject is not already listed by
# Pkg.installed() for the currently active environment.
# NOTE(review): Pkg.installed() is deprecated in newer Julia releases -- confirm
# against the Julia version this project targets before relying on it.
if !haskey(Pkg.installed(), "AA228FinalProject")
    jenv = joinpath(dirname(@__FILE__()), "..") # resolves to the PARENT of the directory containing this file;
    # this assumes the Project.toml file (top level dir of the project) lives one level above this notebook.
    # Change accordingly if this is not the case.
    Pkg.activate(jenv)
end
#---
#---

# import necessary packages
using AA228FinalProject
using POMDPs
using POMDPPolicies
using BeliefUpdaters
using ParticleFilters
using POMDPSimulators
using Cairo
using Gtk
using Random
using Printf
using POMDPModels
using POMDPSimulators
using QMDP

┌ Info: Loading Cairo backend into Compose.jl
└ @ Compose C:\Users\zacfa\.julia\packages\Compose\BYWXX\src\Compose.jl:161
│ - If you have Compose checked out for development and have
│   added Cairo as a dependency but haven't updated your primary
│   environment's manifest file, try `Pkg.resolve()`.
│ - Otherwise you may need to report an issue with Compose
└ @ nothing nothing:837


In [2]:
# Sensor model used by the POMDP. This notebook runs the bumper version;
# swap in the project's other sensor type here to run that version instead
# (NOTE(review): presumably `Lidar()` -- confirm the name in AA228FinalProject).
sensor = Bumper()

# Environment configuration index: 1, 2, or 3.
config = 1

# Discrete action set: one RoombaAct for every (v, om) pairing of the values
# below, flattened into a vector.
vlist = [3.0]
omlist = [-0.5, -0.25, 0, 0.25, 0.5]
aspace = vec([RoombaAct(v, om) for v in vlist, om in omlist])

# Number of grid points per state dimension
# (x, y, and th -- presumably the heading angle; confirm in AA228FinalProject).
num_x_pts, num_y_pts, num_th_pts = 100, 100, 20
sspace = DiscreteRoombaStateSpace(num_x_pts, num_y_pts, num_th_pts)

# Assemble the POMDP from the sensor and the underlying discretized MDP.
m = RoombaPOMDP(
    sensor=sensor,
    mdp=RoombaMDP(config=config, aspace=aspace, sspace=sspace),
)

RoombaPOMDP{Bumper,Bool}(Bumper(), RoombaMDP{DiscreteRoombaStateSpace,Array{RoombaAct,1}}
  v_max: Float64 10.0
  om_max: Float64 1.0
  dt: Float64 0.5
  contact_pen: Float64 -1.0
  time_pen: Float64 -0.1
  goal_reward: Float64 10.0
  stairs_penalty: Float64 -10.0
  config: Int64 1
  sspace: DiscreteRoombaStateSpace
  room: AA228FinalProject.Room
  aspace: Array{RoombaAct}((5,))
  _amap: Dict{RoombaAct,Int64}
)

In [3]:
# Build the particle-filter belief updater used by the simulations below.

# Particle count handed to the resampler.
num_particles = 2000
resampler = BumperResampler(num_particles)

# Particle filter over the POMDP model `m` using the resampler above.
spf = SimpleParticleFilter(m, resampler)

# NOTE(review): presumably these scale the noise applied to the velocity (v) and
# turn-rate (om) components when particles are propagated -- confirm in
# AA228FinalProject's RoombaParticleFilter.
v_noise_coefficient = 2.0
om_noise_coefficient = 0.5

# The trailing `;` suppresses printing of the constructed updater in the notebook.
belief_updater = RoombaParticleFilter(spf, v_noise_coefficient, om_noise_coefficient);

### Define a QMDP Policy

In [4]:
# initialize a solver and compute a policy
# With verbose=true the solver prints one residual/runtime line per iteration.
# In the recorded run below, each iteration took roughly 45-55 s and the solve
# stopped after iteration 91, when the residual (0.000989) fell under the
# tolerance of 1e-3 -- well before the max_iterations cap of 300.
solver = QMDPSolver(max_iterations=300,
                    tolerance=1e-3,
                    verbose=true) # from QMDP
# `QMDPPolicy` is a plain variable holding the solved policy (printed below as an
# AlphaVectorPolicy); the policy's action method defined later reads it as a global.
QMDPPolicy = solve(solver, m)

[Iteration 1   ] residual:       11.1 | iteration runtime:  47748.492 ms, (      47.7 s total)
[Iteration 2   ] residual:       10.5 | iteration runtime:  53038.616 ms, (       101 s total)
[Iteration 3   ] residual:         10 | iteration runtime:  54566.421 ms, (       155 s total)
[Iteration 4   ] residual:       8.66 | iteration runtime:  52812.303 ms, (       208 s total)
[Iteration 5   ] residual:       7.21 | iteration runtime:  53571.129 ms, (       262 s total)
[Iteration 6   ] residual:       6.85 | iteration runtime:  52513.403 ms, (       314 s total)
[Iteration 7   ] residual:        6.5 | iteration runtime:  55008.501 ms, (       369 s total)
[Iteration 8   ] residual:       6.18 | iteration runtime:  54931.588 ms, (       424 s total)
[Iteration 9   ] residual:       5.87 | iteration runtime:  43149.698 ms, (       467 s total)
[Iteration 10  ] residual:       5.58 | iteration runtime:  43071.539 ms, (       510 s total)
[Iteration 11  ] residual:        5.3 | iteration 

[Iteration 88  ] residual:    0.00115 | iteration runtime:  48634.612 ms, (  4.51E+03 s total)
[Iteration 89  ] residual:     0.0011 | iteration runtime:  49672.430 ms, (  4.56E+03 s total)
[Iteration 90  ] residual:    0.00104 | iteration runtime:  49826.191 ms, (  4.61E+03 s total)
[Iteration 91  ] residual:   0.000989 | iteration runtime:  49259.648 ms, (  4.66E+03 s total)


AlphaVectorPolicy{RoombaPOMDP{Bumper,Bool},RoombaAct}(RoombaPOMDP{Bumper,Bool}(Bumper(), RoombaMDP{DiscreteRoombaStateSpace,Array{RoombaAct,1}}
  v_max: Float64 10.0
  om_max: Float64 1.0
  dt: Float64 0.5
  contact_pen: Float64 -1.0
  time_pen: Float64 -0.1
  goal_reward: Float64 10.0
  stairs_penalty: Float64 -10.0
  config: Int64 1
  sspace: DiscreteRoombaStateSpace
  room: AA228FinalProject.Room
  aspace: Array{RoombaAct}((5,))
  _amap: Dict{RoombaAct,Int64}
), Array{Float64,1}[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

### Define a policy

Here we demonstrate how to define a naive policy that attempts to navigate the Roomba to the goal. The heuristic policy we define here first spins around for 25 time-steps in order to perform localization, then follows a simple proportional control law that navigates the robot in the direction of the goal state (note that this policy fails if there is a wall in the way).

First we create a struct that subtypes the Policy abstract type, defined in the package ```POMDPPolicies.jl```. Here, we can also define certain parameters, such as a variable tracking the current time-step.

Next, we define a function that can take in our policy and the belief state and return the desired action. We do this by defining a new ```POMDPs.action``` function that will work with our policy. 

In [5]:
# Define the policy to test
# ToEnd is a stateful policy type (a subtype of Policy). It is declared mutable
# so that its step counter can be incremented in place each time an action is
# requested.
mutable struct ToEnd <: Policy
    ts::Int64 # to track the current time-step.
end

# extract goal for heuristic controller
# The coordinates are printed for inspection (see the cell output below).
# NOTE(review): the action method defined in this cell does not read goal_xy --
# confirm whether it is still needed.
goal_xy = get_goal_xy(m)
print(goal_xy)

# Action selection for the ToEnd policy: takes the policy struct and the current
# particle belief and returns the action to execute.
#
# Each call advances the policy's time-step counter. If the first particle of the
# belief is in contact with a wall (according to AA228FinalProject.wall_contact on
# the global model `m`), a fixed turning action is returned; otherwise the choice
# is delegated to the global `QMDPPolicy` evaluated on the same belief.
function POMDPs.action(p::ToEnd, b::ParticleCollection{RoombaState})
    p.ts += 1

    # Only the first particle is inspected for wall contact.
    lead_particle = particles(b)[1]
    in_contact = AA228FinalProject.wall_contact(m, lead_particle)

    if !in_contact
        return action(QMDPPolicy, b)
    end
    return RoombaAct(3.0, -pi)
end

[14.9495, 0.0505051]

### Simulation and rendering

Here, we will demonstrate how to seed the environment, run a simulation, and render the simulation. To render the simulation, we use the ```Gtk``` package. 

The simulation is carried out using the ```stepthrough``` function defined in the package ```POMDPSimulators.jl```. During a simulation, a window will open that renders the scene. It may be hidden behind other windows on your desktop.

In [7]:
# first seed the environment
Random.seed!(9)

# reset the policy
p = ToEnd(0) # here, the argument sets the time-steps elapsed to 0
# (disabled debugging loop kept from the original notebook)
# for (t, step) in enumerate(stepthrough(m, p, belief_updater, max_steps=100))
#     print("hi")
# end
# run the simulation
# Create the drawing canvas and a 600x600 window titled "Roomba Environment".
c = @GtkCanvas()
win = GtkWindow(c, "Roomba Environment", 600, 600)
# Step the POMDP for at most 100 steps; `t` is the 1-based step index from
# enumerate and `step` is the record yielded by stepthrough.
for (t, step) in enumerate(stepthrough(m, p, belief_updater, max_steps=100))
    # A draw handler is (re)installed for the canvas on every step; it closes
    # over the current `t` and `step`.
    @guarded draw(c) do widget
        
        # the following lines render the room, the particles, and the roomba
        ctx = getgc(c)
        # paint the whole canvas white before drawing the scene
        set_source_rgb(ctx,1,1,1)
        paint(ctx)
        render(ctx, m, step)
        
        # render some information that can help with debugging
        # here, we render the time-step, the state, and the observation
        # NOTE(review): the model prints as RoombaPOMDP{Bumper,Bool}, so the
        # observation is presumably a Bool here -- confirm that %.3f is the
        # intended format for step.o with the bumper sensor.
        move_to(ctx,300,400)
        show_text(ctx, @sprintf("t=%d, state=%s, o=%.3f",t,string(step.s),step.o))
    end
    show(c)
    sleep(0.1) # to slow down the simulation
end

### Evaluation 

Here, we demonstrate a simple evaluation of the policy's performance for a few random seeds. This is meant to serve only as an example, and we encourage you to develop your own evaluation metrics.

We initialize the robot using five different random seeds, and simulate its performance for 100 time-steps. We then sum the rewards experienced during its interaction with the environment and track this total reward for the five trials.
Finally, we report the mean and standard error for the total reward. The standard error is the standard deviation of a sample set divided by the square root of the number of samples, and represents the uncertainty in the estimate of the mean value.

In [9]:
using Statistics

# Run `num_seeds` seeded episodes of the ToEnd policy and collect the results.
#
# Arguments:
#   pomdp       -- the POMDP model to simulate
#   updater     -- the belief updater passed to stepthrough
# Keywords:
#   num_seeds   -- number of episodes to run (episode i is seeded with i + seed_offset)
#   seed_offset -- constant added to the episode index to form the RNG seed
#   max_steps   -- step cap per episode
# Returns:
#   (rewards, successes) where `rewards` holds the accumulated reward of every
#   episode that reached the goal (failed episodes contribute nothing) and
#   `successes` is the number of such episodes.
#
# The loop lives in a function so that the counters are locals: this avoids the
# top-level scoping rules that make `num_success += 1` inside a global `for`
# loop behave differently in scripts than in notebooks, and keeps the
# accumulators concretely typed.
function run_trials(pomdp, updater; num_seeds=100, seed_offset=30, max_steps=100)
    rewards = Float64[]
    successes = 0

    for trial in 1:num_seeds
        println(string(trial))

        Random.seed!(trial + seed_offset)

        policy = ToEnd(0)
        traj_rewards = 0.0
        for step in stepthrough(pomdp, policy, updater, max_steps=max_steps)
            traj_rewards += step.r
            # A single-step reward above 5 is treated as "goal reached": the
            # model printout shows goal_reward = 10.0, while the contact, time
            # and stairs terms are all negative.
            if step.r > 5
                println("reached goal")
                successes += 1
                push!(rewards, traj_rewards)
                break
            end
        end
    end

    return rewards, successes
end

num_seeds = 100
total_rewards, num_success = run_trials(m, belief_updater; num_seeds=num_seeds)

success_rate = num_success / num_seeds
# Mean over SUCCESSFUL trials only. With a typed Float64 vector this is NaN
# (rather than an error) when no trial reached the goal.
mtr = mean(total_rewards)
score = success_rate*success_rate*mtr
@printf("Percent that reached goal: %.3f%%", success_rate*100)
println()
@printf("Mean Total Reward: %.3f", mtr)
println()
@printf("Score: %.3f", score)

1
reached goal
2
reached goal
3
reached goal
4
reached goal
5
reached goal
6
reached goal
7
reached goal
8
9
reached goal
10
reached goal
11
reached goal
12
reached goal
13
reached goal
14
reached goal
15
reached goal
16
reached goal
17
reached goal
18
reached goal
19
reached goal
20
21
reached goal
22
reached goal
23
reached goal
24
reached goal
25
reached goal
26
reached goal
27
reached goal
28
reached goal
29
reached goal
30
reached goal
31
32
33
reached goal
34
reached goal
35
reached goal
36
reached goal
37
reached goal
38
reached goal
39
reached goal
40
reached goal
41
reached goal
42
reached goal
43
reached goal
44
reached goal
45
reached goal
46
reached goal
47
reached goal
48
reached goal
49
reached goal
50
reached goal
51
reached goal
52
reached goal
53
reached goal
54
55
reached goal
56
reached goal
57
58
reached goal
59
reached goal
60
reached goal
61
reached goal
62
reached goal
63
reached goal
64
reached goal
65
reached goal
66
reached goal
67
reached goal
68
reached goal