In [1]:
using QuickPOMDPs
using POMDPTools
using Distributions
using LinearAlgebra
using MCTS

In [2]:
# Define all possible states and actions (with constraints)
# [progress, power, h2o, o2]
max_progress = 5
max_power = 100
max_h2o = 10
max_o2 = 10
p_mission_regression = 0.05
p_power_fail = 0.02
p_water_loss = 0.02
energy_per_day = 2 #energy production per day

indexed = LinearIndices((max_progress, max_power, max_h2o, max_o2))
states = 1:indexed[max_progress, max_power, max_h2o, max_o2]
actions = [CartesianIndex(1, -4, -2, 0), #make mission progress
           CartesianIndex(0, -2, 1, 0), #make water
           CartesianIndex(0, -1, -1, 1), #make oxygen
           CartesianIndex(0, -0, 0, 0)] #do nothing
initialindex = indexed[1, 1, 1, 1]
#make mission progress

initialstate = Deterministic(initialindex)

Deterministic{Int64}(1)

In [5]:
# Determine if action is out of bounds
# Associated consequences are reflected in transition/reward functions
state_maxes = [max_progress, max_power, max_h2o, max_o2]
function outofbounds(state, action)
    for i in 1:length(state)
        if ((state[i] + action[i]) < 1 || (state[i] + action[i]) > state_maxes[i])
            return true
        end
    end
    return false
end

outofbounds (generic function with 1 method)

In [37]:
# Transition function
function transition(s, a)
    svec = CartesianIndices((max_progress, max_power, max_h2o, max_o2))[s]
    svec = collect(Tuple(svec))
    a = collect(Tuple(a))
    if outofbounds(svec, a)
        svec[2] = svec[2] + energy_per_day # increment energy
        if outofbounds(svec, a)
            svec[2] = max_power
        end
        spvec = svec
    else
        svec[2] = svec[2] + energy_per_day # increment energy
        if outofbounds(svec, a)
            svec[2] = max_power
        end
        x = rand()
        if (x < 0.05)
            spvec = svec # no progress
        elseif (x < p_water_loss)
            svec[3] = 0.0 # lose all water
            spvec = svec + a
        elseif (x<(p_water_loss + p_power_fail))
            svec[2] = 0.0 # lose all energy
            spvec = svec + a
        elseif (x < (p_mission_regression + p_water_loss + p_power_fail) && svec[3] >= 1)
            svec[1] = svec[1] - 1 # backwards progress mission
            spvec = svec + a
        else 
            spvec = svec + a
        end
    end
    spvec = CartesianIndex(Tuple(spvec))
    sp = indexed[spvec]
    return Deterministic(sp)
end


transition (generic function with 1 method)

In [38]:
mission = QuickMDP(
    states = states,
    actions = actions,
    initialstate = initialstate,
    discount = 0.95,
    isterminal = s -> CartesianIndices((max_progress, max_power, max_h2o, max_o2))[s][1] == max_progress,

    transition = transition,

    reward = function (s, a)
        svec = CartesianIndices((max_progress, max_power, max_h2o, max_o2))[s]
        spvec = svec + a
        if svec[1] == max_progress
            r = 1000
        elseif outofbounds(svec, a) # If invalid action
            r = -1e9
        else
            r = -1
        end
        return r
    end
)

QuickMDP{UUID("767fbe04-0a79-4d73-b59b-5c7d227e79bd"), Int64, CartesianIndex{4}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#15#17", Dict{CartesianIndex{4}, Int64}, Deterministic{Int64}, typeof(transition), UnitRange{Int64}, Vector{CartesianIndex{4}}, Float64, var"#16#18"}}}((stateindex = Dict(29965 => 29965, 45120 => 45120, 30270 => 30270, 1703 => 1703, 37100 => 37100, 7685 => 7685, 3406 => 3406, 28804 => 28804, 27640 => 27640, 2015 => 2015…), isterminal = var"#15#17"(), actionindex = Dict{CartesianIndex{4}, Int64}(CartesianIndex(0, -2, 1, 0) => 2, CartesianIndex(0, 0, 0, 0) => 4, CartesianIndex(0, -1, -1, 1) => 3, CartesianIndex(1, -4, -2, 0) => 1), initialstate = Deterministic{Int64}(1), transition = transition, states = 1:50000, actions = CartesianIndex{4}[CartesianIndex(1, -4, -2, 0), CartesianIndex(0, -2, 1, 0), CartesianIndex(0, -1, -1, 1), CartesianIndex(0, 0, 0, 0)], disc

In [39]:
solver = MCTSSolver(n_iterations=1000, depth=100, exploration_constant=5.0)
planner = solve(solver, mission)

MCTSPlanner{QuickMDP{UUID("767fbe04-0a79-4d73-b59b-5c7d227e79bd"), Int64, CartesianIndex{4}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#15#17", Dict{CartesianIndex{4}, Int64}, Deterministic{Int64}, typeof(transition), UnitRange{Int64}, Vector{CartesianIndex{4}}, Float64, var"#16#18"}}}, Int64, CartesianIndex{4}, MCTS.SolvedRolloutEstimator{RandomPolicy{Random._GLOBAL_RNG, QuickMDP{UUID("767fbe04-0a79-4d73-b59b-5c7d227e79bd"), Int64, CartesianIndex{4}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#15#17", Dict{CartesianIndex{4}, Int64}, Deterministic{Int64}, typeof(transition), UnitRange{Int64}, Vector{CartesianIndex{4}}, Float64, var"#16#18"}}}, NothingUpdater}, Random._GLOBAL_RNG}, Random._GLOBAL_RNG}(MCTSSolver(1000, Inf, 100, 5.0, Random._GLOBAL_RNG(), RolloutEstimator(Rand

In [41]:
s = 2192
a = action(planner, s)

BoundsError: BoundsError: attempt to access 5×100×10×10 LinearIndices{4, NTuple{4, Base.OneTo{Int64}}} at index [4, 102, 3, 5]