In [24]:
using QuickPOMDPs
using POMDPs
using POMDPTools
using Distributions
using LinearAlgebra
using MCTS
using POMDPSimulators
using POMDPPolicies
#using POMDPTutorials
using POMDPModels
using POMDPModelTools

In [25]:
# Define all possible states and actions (with constraints)
# [progress, power, h2o, o2]
max_progress = 5
max_power = 50
max_h2o = 10
max_o2 = 10
max_food = 10
p_mission_regression = 0.05
p_power_fail = 0.02
p_water_loss = 0.02
p_food_loss = 0.02
p_no_mission_progress = 0.05
energy_per_day = 20 #energy production per day

# food, water, and oxygen reductions each day 
food_loss_per_day = 9
o2_loss_per_day = 8
water_loss_per_day = 1

indexed = LinearIndices((max_progress, max_power, max_h2o, max_o2, max_food))
states = 1:indexed[max_progress, max_power, max_h2o, max_o2, max_food]
# actions = [CartesianIndex(1, -0.2, 0, 0, 0), #make mission progress
#            CartesianIndex(0, -0.2, 1, 0, 0), #make water
#            CartesianIndex(0, -0.8, -0.45, 1, 0), #make oxygen
#            CartesianIndex(0, -0.8, -0.2, -0.1, 1), #make food
#            CartesianIndex(0, 0, 0, 0, 0)] #do nothing
# scaling
actions = [CartesianIndex(1, -2, 0, 0, 0), #make mission progress
           CartesianIndex(0, -2, 5, 0, 0), #make water
           CartesianIndex(0, -8, -2, 10, 0), #make oxygen
           CartesianIndex(0, -8, -1, -1, 10), #make food
           CartesianIndex(0, 0, 0, 0, 0)] #do nothing
initialindex = indexed[1, 1, 1, 1, 1]
#make mission progress

initialstate = Deterministic(initialindex)

Deterministic{Int64}(1)

In [26]:
LinearIndices((max_progress, max_power, max_h2o, max_o2, max_food))[1,2,1,1,1]

6

In [27]:
# Determine if action is out of bounds
# Associated consequences are reflected in transition/reward functions
state_maxes = [max_progress, max_power, max_h2o, max_o2, max_food]
function outofbounds(state, action)
    for i in 1:length(state)
        if ((state[i] + action[i]) < 1 || (state[i] + action[i]) > state_maxes[i])
            return true
        end
    end
    return false
end

outofbounds (generic function with 1 method)

In [28]:
# Transition function
function transition(s, a)
    svec = CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s]
    svec = collect(Tuple(svec))
    a = collect(Tuple(a))
    if outofbounds(svec, a)
        svec[2] = svec[2] + energy_per_day # increment energy
        svec[3] = svec[3] - water_loss_per_day # increment water
        svec[4] = svec[4] - o2_loss_per_day # increment oxygen
        svec[5] = svec[5] - food_loss_per_day # increment food
        if outofbounds(svec, a)
            svec[2] = max_power
        end
        spvec = svec
    else
        svec[2] = svec[2] + energy_per_day # increment energy
        svec[3] = svec[3] - water_loss_per_day # increment water
        svec[4] = svec[4] - o2_loss_per_day # increment oxygen
        svec[5] = svec[5] - food_loss_per_day # increment food
        if outofbounds(svec, a)
            svec[2] = max_power
        end
        x = rand()
        if (x < p_no_mission_progress)
            spvec = svec # no progress
        elseif (x < (p_water_loss + p_no_mission_progress))
            svec[3] = 1 # lose all water
            spvec = svec + a
        elseif (x<(p_water_loss + p_power_fail + p_no_mission_progress))
            svec[2] = 1 # lose all energy
            spvec = svec + a
        elseif (x < (p_mission_regression + p_water_loss + p_power_fail + p_no_mission_progress) && svec[3] > 1)
            svec[1] = svec[1] - 1 # backwards progress mission
            spvec = svec + a
        else 
            spvec = svec + a
        end
    end
    spvec = CartesianIndex(Tuple(spvec))
    sp = indexed[spvec] #linear index
    return sp
end


transition (generic function with 1 method)

In [29]:
mission = QuickMDP(
    states = states,
    actions = actions,
    initialstate = initialstate,
    discount = 0.95,
    isterminal = s -> CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s][1] == max_progress,

    transition = function(s,a)
        return Deterministic(transition(s,a))
    end,

    reward = function (s, a)
        svec = CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s]
        spvec = svec + a
        if spvec[1] == max_progress
            r = 1000
        elseif outofbounds(svec, a) # If invalid action
            r = -Inf
        else
            r = -1
        end
        return r
    end
)

QuickMDP{UUID("64849720-a7dd-4620-81d8-587ea015b2cc"), Int64, CartesianIndex{5}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#27#30", Dict{CartesianIndex{5}, Int64}, Deterministic{Int64}, var"#28#31", UnitRange{Int64}, Vector{CartesianIndex{5}}, Float64, var"#29#32"}}}((stateindex = Dict(185983 => 185983, 221120 => 221120, 92533 => 92533, 137206 => 137206, 76914 => 76914, 150095 => 150095, 37100 => 37100, 173643 => 173643, 90240 => 90240, 3406 => 3406…), isterminal = var"#27#30"(), actionindex = Dict{CartesianIndex{5}, Int64}(CartesianIndex(1, -2, 0, 0, 0) => 1, CartesianIndex(0, -8, -1, -1, 10) => 4, CartesianIndex(0, -2, 5, 0, 0) => 2, CartesianIndex(0, 0, 0, 0, 0) => 5, CartesianIndex(0, -8, -2, 10, 0) => 3), initialstate = Deterministic{Int64}(1), transition = var"#28#31"(), states = 1:250000, actions = CartesianIndex{5}[CartesianIndex(1, -2, 0, 0, 0), CartesianIndex(0, -2, 5, 

In [30]:
solver = MCTSSolver(n_iterations=1000, depth=100, exploration_constant=5.0)
policy = solve(solver, mission)

MCTSPlanner{QuickMDP{UUID("64849720-a7dd-4620-81d8-587ea015b2cc"), Int64, CartesianIndex{5}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#27#30", Dict{CartesianIndex{5}, Int64}, Deterministic{Int64}, var"#28#31", UnitRange{Int64}, Vector{CartesianIndex{5}}, Float64, var"#29#32"}}}, Int64, CartesianIndex{5}, MCTS.SolvedRolloutEstimator{RandomPolicy{Random._GLOBAL_RNG, QuickMDP{UUID("64849720-a7dd-4620-81d8-587ea015b2cc"), Int64, CartesianIndex{5}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#27#30", Dict{CartesianIndex{5}, Int64}, Deterministic{Int64}, var"#28#31", UnitRange{Int64}, Vector{CartesianIndex{5}}, Float64, var"#29#32"}}}, NothingUpdater}, Random._GLOBAL_RNG}, Random._GLOBAL_RNG}(MCTSSolver(1000, Inf, 100, 5.0, Random._GLOBAL_RNG(), RolloutEstimator(RandomSolver(Rando

In [31]:
# Initial_state = initialstate(1)
history = simulate(policy, mission, 1, 10)

# Print the resulting history
println(history)

UndefVarError: UndefVarError: `simulate` not defined

In [34]:
#print(action(policy, 1))
rsum = 0.0

for (s,a,r) in stepthrough(mission, policy, "s, a,r", max_steps=1000)
    println("s: $s, b: $([s=>pdf(s) for s in states(mission)]), a: $a")
    global rsum += r
end


BoundsError: BoundsError: attempt to access 5×50×10×10×10 LinearIndices{5, NTuple{5, Base.OneTo{Int64}}} at index [1, 50, 0, -7, -8]

In [35]:
for i in 1:length(states)
    st = CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[i] 
    print(st)
    a = CartesianIndex(0, 0, 0, 0, 0)
    if (outofbounds(st, a))
        print("OUTOFBOUNDS")
        break
    end
    a = action(policy, i)
end
#a = action(planner, s)

CartesianIndex(1, 1, 1, 1, 1)

BoundsError: BoundsError: attempt to access 5×50×10×10×10 LinearIndices{5, NTuple{5, Base.OneTo{Int64}}} at index [1, 50, 0, -7, -8]