In [3]:
using POMDPs
using POMDPTools
using QuickPOMDPs
using MCTS
using POMDPSimulators
using Distributions
using LinearAlgebra
using D3Trees
using Random



# Parameters

In [4]:
# Dimensions
max_progress = 5
max_power = 20
max_h2o = 20
max_o2 = 20
max_food = 20

# Uncertainties
p_mission_regression = 0.05
p_power_fail = 0.02
p_water_loss = 0.02
p_food_loss = 0.02
p_no_mission_progress = 0.05

# Resource consumption
consumed_power = -10 # generated
consumed_h2o = 1
consumed_o2 = 8
consumed_food = 9

9

In [5]:
# Define all possible states and actions (with constraints)
# [progress, power, h2o, o2, food]
indexed = LinearIndices((max_progress, max_power, max_h2o, max_o2, max_food))
states = 1:indexed[max_progress, max_power, max_h2o, max_o2, max_food]

basic_actions = [
      CartesianIndex(1, -1, 0, 0, 0), # make mission progress
      CartesianIndex(0, -1, 2, 0, 0), # make water 
      CartesianIndex(0, -2, -1, 1, 0), # make oxygen
      CartesianIndex(0, -1, -1, -1, 2), # make food
      CartesianIndex(0, 0, 0, 0, 0)
] # do nothing
consumed = CartesianIndex(0, consumed_power, consumed_h2o, consumed_o2, consumed_food)
actions = [action - consumed for action in basic_actions]



5-element Vector{CartesianIndex{5}}:
 CartesianIndex(1, 9, -1, -8, -9)
 CartesianIndex(0, 9, 1, -8, -9)
 CartesianIndex(0, 8, -2, -7, -9)
 CartesianIndex(0, 9, -2, -9, -7)
 CartesianIndex(0, 10, -1, -8, -9)

In [15]:
# Determine if action is out of bounds
# Associated consequences are reflected in transition/reward functions
state_maxes = [max_progress, max_power, max_h2o, max_o2, max_food]
function outofbounds(state, action)
    for i in 1:length(state)
        if ((state[i] + action[i]) < 1 || (state[i] + action[i]) > state_maxes[i])
            return true
        end
    end
    return false
end

outofbounds (generic function with 1 method)

In [21]:
# Determine if action is out of bounds
# Associated consequences are reflected in transition/reward functions
state_maxes = [max_progress, max_power, max_h2o, max_o2, max_food]
function clamp_state(state)
    for i in 1:length(state)
        state[i] = clamp(state[i], 1, state_maxes[i])
    end
    return state
end

clamp_state (generic function with 1 method)

In [28]:
        #thought: when we make no mission progress or lose water - we would still consume/produce energy??

# Transition function
function transition(s, a)
    p_nominal = 1 - p_no_mission_progress - p_water_loss - p_power_fail - p_mission_regression 
    probabilities = [p_no_mission_progress, p_water_loss, p_power_fail, p_mission_regression, p_nominal]

    svec = CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s]
    svec = collect(Tuple(svec))
    a = collect(Tuple(a))

    if outofbounds(svec, a)
        return [[s], [1]]
    else
        spvec_no_mission_progress = svec

        spvec_water_loss = svec + a
        spvec_water_loss[3] = 1 # lose all water

        spvec_power_fail = svec + a
        spvec_power_fail[2] = 1 # lose all energy

        spvec_mission_regression = svec + a
        spvec_mission_regression[1] = spvec_mission_regression[1] - 1 # mission setback

        spvec_nominal = svec + a

        spvec = [spvec_no_mission_progress, spvec_water_loss, spvec_power_fail, spvec_mission_regression, spvec_nominal]
    end
    

    for i in 1:length(probabilities)
        spvec[i] = clamp_state(spvec[i])
        spvec[i] = CartesianIndex(Tuple(spvec[i]))
        sp[i] = indexed[spvec[i]]
    end

    return [sp, probabilities]
    
end


transition (generic function with 1 method)

In [29]:


mission = QuickMDP(
    states = states,
    actions = actions,
    initialstate = initialstate,
    discount = 0.95,
    isterminal = s -> CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s][1] == max_progress,

    transition = function(s,a)
        T = transition(s,a)
        dist = SparseCat(T[1], T[2])
        return dist
    end,

    reward = function (s, a)
        svec = CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s]
        spvec = svec + a
        if spvec[1] == max_progress
            r = 1000
        elseif outofbounds(svec, a) # If invalid action
            r = -1e9
        else
            r = -1
        end
        return r
    end
)

QuickMDP{UUID("5031aacb-8628-4e66-ae7a-d0e50e51df0a"), Int64, CartesianIndex{5}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#43#46", Dict{CartesianIndex{5}, Int64}, Deterministic{Int64}, var"#44#47", UnitRange{Int64}, Vector{CartesianIndex{5}}, Float64, var"#45#48"}}}((stateindex = Dict(185983 => 185983, 221120 => 221120, 371966 => 371966, 92533 => 92533, 395569 => 395569, 76914 => 76914, 437164 => 437164, 37100 => 37100, 173643 => 173643, 300190 => 300190…), isterminal = var"#43#46"(), actionindex = Dict{CartesianIndex{5}, Int64}(CartesianIndex(0, 9, 1, -8, -9) => 2, CartesianIndex(1, 9, -1, -8, -9) => 1, CartesianIndex(0, 8, -2, -7, -9) => 3, CartesianIndex(0, 9, -2, -9, -7) => 4, CartesianIndex(0, 10, -1, -8, -9) => 5), initialstate = Deterministic{Int64}(479998), transition = var"#44#47"(), states = 1:480000, actions = CartesianIndex{5}[CartesianIndex(1, 9, -1, -8, -9), Cartes

In [30]:
solver = MCTSSolver(n_iterations=100000, depth=10, exploration_constant=5.0, enable_tree_vis=true)


MCTSPlanner{QuickMDP{UUID("5031aacb-8628-4e66-ae7a-d0e50e51df0a"), Int64, CartesianIndex{5}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#43#46", Dict{CartesianIndex{5}, Int64}, Deterministic{Int64}, var"#44#47", UnitRange{Int64}, Vector{CartesianIndex{5}}, Float64, var"#45#48"}}}, Int64, CartesianIndex{5}, MCTS.SolvedRolloutEstimator{RandomPolicy{Random._GLOBAL_RNG, QuickMDP{UUID("5031aacb-8628-4e66-ae7a-d0e50e51df0a"), Int64, CartesianIndex{5}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#43#46", Dict{CartesianIndex{5}, Int64}, Deterministic{Int64}, var"#44#47", UnitRange{Int64}, Vector{CartesianIndex{5}}, Float64, var"#45#48"}}}, NothingUpdater}, Random._GLOBAL_RNG}, Random._GLOBAL_RNG}(MCTSSolver(100000, Inf, 10, 5.0, Random._GLOBAL_RNG(), RolloutEstimator(RandomSolver(Rand

In [32]:
initial_index = indexed[1, 20, 20, 20, 20]

states = [initial_state]

sim  = RolloutSimulator()
r = 0.0
i = 1
# Print the resulting history
while (!= isterminal):
    initialstate = state[i]
    policy = solve(solver, mission)
    r += simulate(sim, mission, policy)
    push!(states, state)
    i += 1
end
    

InterruptException: InterruptException:

In [31]:

state = indexed[1, 20, 20, 20, 20] #initialstate(mission)
a = action(policy, state)
a, info = action_info(policy, state)
D3Tree(info[:tree], init_expand=2) # click on the node to expand it

In [None]:
#for i in 1:length(states)
    #s = CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[i] 
   # a = action(policy, i)
    #if rand() < 0.1
    #    println(i)
    #    println(a + consumed)
    #end
#end