In [3]:
using POMDPs
using POMDPTools
using QuickPOMDPs
using MCTS
using POMDPSimulators
using Distributions
using LinearAlgebra
using D3Trees
using Random



# Parameters

In [4]:
# State-space extents: every coordinate of a state runs from 1 up to its max_*
max_progress = 3
max_power = max_h2o = max_o2 = max_food = 20

# Per-step probabilities of the random events
p_power_fail = p_water_loss = p_food_loss = 0.02
p_mission_regression = p_no_mission_progress = 0.05

# Baseline resource use per step; a negative value means the resource is produced
consumed_power = -10
consumed_h2o = 1
consumed_o2 = 8
consumed_food = 9

9

In [5]:
# Enumerate the state space and the action set.
# A state is the linear index of a grid cell [progress, power, h2o, o2, food].
indexed = LinearIndices((max_progress, max_power, max_h2o, max_o2, max_food))
states = 1:length(indexed)

# Direct effect of each activity on [progress, power, h2o, o2, food]
basic_actions = [
    CartesianIndex(1, -1, 0, 0, 0),   # make mission progress
    CartesianIndex(0, -1, 2, 0, 0),   # make water
    CartesianIndex(0, -2, -1, 1, 0),  # make oxygen
    CartesianIndex(0, -1, -1, -1, 2), # make food
    CartesianIndex(0, 0, 0, 0, 0),    # do nothing
]

# Baseline consumption applied every step regardless of the chosen activity
consumed = CartesianIndex(0, consumed_power, consumed_h2o, consumed_o2, consumed_food)

# Net per-step state change for each activity (direct effect minus baseline use)
actions = map(effect -> effect - consumed, basic_actions)



5-element Vector{CartesianIndex{5}}:
 CartesianIndex(1, 9, -1, -8, -9)
 CartesianIndex(0, 9, 1, -8, -9)
 CartesianIndex(0, 8, -2, -7, -9)
 CartesianIndex(0, 9, -2, -9, -7)
 CartesianIndex(0, 10, -1, -8, -9)

In [15]:
# Validity check for a (state, action) pair.
# What happens to an invalid pair is decided by the transition/reward functions.
state_maxes = [max_progress, max_power, max_h2o, max_o2, max_food]

"""
    outofbounds(state, action)

Return `true` when adding `action` to `state` would move at least one coordinate
below 1 or above the matching entry of `state_maxes`, and `false` otherwise.
Both arguments only need to support `length` and integer indexing.
"""
function outofbounds(state, action)
    return any(1:length(state)) do i
        next = state[i] + action[i]
        next < 1 || next > state_maxes[i]
    end
end

outofbounds (generic function with 1 method)

In [16]:
# Transition function
"""
    transition(s, a, rng=Random.default_rng())

Sample the successor of state `s` (a linear index into the
`[progress, power, h2o, o2, food]` grid) under the net action `a`
(a `CartesianIndex` of per-coordinate deltas) and return it as a linear index.

Rules, in order:

1. Power above `max_power` is discarded rather than treated as an invalid move.
2. If any coordinate would still leave `1:max` the action is invalid and the
   state is returned unchanged.
3. Otherwise one uniform draw selects at most one event: a stalled step (state
   unchanged), loss of all water, loss of all power, or a one-level mission
   setback (only possible when progress is above 1).
4. The action is applied and the result is clamped into the grid, so the
   returned index is always a member of `states`.

`rng` defaults to Julia's default RNG; pass a seeded `AbstractRNG` for
reproducible draws.
"""
function transition(s, a, rng::AbstractRNG=Random.default_rng())
    dims = (max_progress, max_power, max_h2o, max_o2, max_food)
    svec = collect(Tuple(CartesianIndices(dims)[s]))
    delta = collect(Tuple(a))

    # Validity is judged with the power gain capped at the remaining headroom:
    # a full battery must not turn every action into an out-of-bounds no-op.
    capped = copy(delta)
    capped[2] = min(delta[2], max_power - svec[2])
    if outofbounds(svec, capped)
        return indexed[CartesianIndex(Tuple(svec))]
    end

    # Cumulative thresholds for the mutually exclusive random events
    t_stall = p_no_mission_progress
    t_water = t_stall + p_water_loss
    t_power = t_water + p_power_fail
    t_regress = t_power + p_mission_regression

    x = rand(rng)
    if x < t_stall
        # Stalled step: nothing is produced or consumed
        return indexed[CartesianIndex(Tuple(svec))]
    elseif x < t_water
        svec[3] = 1 # lose all water
    elseif x < t_power
        svec[2] = 1 # lose all energy
    elseif x < t_regress && svec[1] > 1
        # Guard on progress (not water) so the level can never drop below 1
        svec[1] -= 1 # mission setback
    end

    # Clamp after the event: a water loss followed by consumption would otherwise
    # give a coordinate below 1, and power generation could exceed max_power.
    spvec = clamp.(svec .+ delta, 1, dims)
    return indexed[CartesianIndex(Tuple(spvec))]
end


transition (generic function with 1 method)

In [17]:
# Start at mission level 1 with every resource full
initialindex = indexed[1, max_power, max_h2o, max_o2, max_food]
initialstate = Deterministic(initialindex)

mission = QuickMDP(
    states = states,
    actions = actions,
    initialstate = initialstate,
    discount = 0.95,

    # Terminal once the progress coordinate reaches its maximum
    isterminal = s -> CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s][1] == max_progress,

    # `transition(s, a)` already returns one sampled successor, so it is wrapped
    # in a point distribution; the randomness is drawn when this function is called.
    transition = function (s, a)
        return Deterministic(transition(s, a))
    end,

    # Reward depends on (s, a) only and is always a Float64:
    #   -1e9  the action is invalid (a coordinate would leave 1:max)
    #   1000  the action nominally brings progress to max_progress
    #   -1    any other step
    reward = function (s, a)
        svec = collect(Tuple(CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s]))
        delta = collect(Tuple(a))
        # Surplus power is discarded, so generation alone never invalidates an action
        delta[2] = min(delta[2], max_power - svec[2])
        # Validity is checked before the goal so an invalid move cannot earn the bonus
        if outofbounds(svec, delta)
            return -1e9
        elseif svec[1] + delta[1] == max_progress
            return 1000.0
        else
            return -1.0
        end
    end
)

QuickMDP{UUID("6d006c4a-dbbd-4ade-86be-00bc1f2bcde2"), Int64, CartesianIndex{5}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#31#34", Dict{CartesianIndex{5}, Int64}, Deterministic{Int64}, var"#32#35", UnitRange{Int64}, Vector{CartesianIndex{5}}, Float64, var"#33#36"}}}((stateindex = Dict(185983 => 185983, 221120 => 221120, 371966 => 371966, 92533 => 92533, 395569 => 395569, 76914 => 76914, 437164 => 437164, 37100 => 37100, 173643 => 173643, 300190 => 300190…), isterminal = var"#31#34"(), actionindex = Dict{CartesianIndex{5}, Int64}(CartesianIndex(0, 9, 1, -8, -9) => 2, CartesianIndex(1, 9, -1, -8, -9) => 1, CartesianIndex(0, 8, -2, -7, -9) => 3, CartesianIndex(0, 9, -2, -9, -7) => 4, CartesianIndex(0, 10, -1, -8, -9) => 5), initialstate = Deterministic{Int64}(479998), transition = var"#32#35"(), states = 1:480000, actions = CartesianIndex{5}[CartesianIndex(1, 9, -1, -8, -9), Cartes

In [18]:
# Online MCTS planner; enable_tree_vis keeps the search tree for later inspection
solver = MCTSSolver(;
    n_iterations=100_000,
    depth=10,
    exploration_constant=5.0,
    enable_tree_vis=true,
)
policy = solve(solver, mission)

MCTSPlanner{QuickMDP{UUID("6d006c4a-dbbd-4ade-86be-00bc1f2bcde2"), Int64, CartesianIndex{5}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#31#34", Dict{CartesianIndex{5}, Int64}, Deterministic{Int64}, var"#32#35", UnitRange{Int64}, Vector{CartesianIndex{5}}, Float64, var"#33#36"}}}, Int64, CartesianIndex{5}, MCTS.SolvedRolloutEstimator{RandomPolicy{Random._GLOBAL_RNG, QuickMDP{UUID("6d006c4a-dbbd-4ade-86be-00bc1f2bcde2"), Int64, CartesianIndex{5}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#31#34", Dict{CartesianIndex{5}, Int64}, Deterministic{Int64}, var"#32#35", UnitRange{Int64}, Vector{CartesianIndex{5}}, Float64, var"#33#36"}}}, NothingUpdater}, Random._GLOBAL_RNG}, Random._GLOBAL_RNG}(MCTSSolver(100000, Inf, 10, 5.0, Random._GLOBAL_RNG(), RolloutEstimator(RandomSolver(Rand

In [30]:
# Bound the rollout: without max_steps the simulator only stops at a terminal
# state, so a policy that never completes the mission would never return.
sim = RolloutSimulator(max_steps=100)

# Discounted return of one rollout of the MCTS policy (a number, not a history)
r = simulate(sim, mission, policy)

In [19]:

# Plan once from the start state. action_info returns both the chosen action and
# the search info, so a separate action(policy, state) call would only repeat
# the whole search and have its result overwritten.
state = indexed[1, 20, 20, 20, 20] #initialstate(mission)
a, info = action_info(policy, state)
D3Tree(info[:tree], init_expand=2) # click on the node to expand it

In [None]:
#for i in 1:length(states)
    #s = CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[i] 
   # a = action(policy, i)
    #if rand() < 0.1
    #    println(i)
    #    println(a + consumed)
    #end
#end