In [2]:
using QuickPOMDPs
using POMDPTools
using Distributions
using LinearAlgebra

In [57]:
# State space: a 4-D grid whose axes are [progress, power, h2o, o2].
# Every state is identified by its linear index into that grid.
max_progress = 5
max_power = 100
max_h2o = 10
max_o2 = 10

# Event probabilities and daily energy income read by the transition model
p_mission_regression = 0.05
p_power_fail = 0.02
p_water_loss = 0.02
energy_per_day = 2 # energy production per day

indexed = LinearIndices((max_progress, max_power, max_h2o, max_o2))
states = 1:length(indexed)

# Each action is the offset it applies to [progress, power, h2o, o2]
actions = [
    CartesianIndex(1, -4, -2, 0),  # make mission progress
    CartesianIndex(0, -2, 1, 0),   # make water
    CartesianIndex(0, -1, -1, 1),  # make oxygen
    CartesianIndex(0, 0, 0, 0),    # do nothing
]

# The mission starts in the first grid cell, (1, 1, 1, 1)
initialindex = first(indexed)

initialstate = Deterministic(initialindex)

Deterministic{Int64}(1)

In [47]:
# Display the linear-index lookup table for the state grid.
# NOTE(review): the recorded output below is for a 5×10×10×10 grid, while the setup cell
# sets max_power = 100 (a 5×100×10×10 grid) — re-run this cell to refresh the output.
indexed

5×10×10×10 LinearIndices{4, NTuple{4, Base.OneTo{Int64}}}:
[:, :, 1, 1] =
 1   6  11  16  21  26  31  36  41  46
 2   7  12  17  22  27  32  37  42  47
 3   8  13  18  23  28  33  38  43  48
 4   9  14  19  24  29  34  39  44  49
 5  10  15  20  25  30  35  40  45  50

[:, :, 2, 1] =
 51  56  61  66  71  76  81  86  91   96
 52  57  62  67  72  77  82  87  92   97
 53  58  63  68  73  78  83  88  93   98
 54  59  64  69  74  79  84  89  94   99
 55  60  65  70  75  80  85  90  95  100

[:, :, 3, 1] =
 101  106  111  116  121  126  131  136  141  146
 102  107  112  117  122  127  132  137  142  147
 103  108  113  118  123  128  133  138  143  148
 104  109  114  119  124  129  134  139  144  149
 105  110  115  120  125  130  135  140  145  150

[:, :, 4, 1] =
 151  156  161  166  171  176  181  186  191  196
 152  157  162  167  172  177  182  187  192  197
 153  158  163  168  173  178  183  188  193  198
 154  159  164  169  174  179  184  189  194  199
 155  160  165  170  175  18

In [45]:
# Linear state index of grid position (2, 2, 2, 2).
# NOTE(review): the recorded result (557) matches a 5×10×10×10 grid; with max_power = 100
# this expression evaluates to 5507.
indexed[2, 2, 2, 2]

557

In [None]:
#transition model
# Product of the first three entries of `state_dimensions`.
# NOTE(review): `state_dimensions` is not defined in any cell visible in this notebook and
# `N_SS` is not read by any visible cell — confirm whether this line is still needed.
N_SS = state_dimensions[1]*state_dimensions[2]*state_dimensions[3]
# Return a copy of `state` whose `k`-th component is `value` (never mutates the argument).
function _with_component(state::CartesianIndex{N}, k, value) where {N}
    return CartesianIndex(ntuple(i -> i == k ? value : state[i], Val(N)))
end
function _with_component(state::AbstractVector, k, value)
    updated = copy(state)
    updated[k] = value
    return updated
end

"""
    Transition_Model(current_state, action, state_dimension)

Sample the successor of `current_state` after one day in which `action` is attempted.

States and actions are laid out as `[progress, power, h2o, o2]`, with every component on
a 1-based grid bounded above by `state_dimension`. The input state is never mutated.

One day proceeds as follows:

1. `energy_per_day` is added to the power component if the result stays within
   `state_dimension[2]`.
2. One random event is drawn, each with its own probability:
   - no progress (0.05): the action has no effect;
   - water loss (`p_water_loss`): h2o drops to the lowest grid level, 1;
   - power failure (`p_power_fail`): power drops to the lowest grid level, 1;
   - mission regression (`p_mission_regression`): progress drops by one, if it is above 1;
   - otherwise nothing happens.
3. Except for the "no progress" event, `action` is then applied, provided the result is
   still inside the grid; an action that is no longer affordable is skipped.

# Arguments
- `current_state`: `CartesianIndex` or vector `[progress, power, h2o, o2]`.
- `action`: offset of the same kind as `current_state`.
- `state_dimension`: upper bound of each state component.

# Returns
The sampled next state, of the same kind as `current_state` and always inside the grid
when `current_state` is.
"""
function Transition_Model(current_state, action, state_dimension)
    p_no_progress = 0.05 # chance that the day's action has no effect

    # Daily energy income goes to the power component (index 2 of the state layout)
    state = current_state
    charged = state[2] + energy_per_day
    if charged <= state_dimension[2]
        state = _with_component(state, 2, charged)
    end

    # Cumulative thresholds, so that every event keeps its own probability mass
    water_cut = p_no_progress + p_water_loss
    power_cut = water_cut + p_power_fail
    regress_cut = power_cut + p_mission_regression

    x = rand()
    if x < p_no_progress
        return state # no progress
    elseif x < water_cut
        state = _with_component(state, 3, 1) # lose all water
    elseif x < power_cut
        state = _with_component(state, 2, 1) # lose all energy
    elseif x < regress_cut && state[1] > 1
        state = _with_component(state, 1, state[1] - 1) # backwards progress mission
    end

    # A loss event can make the action unaffordable; never step outside the grid
    return is_in_grid(state, action, state_dimension) ? state + action : state
end




In [None]:
"""
    find_valid_action(current_state, action, state_dimension)

Attempt `action` from `current_state`.

If `current_state + action` stays inside the grid bounded by `state_dimension`, the
successor is sampled with `Transition_Model`; otherwise the action is rejected and
`current_state` is returned unchanged.

# Returns
The next state (a single state value, not a `(state, reward)` pair).
"""
function find_valid_action(current_state, action, state_dimension)
    # Reject actions that would leave the grid: the state stays where it is
    is_in_grid(current_state, action, state_dimension) || return current_state
    # Transition_Model returns just the next state, so it must not be destructured
    return Transition_Model(current_state, action, state_dimension)
end

"""
    is_in_grid(state, action, state_dimension)

Return `true` when every component of `state + action` lies between 1 and the matching
entry of `state_dimension` (inclusive), and `false` as soon as one component does not.
"""
function is_in_grid(state, action, state_dimension)
    # True when component `k` of the moved state falls outside its allowed range
    function outside(k)
        moved = state[k] + action[k]
        return moved < 1 || moved > state_dimension[k]
    end
    return !any(outside, 1:length(state))
end



In [58]:
# Mission MDP over linear state indices; every index decodes to the grid position
# [progress, power, h2o, o2]. The grid lookup is built once and shared by the closures.
mission = let dims = (max_progress, max_power, max_h2o, max_o2),
              grid = CartesianIndices(dims)
    QuickMDP(
        states = states,
        actions = actions,
        initialstate = initialstate,
        discount = 0.95,

        # `s` is a linear index (an Int), so it has to be decoded before its progress
        # component can be compared with the mission goal.
        # NOTE(review): the reward below is defined on (s, a), so once max-progress states
        # are terminal the 1000 bonus is never paid — confirm whether the bonus should be
        # granted on entering the goal instead.
        isterminal = s -> grid[s][1] == dims[1],

        # QuickMDP expects `transition` to return a distribution, not a bare state.
        # The successor is sampled inside find_valid_action and wrapped in `Deterministic`,
        # so this model is only meaningful for simulation, not for explicit solvers.
        transition = function (s, a)
            spvec = find_valid_action(grid[s], a, dims)
            return Deterministic(indexed[spvec])
        end,

        # 1000 for acting in a max-progress state, -1 per step otherwise
        reward = function (s, a)
            return grid[s][1] == dims[1] ? 1000 : -1
        end
    )
end

# Rebind `states` to a 3-row matrix view of the state range.
# NOTE(review): with the grid sizes set in the setup cell, length(states) is
# 5*100*10*10 = 50000, which is not a multiple of 3, so Int(length(states)/3) throws an
# InexactError. This also rebinds `states` after `mission` was built from it — confirm
# whether this line is still wanted.
states = reshape(states, (3, Int(length(states)/3)))

QuickMDP{UUID("72a5a780-fac3-46e6-85e4-8e4e960bbe2f"), Int64, CartesianIndex{4}, NamedTuple{(:stateindex, :isterminal, :actionindex, :initialstate, :transition, :states, :actions, :discount, :reward), Tuple{Dict{Int64, Int64}, var"#29#32", Dict{CartesianIndex{4}, Int64}, Deterministic{Int64}, var"#30#33", UnitRange{Int64}, Vector{CartesianIndex{4}}, Float64, var"#31#34"}}}((stateindex = Dict(4986 => 4986, 1144 => 1144, 2108 => 2108, 1175 => 1175, 3634 => 3634, 2288 => 2288, 1546 => 1546, 1703 => 1703, 1956 => 1956, 2350 => 2350…), isterminal = var"#29#32"(), actionindex = Dict{CartesianIndex{4}, Int64}(CartesianIndex(1, -1, -1, -1) => 4, CartesianIndex(0, 0, 1, 0) => 2, CartesianIndex(0, 1, 0, 0) => 1, CartesianIndex(0, 0, 0, 1) => 3), initialstate = Deterministic{Int64}(1), transition = var"#30#33"(), states = 1:5000, actions = CartesianIndex{4}[CartesianIndex(0, 1, 0, 0), CartesianIndex(0, 0, 1, 0), CartesianIndex(0, 0, 0, 1), CartesianIndex(1, -1, -1, -1)], discount = 0.95, reward =