In [28]:
using POMDPs
using POMDPTools
using QuickPOMDPs
using MCTS
using POMDPSimulators
using Distributions
using LinearAlgebra
# using D3Trees
using Random

In [29]:
using Pkg
Pkg.add("QuickPOMDPs")

[32m[1m   Resolving[22m[39m package versions...


[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.9/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.9/Manifest.toml`


# Parameters

In [30]:
# Dimensions
max_progress = 5
max_power = 20
max_h2o = 20
max_o2 = 20
max_food = 20

# Uncertainties
p_mission_regression = 0.05
p_power_fail = 0.02
p_water_loss = 0.02
p_food_loss = 0.02
p_no_mission_progress = 0.05

# Resource consumption
consumed_power = -10 # generated
consumed_h2o = 1
consumed_o2 = 8
consumed_food = 9

9

In [31]:
# Define all possible states and actions (with constraints)
# [progress, power, h2o, o2, food]
indexed = LinearIndices((max_progress, max_power, max_h2o, max_o2, max_food))
states = 1:indexed[max_progress, max_power, max_h2o, max_o2, max_food]

basic_actions = [
      CartesianIndex(1, -1, 0, 0, 0), # make mission progress
      CartesianIndex(0, -1, 2, 0, 0), # make water 
      CartesianIndex(0, -2, -1, 1, 0), # make oxygen
      CartesianIndex(0, -1, -1, -1, 2), # make food
      CartesianIndex(0, 0, 0, 0, 0)
] # do nothing
consumed = CartesianIndex(0, consumed_power, consumed_h2o, consumed_o2, consumed_food)
actions = [action - consumed for action in basic_actions]



5-element Vector{CartesianIndex{5}}:
 CartesianIndex(1, 9, -1, -8, -9)
 CartesianIndex(0, 9, 1, -8, -9)
 CartesianIndex(0, 8, -2, -7, -9)
 CartesianIndex(0, 9, -2, -9, -7)
 CartesianIndex(0, 10, -1, -8, -9)

In [32]:
# Determine if action is out of bounds
# Associated consequences are reflected in transition/reward functions
state_maxes = [max_progress, max_power, max_h2o, max_o2, max_food]
function outofbounds(state, action)
    for i in 1:length(state)
        if ((state[i] + action[i]) < 1 || (state[i] + action[i]) > state_maxes[i])
            return true
        end
    end
    return false
end

outofbounds (generic function with 1 method)

In [33]:
# Determine if action is out of bounds
# Associated consequences are reflected in transition/reward functions
state_maxes = [max_progress, max_power, max_h2o, max_o2, max_food]
function clamp_state(state)
    for i in 1:length(state)
        state[i] = clamp(state[i], 1, state_maxes[i])
    end
    return state
end

clamp_state (generic function with 1 method)

In [34]:
        #thought: when we make no mission progress or lose water - we would still consume/produce energy??

# Transition function
function transition(s, a)
    p_nominal = 1 - p_no_mission_progress - p_water_loss - p_power_fail - p_mission_regression 
    probabilities = [p_no_mission_progress, p_water_loss, p_power_fail, p_mission_regression, p_nominal]

    svec = CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s]
    svec = collect(Tuple(svec))
    a = collect(Tuple(a))

    if outofbounds(svec, a)
        return [[s], [1]]
    else
        spvec_no_mission_progress = svec

        spvec_water_loss = svec + a
        spvec_water_loss[3] = 1 # lose all water

        spvec_power_fail = svec + a
        spvec_power_fail[2] = 1 # lose all energy

        spvec_mission_regression = svec + a
        spvec_mission_regression[1] = spvec_mission_regression[1] - 1 # mission setback

        spvec_nominal = svec + a

        spvec = [spvec_no_mission_progress, spvec_water_loss, spvec_power_fail, spvec_mission_regression, spvec_nominal]
    end
    

    for i in 1:length(probabilities)
        spvec[i] = clamp_state(spvec[i])
        spvec[i] = CartesianIndex(Tuple(spvec[i]))
        sp[i] = indexed[spvec[i]]
    end

    return [sp, probabilities]
    
end


transition (generic function with 1 method)

In [35]:
using QuickPOMDPs

mission = QuickMDP(
    states = states,
    actions = actions,
    initialstate = initialstate,
    discount = 0.95,
    isterminal = s -> CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s][1] == max_progress,

    transition = function(s,a)
        T = transition(s,a)
        dist = SparseCat(T[1], T[2])
        return dist
    end,

    reward = function (s, a)
       return reward_func(s,a)
    end
)

function reward_func(s, a)
    svec = CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[s]
    spvec = svec + a
    if spvec[1] == max_progress
        r = 1000
    elseif outofbounds(svec, a) # If invalid action
        r = -1e9
    else
        r = -1
    end
    return r
end

reward_func (generic function with 1 method)

In [36]:
solver = MCTSSolver(n_iterations=100000, depth=10, exploration_constant=5.0, enable_tree_vis=true)


MCTSSolver(100000, Inf, 10, 5.0, Random._GLOBAL_RNG(), RolloutEstimator(RandomSolver(Random._GLOBAL_RNG()), 50, 0.0), 0.0, 0, false, true, MCTS.var"#5#7"())

In [37]:
initial_index = indexed[1, 20, 20, 20, 20]

states = [initial_state]

sim  = RolloutSimulator()
r = 0.0
i = 1
# Print the resulting history
while (!= isterminal):
    initialstate = state[i]
    policy = solve(solver, mission)
    r += simulate(sim, mission, policy)
    push!(states, state)
    i += 1
end
    

ErrorException: syntax: "!=" is not a unary operator

In [43]:
# Main function with replanning
function mdp_with_replanning(mission, num_iterations::Int, replan_interval::Int, initial_state)
    state = initial_state
    for i in 1:num_iterations
        # Replanning step
        if need_replanning(i, replan_interval)
            print("replanning")
            mission = replan(mission)
        end

        for _ in 1:10  # Adjust the number of simulation steps as needed
            # Choose action using the rollout policy
            display(state)
            action, policy = rollout_policy(mission, state)

            # display(rollout_policy)
            # action = consumed[action]

            # Use the chosen action to update the state, get reward, and update statistics
            T = transition(state, action)
            next_state = rand(SparseCat(T[1], T[2]))
            display(next_state)
            r = reward_func(state, action)

            update_statistics(state, action, r, policy)

            # Move to the next state
            state = next_state
        end
    end
    # Return the final MDP
    return mission
end

# Replanning function (example: update transition probabilities)
function replan(mission)
    updated_probabilities = Dict{Symbol, Dict{Symbol, Dict{Symbol, Float64}}}()

    # # Update transition probabilities (example: random update)
    # for s in mission.state_space
    #     updated_probabilities[s] = Dict{Symbol, Dict{Symbol, Float64}}()
    #     for a in mission.action_space
    #         updated_probabilities[s][a] = Dict{Symbol, Float64}()
    #         for sp in mission.state_space
    #             updated_probabilities[s][a][sp] = rand()
    #         end
    #     end
    # end

    

    # Create a new MDP instance with updated probabilities
    updated_mdp = QuickMDP(mission.state_space, mission.action_space, updated_probabilities, mission.reward_function)

    return updated_mdp
end

# Replanning condition (example: every N visits)
function need_replanning(visits::Int, replan_interval::Int)
    return visits % replan_interval == 0
end

# Rollout policy using MCTSSolver
function rollout_policy(mission, state)
    solver = MCTSSolver(n_iterations=100000, depth=10, exploration_constant=5.0, enable_tree_vis=true)
    policy = solve(solver, mission)
    return action(policy, state), policy
end

function update_statistics(state, action, reward, policy)
    # Update or record relevant statistics here
    display("State: $state, Action: $action, Reward: $reward, Policy: $policy")
end

update_statistics (generic function with 1 method)

In [45]:
num_iterations = 10
replan_interval = 2
initial_state = indexed[1, 15, 15, 15, 15]
final_mdp = mdp_with_replanning(mission, num_iterations, replan_interval, initial_state)

589471

589471

"State: 589471, Action: CartesianIndex(0, 10, -1, -8, -9), Reward: -1.0e9"

589471

589471

"State: 589471, Action: CartesianIndex(0, 10, -1, -8, -9), Reward: -1.0e9"

589471

589471

"State: 589471, Action: CartesianIndex(0, 10, -1, -8, -9), Reward: -1.0e9"

589471

InterruptException: InterruptException:

In [40]:

state = indexed[1, 20, 20, 20, 20] #initialstate(mission)
a = action(policy, state)
a, info = action_info(policy, state)
D3Tree(info[:tree], init_expand=2) # click on the node to expand it

UndefVarError: UndefVarError: `policy` not defined

In [41]:
#for i in 1:length(states)
    #s = CartesianIndices((max_progress, max_power, max_h2o, max_o2, max_food))[i] 
   # a = action(policy, i)
    #if rand() < 0.1
    #    println(i)
    #    println(a + consumed)
    #end
#end