In [None]:
using Distributions
using POMDPs
using POMDPTools: Deterministic, SparseCat
using QuickPOMDPs

# --- Capacity parameters (units of stock) ---
# Largest inventory level; the MDP state space is 0:MAX_INVENTORY and an order
# is truncated so that stock never exceeds this value.
const MAX_INVENTORY = 20
# Units up to this count are costed as "in store"; anything above it is costed
# as "in parking" by the reward function.
const MAX_STORE = 10
# Quantity requested when the order action (a == 1) is taken.
const ORDER_SIZE = 5

# --- Cost parameters (the reward is the negated sum of the costs) ---
# Per-unit holding cost for end-of-period stock counted as in store.
const holding_cost_store = 2
# Per-unit holding cost for end-of-period stock above MAX_STORE.
const holding_cost_parking = 4
# Flat cost added whenever the order action is chosen.
const order_cost = 20
# Multiplier applied to the lost-sales term of the reward.
const stockout_penalty = 50

# Per-period demand: integer-valued, uniform on 0..10. The transition function
# enumerates its support to build the next-state outcomes.
const demand_dist = DiscreteUniform(0, 10)

# Inventory-control MDP.
#
# State   `s`: units on hand at the start of a period (0:MAX_INVENTORY).
# Action  `a`: 0 = do not order, 1 = order ORDER_SIZE units (truncated so that
#              stock never exceeds MAX_INVENTORY).
# Dynamics   : the order arrives immediately, then a demand drawn from
#              `demand_dist` is served from stock; unmet demand is lost.
# Reward     : negative of (order cost + holding cost on the remaining stock +
#              stockout penalty on lost sales).
mdp = QuickMDP(
    states = 0:MAX_INVENTORY,
    actions = [0, 1],  # 0 = no order, 1 = order 5 bikes
    discount = 0.99,
    # Return the next-state distribution for taking action `a` in state `s`.
    transition = function (s, a)
        order_qty = a == 1 ? min(ORDER_SIZE, MAX_INVENTORY - s) : 0
        new_stock = s + order_qty

        # Accumulate probability mass per next state. Every demand value that
        # is >= new_stock empties the shelves, so next state 0 can receive
        # several contributions; they must be merged because a categorical
        # distribution needs unique values.
        mass = zeros(Float64, new_stock + 1)  # mass[i] belongs to next state i - 1
        for d in support(demand_dist)
            next_state = new_stock - min(d, new_stock)
            mass[next_state + 1] += pdf(demand_dist, d)
        end

        # Keep only reachable states and hand back a real distribution object
        # (supports rand/pdf/support and iterates as value => probability).
        reachable = findall(>(0), mass)
        return SparseCat(reachable .- 1, mass[reachable])
    end,
    # Return the (expected) one-step reward for the transition s --a--> sp.
    reward = function (s, a, sp)
        order_qty = a == 1 ? min(ORDER_SIZE, MAX_INVENTORY - s) : 0
        new_stock = s + order_qty

        in_store = min(sp, MAX_STORE)
        in_parking = max(sp - MAX_STORE, 0)

        cost = 0.0  # Float64 throughout so the return type does not depend on sp
        cost += a == 1 ? order_cost : 0
        cost += in_store * holding_cost_store + in_parking * holding_cost_parking

        # Demand is only observable through sp. If sp > 0 the demand was
        # exactly new_stock - sp and was fully served, so nothing was lost.
        # If sp == 0 the demand was some d >= new_stock; charge the expected
        # shortfall conditioned on that event, so that averaging this reward
        # over sp reproduces stockout_penalty * E[lost sales].
        if sp == 0
            p_empty = 0.0
            shortfall = 0.0
            for d in support(demand_dist)
                if d >= new_stock
                    w = pdf(demand_dist, d)
                    p_empty += w
                    shortfall += w * (d - new_stock)
                end
            end
            # p_empty is zero only when sp == 0 is unreachable from (s, a).
            if p_empty > 0
                cost += stockout_penalty * shortfall / p_empty
            end
        end

        return -cost  # negative cost = reward
    end,
    initialstate = Deterministic(10)
)


In [None]:
using POMDPTools

# Inspect a single state-action pair: list every reachable next state together
# with its probability and the reward earned on that transition.
s = 14  # inventory level to inspect
a = 1   # 1 = place an order

for outcome in transition(mdp, s, a)
    sp, p = outcome  # each outcome unpacks into (next state, probability)
    r = reward(mdp, s, a, sp)
    println("From $s --(a=$a)--> $sp  | P=$p, Reward=$r")
end
