### Description théorique du MDP

- **États** : triplet \((s_1, s_2, s_w)\) où :
  - \(s_1\), \(s_2\) : stock des magasins 1 et 2 (0 à 20),
  - \(s_w\) : stock de l'entrepôt (0 à 20).

- **Actions** : \((a_1, a_2) \in \{0,1\}^2\)
  - \(a_1\) : commander (1) ou ne pas commander (0) pour magasin 1,
  - \(a_2\) : commander (1) ou ne pas commander (0) pour magasin 2.

- **Transitions** :
  - Commande : 5 vélos par magasin si possible selon la capacité,
  - Demandes aléatoires indépendantes \(d_1, d_2\), chacune de loi uniforme discrète sur \(\{0, 1, \dots, 7\}\),
  - Chaque demande est servie d'abord par le stock du magasin (vente directe), puis par livraison à domicile depuis l'entrepôt ; le reste est une vente perdue,
  - Réapprovisionnement automatique de l'entrepôt en fin de période (+10 vélos, sans dépasser 20) si \(s_w < 10\) après les ventes.

- **Récompenses** :
  - Coût de commande : 20€ par commande,
  - Coûts de stockage : magasin (2€/vélo), parking extérieur du magasin 1 uniquement (4€/vélo au-delà des 10 premiers vélos), entrepôt (1€/vélo),
  - Livraison à domicile : 10€/vélo,
  - Vente perdue (stockout) : pénalité de 50€/vélo non satisfait.

- **Stochasticité** : uniquement sur la demande \(d_1, d_2\).

- **Objectif** : minimiser les coûts totaux sur un horizon infini (\(\gamma=0.99\)).



In [8]:
using POMDPs
using QuickPOMDPs
using Distributions
using Distributions: DiscreteUniform
using POMDPTools: Deterministic
using Random

# Problem constants
# Upper bound of every stock level in the state space (stores and warehouse alike).
const MAX_INVENTORY = 20
# Bikes kept inside store 1 at the regular holding rate; any excess is billed as parking.
const MAX_STORE = 10
# Number of bikes a store requests from the warehouse when it places an order.
const ORDER_SIZE = 5
# The warehouse is refilled automatically when its stock ends the period below this level.
const REPLENISH_WAREHOUSE_THRESHOLD = 10
# Number of bikes added to the warehouse by one replenishment.
const REPLENISH_WAREHOUSE_BATCH = 10

# Per-bike holding costs and per-event costs, all added to the period cost.
const holding_cost_store = 2
const holding_cost_parking = 4
const holding_cost_warehouse = 1
const order_cost = 20
const home_delivery_cost = 10
const stockout_penalty = 50

# Customer demand per store and per period: integer drawn uniformly from 0 to 7.
const demand_dist = DiscreteUniform(0, 7)

# Define the MDP: two bike stores supplied by one warehouse that refills automatically.
#   state  (s1, s2, sw): stock of store 1, store 2 and the warehouse, each in 0:MAX_INVENTORY
#   action (a1, a2)    : 1 if the corresponding store places an order this period, else 0
mdp2 = QuickMDP(
    states = [
        (s1, s2, sw) for s1 in 0:MAX_INVENTORY, s2 in 0:MAX_INVENTORY, sw in 0:MAX_INVENTORY
    ],
    actions = [(a1, a2) for a1 in 0:1, a2 in 0:1],
    discount = 0.99,

    # Generative model: simulate one period from state `s` under action `a`.
    #
    # Returns a NamedTuple `(sp, r, info)`:
    #   sp   : next state `(s1, s2, sw)`
    #   r    : reward, i.e. minus the total cost of the period
    #   info : the sampled demands `(d1, d2)`
    # POMDPs.jl expects `gen` to return a NamedTuple; the previous positional tuple only
    # worked when `POMDPs.gen` was called by hand. Positional destructuring
    # (`sp, r, d = POMDPs.gen(...)`) keeps working with the NamedTuple.
    gen = function (s, a, rng)
        s1, s2, sw = s
        a1, a2 = a

        # --- Store orders ---
        # A store asks for ORDER_SIZE bikes, limited by the room left up to MAX_INVENTORY.
        order1 = a1 == 1 ? min(ORDER_SIZE, MAX_INVENTORY - s1) : 0
        order2 = a2 == 1 ? min(ORDER_SIZE, MAX_INVENTORY - s2) : 0

        # The store with the strictly lower stock is served first (store 2 on ties); the
        # other store receives whatever the warehouse still holds, at most what it asked
        # for. When the warehouse covers both requests, both are served in full.
        if s1 < s2
            actual_order1 = min(order1, sw)
            actual_order2 = min(order2, sw - actual_order1)
        else
            actual_order2 = min(order2, sw)
            actual_order1 = min(order1, sw - actual_order2)
        end

        new_s1 = s1 + actual_order1
        new_s2 = s2 + actual_order2
        new_sw = sw - (actual_order1 + actual_order2)

        # --- Demand ---
        d1 = rand(rng, demand_dist)
        d2 = rand(rng, demand_dist)

        # Store 1: sell from the shelf, then home-deliver from the warehouse, then lose.
        sold1 = min(new_s1, d1)
        remaining_demand1 = d1 - sold1
        delivered1 = min(remaining_demand1, new_sw)
        lost_sales1 = remaining_demand1 - delivered1

        new_s1 -= sold1
        new_sw -= delivered1

        # Store 2: same rule, using the warehouse stock left after store 1's deliveries.
        sold2 = min(new_s2, d2)
        remaining_demand2 = d2 - sold2
        delivered2 = min(remaining_demand2, new_sw)
        lost_sales2 = remaining_demand2 - delivered2

        new_s2 -= sold2
        new_sw -= delivered2

        # --- Costs ---
        cost = 0

        # Ordering cost is charged per order placed, even if nothing could be shipped.
        cost += (a1 == 1 ? order_cost : 0) + (a2 == 1 ? order_cost : 0)

        # Store 1 holding: first MAX_STORE bikes at store rate, the excess at parking rate.
        in_store1 = min(new_s1, MAX_STORE)
        in_parking1 = max(new_s1 - MAX_STORE, 0)
        cost += in_store1 * holding_cost_store + in_parking1 * holding_cost_parking

        # Store 2 holding: every bike at store rate.
        cost += new_s2 * holding_cost_store

        # Warehouse holding, charged on the end-of-period stock before replenishment.
        cost += new_sw * holding_cost_warehouse

        # Home deliveries.
        cost += (delivered1 + delivered2) * home_delivery_cost

        # Lost sales.
        cost += (lost_sales1 + lost_sales2) * stockout_penalty

        # --- Automatic warehouse replenishment ---
        if new_sw < REPLENISH_WAREHOUSE_THRESHOLD
            new_sw = min(new_sw + REPLENISH_WAREHOUSE_BATCH, MAX_INVENTORY)
        end

        r = -cost

        return (sp = (new_s1, new_s2, new_sw), r = r, info = (d1, d2))
    end,

    initialstate = Deterministic((10, 10, 10))
)

# Smoke test: sample one transition for each hand-picked (state, action) pair.
rng = MersenneTwister(42)

# Each entry is ((s1, s2, sw), (a1, a2)); together they exercise the main branches.
scenarios = [
    ((4, 3, 10), (1, 0)),   # store 1 orders, store 2 does not
    ((18, 18, 5), (1, 1)),  # stores nearly full, little warehouse stock
    ((5, 5, 0), (1, 1)),    # empty warehouse
    ((0, 0, 20), (0, 0)),   # no order placed
    ((19, 19, 20), (0, 0)), # stores nearly full, no order placed
    ((10, 10, 5), (1, 0)),  # limited warehouse stock
    ((7, 8, 2), (1, 1)),    # both stores compete for the warehouse stock
    ((0, 20, 10), (1, 1)),  # one store empty, the other full
    ((2, 2, 2), (0, 1)),    # single order with low stock everywhere
    ((15, 5, 0), (0, 1)),   # one store well stocked, the other low
]

# All transitions share `rng`, so the printed sequence depends on the scenario order.
foreach(scenarios) do (s, a)
    sp, r, d = POMDPs.gen(mdp2, s, a, rng)
    println("From $s --(a=$a)--> $sp,  Demands: $d, Reward = $r")
end


From (4, 3, 10) --(a=(1, 0))--> (8, 0, 11),  Demands: (1, 7), Reward = -77
From (18, 18, 5) --(a=(1, 1))--> (17, 15, 11),  Demands: (3, 5), Reward = -119
From (5, 5, 0) --(a=(1, 1))--> (0, 0, 10),  Demands: (5, 5), Reward = -40
From (0, 0, 20) --(a=(0, 0))--> (0, 0, 17),  Demands: (7, 6), Reward = -137
From (19, 19, 20) --(a=(0, 0))--> (18, 19, 20),  Demands: (1, 0), Reward = -110
From (10, 10, 5) --(a=(1, 0))--> (14, 8, 10),  Demands: (1, 2), Reward = -72
From (7, 8, 2) --(a=(1, 1))--> (4, 1, 10),  Demands: (5, 7), Reward = -50
From (0, 20, 10) --(a=(1, 1))--> (4, 13, 15),  Demands: (1, 7), Reward = -79
From (2, 2, 2) --(a=(0, 1))--> (1, 1, 10),  Demands: (1, 3), Reward = -24
From (15, 5, 0) --(a=(0, 1))--> (15, 0, 10),  Demands: (0, 5), Reward = -60


### Description théorique du MDP corrigé

- **États** : triplet \((s_1, s_2, s_w)\) où :
  - \(s_1\), \(s_2\) : stock des magasins 1 et 2 (entre 0 et 20),
  - \(s_w\) : stock de l'entrepôt (entre 0 et 20).

- **Actions** : \((a_1, a_2, a_w) \in \{0,1\}^3\)
  - \(a_1\) : commander (1) ou ne pas commander (0) pour magasin 1 (5 vélos commandés si possible),
  - \(a_2\) : commander (1) ou ne pas commander (0) pour magasin 2 (5 vélos commandés si possible),
  - \(a_w\) : commander (1) ou ne pas commander (0) un réapprovisionnement de 10 vélos depuis l'usine vers l'entrepôt.

- **Transitions** :
  - Le stock de l'entrepôt est augmenté immédiatement de 10 vélos si \(a_w = 1\) (sans dépasser 20),
  - Les magasins passent commande (5 vélos chacun) si possible,
  - Si l'entrepôt ne peut pas satisfaire les deux commandes, priorité au magasin ayant le stock \(s_i\) le plus bas,
  - Demandes aléatoires indépendantes \(d_1, d_2\), chacune de loi uniforme discrète sur \(\{0, 1, \dots, 7\}\),
  - Les demandes sont satisfaites par le magasin si possible ; sinon livraison à domicile via l'entrepôt ; sinon perte de vente.

- **Récompenses** :
  - Coût de commande magasin : 20€ par commande passée (que la commande soit honorée ou non),
  - Coût de commande usine : 20€ par commande \(a_w=1\),
  - Coûts de stockage :
    - Magasin (2€/vélo stocké),
    - Parking extérieur du magasin 1 (4€/vélo pour les \(s_1 - 10\) vélos excédentaires lorsque \(s_1 > 10\)),
    - Entrepôt (1€/vélo stocké),
  - Livraison à domicile (via entrepôt) : 10€/vélo livré,
  - Pénalité de perte de vente : 50€/vélo non livré.

- **Stochasticité** :
  - Provient uniquement de la demande \(d_1\) et \(d_2\) aléatoire.

- **Objectif** :
  - Minimiser les coûts totaux sur un horizon infini, avec un facteur d'actualisation \(\gamma=0.99\).


In [2]:
### Dépendances ###
using POMDPs
using QuickPOMDPs
using Distributions
using Distributions: DiscreteUniform
using POMDPTools: Deterministic
using Random

### Define the corrected EddyBikes MDP ###
# Upper bound of every stock level in the state space (stores and warehouse alike).
const MAX_INVENTORY = 20
# Bikes kept inside store 1 at the regular holding rate; any excess is billed as parking.
const MAX_STORE = 10
# Number of bikes a store requests from the warehouse when it places an order.
const ORDER_SIZE = 5
# Number of bikes added to the warehouse by one factory replenishment.
const REPLENISH_WAREHOUSE_BATCH = 10

# Per-bike holding costs and per-event costs, all added to the period cost.
const holding_cost_store = 2
const holding_cost_parking = 4
const holding_cost_warehouse = 1
const order_cost = 20
const home_delivery_cost = 10
const stockout_penalty = 50

# Customer demand per store and per period: integer drawn uniformly from 0 to 7.
const demand_dist = DiscreteUniform(0, 7)

# Corrected MDP: the warehouse is refilled from the factory only when the agent asks.
#   state  (s1, s2, sw): stock of store 1, store 2 and the warehouse, each in 0:MAX_INVENTORY
#   action (a1, a2, aw): 1 if store 1 / store 2 / the warehouse places an order, else 0
mdp3 = QuickMDP(
    states = [
        (s1, s2, sw) for s1 in 0:MAX_INVENTORY, s2 in 0:MAX_INVENTORY, sw in 0:MAX_INVENTORY
    ],
    actions = [(a1, a2, aw) for a1 in 0:1, a2 in 0:1, aw in 0:1],
    discount = 0.99,

    # Generative model: simulate one period from state `s` under action `a`.
    #
    # Returns a NamedTuple `(sp, r, info)`:
    #   sp   : next state `(s1, s2, sw)`
    #   r    : reward, i.e. minus the total cost of the period
    #   info : the sampled demands `(d1, d2)`
    # POMDPs.jl expects `gen` to return a NamedTuple; the previous positional tuple only
    # worked when `POMDPs.gen` was called by hand. Positional destructuring
    # (`sp, r, d = POMDPs.gen(...)`) keeps working with the NamedTuple.
    gen = function (s, a, rng)
        s1, s2, sw = s
        a1, a2, aw = a

        # --- Factory -> warehouse replenishment ---
        # Applied first, so the new bikes can already serve this period's store orders.
        new_sw = sw + (aw == 1 ? REPLENISH_WAREHOUSE_BATCH : 0)
        new_sw = min(new_sw, MAX_INVENTORY)

        # --- Store orders ---
        # A store asks for ORDER_SIZE bikes, limited by the room left up to MAX_INVENTORY.
        order1 = a1 == 1 ? min(ORDER_SIZE, MAX_INVENTORY - s1) : 0
        order2 = a2 == 1 ? min(ORDER_SIZE, MAX_INVENTORY - s2) : 0

        # The store with the strictly lower stock is served first (store 2 on ties); the
        # other store receives whatever the warehouse still holds, at most what it asked
        # for. When the warehouse covers both requests, both are served in full.
        if s1 < s2
            actual_order1 = min(order1, new_sw)
            actual_order2 = min(order2, new_sw - actual_order1)
        else
            actual_order2 = min(order2, new_sw)
            actual_order1 = min(order1, new_sw - actual_order2)
        end

        new_s1 = s1 + actual_order1
        new_s2 = s2 + actual_order2
        new_sw -= (actual_order1 + actual_order2)

        # --- Customer demand ---
        d1 = rand(rng, demand_dist)
        d2 = rand(rng, demand_dist)

        # Store 1: sell from the shelf, then home-deliver from the warehouse, then lose.
        sold1 = min(new_s1, d1)
        remaining_demand1 = d1 - sold1
        delivered1 = min(remaining_demand1, new_sw)
        lost_sales1 = remaining_demand1 - delivered1

        new_s1 -= sold1
        new_sw -= delivered1

        # Store 2: same rule, using the warehouse stock left after store 1's deliveries.
        sold2 = min(new_s2, d2)
        remaining_demand2 = d2 - sold2
        delivered2 = min(remaining_demand2, new_sw)
        lost_sales2 = remaining_demand2 - delivered2

        new_s2 -= sold2
        new_sw -= delivered2

        # --- Costs ---
        cost = 0
        # Ordering cost is charged per order placed (stores and factory alike), even if a
        # store order could not be shipped.
        cost += (a1 == 1 ? order_cost : 0) + (a2 == 1 ? order_cost : 0)
        cost += (aw == 1 ? order_cost : 0)

        # Store 1 holding: first MAX_STORE bikes at store rate, the excess at parking rate.
        in_store1 = min(new_s1, MAX_STORE)
        in_parking1 = max(new_s1 - MAX_STORE, 0)
        cost += in_store1 * holding_cost_store + in_parking1 * holding_cost_parking

        cost += new_s2 * holding_cost_store                       # store 2 holding
        cost += new_sw * holding_cost_warehouse                   # warehouse holding
        cost += (delivered1 + delivered2) * home_delivery_cost    # home deliveries
        cost += (lost_sales1 + lost_sales2) * stockout_penalty    # lost sales

        r = -cost
        return (sp = (new_s1, new_s2, new_sw), r = r, info = (d1, d2))
    end,

    initialstate = Deterministic((10, 10, 10))
)


QuickMDP{Base.UUID("9187f4b6-5500-4a45-8dd6-2931847a4d93"), Tuple{Int64, Int64, Int64}, Tuple{Int64, Int64, Int64}, @NamedTuple{stateindex::Dict{Tuple{Int64, Int64, Int64}, Int64}, isterminal::Bool, actionindex::Dict{Tuple{Int64, Int64, Int64}, Int64}, initialstate::Deterministic{Tuple{Int64, Int64, Int64}}, states::Array{Tuple{Int64, Int64, Int64}, 3}, actions::Array{Tuple{Int64, Int64, Int64}, 3}, discount::Float64, gen::var"#13#16"}}((stateindex = Dict((19, 5, 6) => 2771, (6, 13, 14) => 6454, (20, 16, 1) => 798, (3, 9, 8) => 3721, (0, 2, 19) => 8422, (16, 15, 16) => 7388, (14, 12, 1) => 708, (11, 13, 15) => 6900, (16, 5, 3) => 1445, (18, 6, 19) => 8524…), isterminal = false, actionindex = Dict((0, 0, 0) => 1, (1, 1, 1) => 8, (1, 1, 0) => 4, (0, 1, 1) => 7, (0, 1, 0) => 3, (1, 0, 1) => 6, (1, 0, 0) => 2, (0, 0, 1) => 5), initialstate = Deterministic{Tuple{Int64, Int64, Int64}}((10, 10, 10)), states = [(0, 0, 0) (0, 1, 0) … (0, 19, 0) (0, 20, 0); (1, 0, 0) (1, 1, 0) … (1, 19, 0) (1, 2

In [3]:
### Sample several transitions ###
rng = MersenneTwister(42)

# Each entry is ((s1, s2, sw), (a1, a2, aw)).
scenarios = [
    ((4, 3, 10), (1, 0, 0)),   # store 1 orders, store 2 does not, no factory order
    ((4, 3, 10), (1, 0, 1)),   # store 1 orders, plus factory replenishment
    ((18, 18, 5), (1, 1, 1)),  # stores nearly full, factory replenishment
    ((5, 5, 0), (1, 1, 0)),    # empty warehouse, both stores order
    ((0, 0, 20), (0, 0, 0)),   # no order at all
    ((19, 19, 20), (0, 0, 1)), # stores nearly full, factory order on a full warehouse
    ((10, 10, 5), (1, 0, 1)),  # one store orders, factory replenishes
    ((7, 8, 2), (1, 1, 0)),    # stores compete for the warehouse stock, no factory order
    ((0, 20, 10), (1, 1, 1)),  # one store empty, the other full, factory replenishes
    ((2, 2, 2), (0, 1, 1)),    # only one store orders
    ((15, 5, 0), (0, 1, 0)),   # one store well stocked, the other low
]

# All transitions share `rng`, so the printed sequence depends on the scenario order.
foreach(scenarios) do (s, a)
    sp, r, d = POMDPs.gen(mdp3, s, a, rng)
    println("From $s --(a=$a)--> $sp,  Demands: $d, Reward = $r")
end


From (4, 3, 10) --(a=(1, 0, 0))--> (8, 0, 1),  Demands: (1, 7), Reward = -77
From (4, 3, 10) --(a=(1, 0, 1))--> (6, 0, 13),  Demands: (3, 5), Reward = -85
From (18, 18, 5) --(a=(1, 1, 1))--> (15, 15, 11),  Demands: (5, 5), Reward = -141
From (5, 5, 0) --(a=(1, 1, 0))--> (0, 0, 0),  Demands: (7, 6), Reward = -190
From (0, 0, 20) --(a=(0, 0, 0))--> (0, 0, 19),  Demands: (1, 0), Reward = -29
From (19, 19, 20) --(a=(0, 0, 1))--> (18, 17, 20),  Demands: (1, 2), Reward = -126
From (10, 10, 5) --(a=(1, 0, 1))--> (10, 3, 10),  Demands: (5, 7), Reward = -76
From (7, 8, 2) --(a=(1, 1, 0))--> (8, 1, 0),  Demands: (1, 7), Reward = -58
From (0, 20, 10) --(a=(1, 1, 1))--> (4, 17, 15),  Demands: (1, 3), Reward = -117
From (2, 2, 2) --(a=(0, 1, 1))--> (2, 2, 7),  Demands: (0, 5), Reward = -55
From (15, 5, 0) --(a=(0, 1, 0))--> (8, 4, 0),  Demands: (7, 1), Reward = -44
