In [2]:
from ValueIteration import value_iteration
import matplotlib.pyplot as plt
import numpy as np
import math
import itertools

# Implementing Value Iteration to solve 3-echelon inventory optimisation MDPs

## Centralised system

### No lead times

#### Set up data structures

Notes to self:

Need to decide capacities of warehouse and DC.

In [None]:
# Build the state space (and its index lookup) for value iteration
def create_state_space(capacity, increment, n_ech=2):
    ''' Enumerates the state space of an n-echelon problem.

    Every state is a tuple (x1, ..., xn) where xj is the inventory level at
    site j. The levels available to each site are the values of
    np.arange(-capacity, capacity+1, increment), truncated to int.

    Returns a pair (S, state_idx): S is the list of all states in ascending
    (lexicographic) order and state_idx maps each state to its position in S.
    '''

    # Distinct inventory levels a single site can take, in ascending order
    levels = sorted({int(v) for v in np.arange(-capacity, capacity + 1, increment)})

    # itertools.product over ascending, distinct levels already yields the
    # tuples in sorted order without duplicates
    S = list(itertools.product(levels, repeat=n_ech))

    # Position of each state in S (row/column index for value iteration)
    state_idx = dict(zip(S, range(len(S))))

    return S, state_idx

def create_action_space(capacity, increment, n_ech=2):
    ''' Enumerates the action space of an n-echelon inventory problem.

    Every action is a tuple (q1, ..., qn) where qj is the quantity ordered by
    site j from site j+1. The order quantities available to each site are the
    values of np.arange(0, ceil(capacity/2)+1, increment), truncated to int,
    so orders are capped at roughly half the site capacity.

    Returns a pair (A, action_idx): A is the list of all actions in ascending
    (lexicographic) order and action_idx maps each action to its position in A.
    '''

    # Largest order considered at any site: half the capacity, rounded up
    max_order = math.ceil(capacity / 2)

    # Distinct order quantities a single site can choose, in ascending order
    quantities = sorted({int(q) for q in np.arange(0, max_order + 1, increment)})

    # itertools.product over ascending, distinct quantities already yields the
    # tuples in sorted order without duplicates
    A = list(itertools.product(quantities, repeat=n_ech))

    # Position of each action in A (column index for value iteration)
    action_idx = dict(zip(A, range(len(A))))

    return A, action_idx



def create_P_c0(S, A, state_idx, action_idx, demand_distribution, capacity):
    ''' 
    Creates an array containing transition probabilities from s to s' under a
    for a centralised multi-echelon serial system without lead times.

    NOTE(review): this function is unfinished. As written it only defines the
    nested helper prob_trans; the helper is never called, no array is built,
    none of the parameters are read, and the function returns None.
    '''
    def prob_trans(s, a, sp):
        ''' Calculates transition probability from s to s' (sp) under action a.

        NOTE(review): incomplete -- prob is initialised but never updated, the
        computed demand is not used, and nothing is returned.
        '''
        prob = 0
        # NOTE(review): if s and a are plain tuples, `s + a` concatenates them
        # and subtracting a tuple raises TypeError. Element-wise arithmetic is
        # presumably intended, with (0, a[0:-1]) meant as the flat sequence
        # (0, a[0], ..., a[n-2]), i.e. the stock each site ships to the site
        # below it -- confirm before finishing this helper.
        new_IL = s + a - (0, a[0:-1]) # IL at end of period (after orders arrived)
        # Difference between the post-order level at site 0 and its level in sp;
        # presumably the customer demand that would explain the transition.
        demand = new_IL[0] - sp[0]

        
def create_R_c0(S, A, state_idx, action_idx, demand_distribution, hold_costs, backlog_cost):
    ''' 
    Intended to create an array containing the reward obtained when action a
    is chosen at state s, for a centralised multi-echelon serial system
    without lead times.

    NOTE(review): unfinished -- a zero array of shape (len(S), len(A)) is
    allocated but never filled or returned, cost_function is never called,
    and the function currently returns None.
    '''

    def cost_function(s, a):
        ''' Expected cost incurred if action a is taken at state s.

        Index 0 is the DC and index 1 the warehouse. demand_distribution is
        iterated as (demand, probability) pairs.
        '''
        # Warehouse: holding cost on the stock left after receiving its own
        # order a[1] and shipping a[0] to the DC
        warehouse_cost = hold_costs[1]*max(s[1] + a[1] - a[0], 0)

        # DC: probability-weighted leftover stock and unmet demand
        expected_leftover = 0
        expected_shortfall = 0
        for dt, prob in demand_distribution.items():
            expected_leftover += max(s[0] + a[0] - dt, 0)*prob
            expected_shortfall += max(dt - s[0] - a[0], 0)*prob

        # DC: holding cost plus backlog cost
        dc_cost = hold_costs[0]*expected_leftover + backlog_cost*expected_shortfall

        return warehouse_cost + dc_cost

    # One row per state, one column per action; not yet populated
    R_array = np.zeros((len(S), len(A)))

# Example problem size: each site holds between -1000 and 1000 units,
# tracked in steps of 100
capacity = 1000
increment = 100

# Inventory levels a single site can take
IL = set(range(-capacity, capacity + 1, increment))

# Build the state space; as the last expression in the cell, its value is
# what the notebook echoes
create_state_space(capacity, increment)

[(-1000, -1000),
 (-1000, -900),
 (-1000, -800),
 (-1000, -700),
 (-1000, -600),
 (-1000, -500),
 (-1000, -400),
 (-1000, -300),
 (-1000, -200),
 (-1000, -100),
 (-1000, 0),
 (-1000, 100),
 (-1000, 200),
 (-1000, 300),
 (-1000, 400),
 (-1000, 500),
 (-1000, 600),
 (-1000, 700),
 (-1000, 800),
 (-1000, 900),
 (-1000, 1000),
 (-900, -1000),
 (-900, -900),
 (-900, -800),
 (-900, -700),
 (-900, -600),
 (-900, -500),
 (-900, -400),
 (-900, -300),
 (-900, -200),
 (-900, -100),
 (-900, 0),
 (-900, 100),
 (-900, 200),
 (-900, 300),
 (-900, 400),
 (-900, 500),
 (-900, 600),
 (-900, 700),
 (-900, 800),
 (-900, 900),
 (-900, 1000),
 (-800, -1000),
 (-800, -900),
 (-800, -800),
 (-800, -700),
 (-800, -600),
 (-800, -500),
 (-800, -400),
 (-800, -300),
 (-800, -200),
 (-800, -100),
 (-800, 0),
 (-800, 100),
 (-800, 200),
 (-800, 300),
 (-800, 400),
 (-800, 500),
 (-800, 600),
 (-800, 700),
 (-800, 800),
 (-800, 900),
 (-800, 1000),
 (-700, -1000),
 (-700, -900),
 (-700, -800),
 (-700, -700),
 (-700