In [1]:
from env.SourcingEnv import *
import numpy as np
import gurobipy as gp
from gurobipy import GRB
import pickle as pkl
from env.HelperClasses import *
from sim.sim_functions import *
import itertools

In [2]:
filename = "output/msource_value_dic_07-04-2022-05-59-13.pkl"

with open(filename, 'rb') as f:
    output_obj = pkl.load(f)

    value_dic = output_obj["state_value_dic"]
    model_params = output_obj["model_params"]
    sourcingEnv = output_obj["mdp_env"]

    sourcingEnv = SourcingEnv(
        order_quantity = ACTION_SIZE,
        lambda_arrival = model_params['mdp_env_params']['lambda'], # or 10
        procurement_cost_vec = np.array(model_params['mdp_env_params']['procurement_cost_vec']),
        supplier_lead_times_vec = np.array(model_params['mdp_env_params']['supplier_lead_times_vec']),
        on_times = np.array([1, 1]), 
        off_times = np.array([np.Inf, np.Inf]))


state_s = list(range(BACKORDER_MAX_LP, MAX_INVEN_LP))
state_backorders = list(itertools.product(range(MAX_INVEN_LP - BACKORDER_MAX_LP), range(sourcingEnv.n_suppliers)))
state_onoff = list(itertools.product(range(2), range(sourcingEnv.n_suppliers)))

possible_state_tuples = list(itertools.product(state_s, state_backorders, state_onoff))
poss_states = [[x[0], np.array(list(x[1])), np.array(list(x[2]))] for x in possible_state_tuples]


action_space_tup = [x for x in itertools.product(*([list(range(sourcingEnv.action_size))]*sourcingEnv.n_suppliers)) ]

# action_space_tup = list(itertools.product(range(sourcingEnv.action_size), range(sourcingEnv.n_suppliers))) 
action_space = [np.array(list(x)) for x in action_space_tup]

for s in poss_states:
    for i in range(1, len(s)):
        s[i] = list(s[i])

In [3]:
# need to write a pij function
# tau = sourcingEnv.compute_event_arrival_time(a)

m = gp.Model("MDP")
x = {}

for state in poss_states:
    for a in action_space:
        state_rep = MState(state[0], sourcingEnv.n_suppliers, state[1], state[2])
        state_rep_str = str(state_rep)
        a_rep = repr(list(a))
        cost = cost_calc_expected_di(sourcingEnv, a, custom_state = state_rep)
        x[state_rep_str, a_rep] = m.addVar(obj = cost, name='x-'+str(state)+"-"+str(a))
        m.addConstr(x[state_rep_str, a_rep] >= 0.0)
        
# need to write a pij function
# tau = sourcingEnv.compute_event_arrival_time(a)

def add_in_additional_constr(change_i_state, a_i, x, m):
    if (str(change_i_state), repr(list(a_i))) not in x:
        cost = cost_calc_expected_di(sourcingEnv, a_i, custom_state = change_i_state)
        x[str(change_i_state), repr(list(a_i))] = m.addVar(obj = cost, name='x-'+str(state)+"-"+str(a))
        m.addConstr(x[state_rep_str, a_rep] >= 0.0)
    return m

poss_states_new = copy.deepcopy(poss_states)
for j_state in poss_states:
    j_state_obj = MState(j_state[0], sourcingEnv.n_suppliers, j_state[1], j_state[2])   
    poss_i_states_tuples = [] # possible prev. states
    for a_i in action_space:
        event_probs = sourcingEnv.get_event_probs(a_i)
        for k in range(sourcingEnv.n_suppliers):
            i_state_supp = copy.deepcopy(j_state[1])
            i_state_supp[k] = j_state[1][k] - a_i[k]
            change_i_state = MState(j_state[0] + 1, sourcingEnv.n_suppliers, i_state_supp, j_state[2])
            poss_i_states_tuples.append((a_i, change_i_state, event_probs[0])) # Event DEMAND_ARRIVAL
            
            poss_states_new.append(change_i_state.get_nested_list_repr())
            m = add_in_additional_constr(change_i_state, a_i, x, m)
            
            i_state_supp = copy.deepcopy(j_state[1])
            i_state_supp[k] = j_state[1][k] - a_i[k] + 1
            change_i_state = MState(j_state[0] - 1, sourcingEnv.n_suppliers, i_state_supp, j_state[2])
            index = sourcingEnv.get_event_index_from_event(Event.SUPPLY_ARRIVAL, k)
            poss_i_states_tuples.append((a_i, change_i_state, event_probs[index])) # Event SUPPLY_ARRIVAL
            
            poss_states_new.append(change_i_state.get_nested_list_repr())
            m = add_in_additional_constr(change_i_state, a_i, x, m)

            i_state_v = copy.deepcopy(j_state[2])
            if j_state[2][k] == 1:
                i_state_v[k] = 0
                change_i_state = MState(j_state[0], sourcingEnv.n_suppliers, j_state[1], i_state_v)
                index = sourcingEnv.get_event_index_from_event(Event.SUPPLIER_ON, k)
                poss_i_states_tuples.append((a_i, change_i_state, event_probs[index])) # Event SUPPLY_ON
            
            i_state_v = copy.deepcopy(j_state[2])
            if j_state[2][k] == 0:
                i_state_v[k] = 1
                change_i_state = MState(j_state[0], sourcingEnv.n_suppliers, j_state[1], i_state_v)
                index = sourcingEnv.get_event_index_from_event(Event.SUPPLIER_OFF, k)
                poss_i_states_tuples.append((a_i, change_i_state, event_probs[index])) # Event SUPPLY_OFF
            
    m.addConstr(sum(x[str(j_state_obj), repr(list(a))] for a in action_space) - sum(pij*x[str(state_i), repr(list(a_i))] for (a_i, state_i, pij) in poss_i_states_tuples) == 0)

poss_states = copy.deepcopy(poss_states_new)

# sa_keys = []

# for state_i in poss_states:
#     for a in action_space:
#         sa_keys.append((str(MState(state_i[0], sourcingEnv.n_suppliers, np.array(state_i[1]), np.array(state_i[2])) ), repr(list(a))))

poss_states_objs = [MState(state[0], sourcingEnv.n_suppliers, state[1], state[2]) for state in poss_states]

m.addConstr(sum(sourcingEnv.compute_event_arrival_time(a, state_obj = state_i)*x[str(state_i), repr(list(a))] for state_i in poss_states_objs for a in action_space) == 1)


Set parameter CSQueueTimeout to value 120
Set parameter CSIdleTimeout to value 60
Set parameter ServerTimeout to value 10
Set parameter TokenServer to value "10.162.183.44"


In [None]:
m.optimize()


Gurobi Optimizer version 9.5.0 build v9.5.0rc5 (linux64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 1299 rows, 1170 columns and 4552 nonzeros
Model fingerprint: 0xb82ce64d
Coefficient statistics:
  Matrix range     [2e-01, 1e+02]
  Objective range  [3e-01, 2e+02]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 1290 rows and 1124 columns
Presolve time: 0.01s
Presolved: 9 rows, 46 columns, 238 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    3.3783784e-03   5.236486e-02   0.000000e+00      0s
       5    5.9027778e-01   0.000000e+00   0.000000e+00      0s

Solved in 5 iterations and 0.01 seconds (0.00 work units)
Optimal objective  5.902777778e-01


In [None]:
# Optimal Policy 
for state in poss_states_new:
    for a in action_space:
        guro_var = m.getVarByName('x-' + str(state) +"-" + str(a))
        if guro_var is not None and guro_var.X > 0:
            print(guro_var)

In [None]:
m

<gurobi.Model Continuous instance MDP: 1299 constrs, 1170 vars, Parameter changes: CSQueueTimeout=120.0, ServerTimeout=10, TokenServer=10.162.183.44, CSIdleTimeout=60>