In [45]:
from env.SourcingEnv import *
import numpy as np
import gurobipy as gp
import pickle as pkl
from env.HelperClasses import *
from sim.sim_functions import *
import itertools

GRB = gp.GRB

In [46]:
filename = "output/msource_value_dic_08-05-2022-07-30-09.pkl"

with open(filename, 'rb') as f:
    output_obj = pkl.load(f)

    value_dic = output_obj["state_value_dic"]
    model_params = output_obj["model_params"]
    sourcingEnv = output_obj["mdp_env"]

    sourcingEnv = SourcingEnv(
        lambda_arrival = model_params['mdp_env_params']['lambda'], # or 10
        procurement_cost_vec = np.array(model_params['mdp_env_params']['procurement_cost_vec']),
        supplier_lead_times_vec = np.array(model_params['mdp_env_params']['supplier_lead_times_vec']),
        on_times = np.array(model_params['mdp_env_params']['on_times']), 
        off_times = np.array(model_params['mdp_env_params']['off_times']))

state_s = list(range(BACKORDER_MAX_LP, MAX_INVEN_LP))
state_backorders = list(itertools.product(range(MAX_INVEN_LP - BACKORDER_MAX_LP), range(sourcingEnv.n_suppliers)))
state_onoff = list(itertools.product(range(2), range(sourcingEnv.n_suppliers)))

possible_state_tuples = list(itertools.product(state_s, state_backorders, state_onoff))
poss_states = [[x[0], np.array(list(x[1])), np.array(list(x[2]))] for x in possible_state_tuples]


action_space_tup = [x for x in itertools.product(*([list(range(sourcingEnv.action_size))]*sourcingEnv.n_suppliers)) ]

# action_space_tup = list(itertools.product(range(sourcingEnv.action_size), range(sourcingEnv.n_suppliers))) 
action_space = [np.array(list(x)) for x in action_space_tup]

for s in poss_states:
    for i in range(1, len(s)):
        s[i] = list(s[i])

In [47]:
# need to write a pij function
# tau = sourcingEnv.compute_event_arrival_time(a)

m = gp.Model("MDP")
x = {}

def add_in_additional_var(state_obj, action):
    if (state_obj.get_repr_key(), repr(list(action))) not in x:
        cost = cost_calc_expected_di(sourcingEnv, action, custom_state = state_obj)
        x[state_obj.get_repr_key(), repr(list(action))] = m.addVar(obj = cost, name='var_x..' + state_obj.get_repr_key() + ".." + str(action), vtype=GRB.CONTINUOUS) # default lower bound of 0, obj = cost
        m.addConstr(x[state_obj.get_repr_key(), repr(list(action))] >= 0.0)
        return True

for state in poss_states:
    for a in action_space:
        state_rep = MState(state[0], sourcingEnv.n_suppliers, state[1], state[2])
        add_in_additional_var(state_rep, a)
        
# need to write a pij function
# tau = sourcingEnv.compute_event_arrival_time(a)

poss_states_new = copy.deepcopy(poss_states)
for j_state in poss_states:
    j_state_obj = MState(j_state[0], sourcingEnv.n_suppliers, j_state[1], j_state[2])   
    poss_i_states_tuples = [] # possible prev. states
    for a_i in action_space:
        event_probs = sourcingEnv.get_event_probs(a_i)
        for k in range(sourcingEnv.n_suppliers):
            i_state_supp = copy.deepcopy(j_state[1])
            i_state_supp[k] = j_state[1][k] - a_i[k]
            change_i_state = MState(j_state[0] + 1, sourcingEnv.n_suppliers, i_state_supp, j_state[2])
            poss_i_states_tuples.append((a_i, change_i_state, event_probs[0])) # Event DEMAND_ARRIVAL
            
            poss_states_new.append(change_i_state.get_nested_list_repr())
            add_in_additional_var(change_i_state, a_i)
            
            i_state_supp = copy.deepcopy(j_state[1])
            i_state_supp[k] = j_state[1][k] - a_i[k] + 1
            change_i_state = MState(j_state[0] - 1, sourcingEnv.n_suppliers, i_state_supp, j_state[2])
            index = sourcingEnv.get_event_index_from_event(Event.SUPPLY_ARRIVAL, k)
            poss_i_states_tuples.append((a_i, change_i_state, event_probs[index])) # Event SUPPLY_ARRIVAL
            
            poss_states_new.append(change_i_state.get_nested_list_repr())
            add_in_additional_var(change_i_state, a_i)

            i_state_v = copy.deepcopy(j_state[2])
            if j_state[2][k] == 1:
                i_state_v[k] = 0
                change_i_state = MState(j_state[0], sourcingEnv.n_suppliers, j_state[1], i_state_v)
                index = sourcingEnv.get_event_index_from_event(Event.SUPPLIER_ON, k)
                poss_i_states_tuples.append((a_i, change_i_state, event_probs[index])) # Event SUPPLY_ON
            
            i_state_v = copy.deepcopy(j_state[2])
            if j_state[2][k] == 0:
                i_state_v[k] = 1
                change_i_state = MState(j_state[0], sourcingEnv.n_suppliers, j_state[1], i_state_v)
                index = sourcingEnv.get_event_index_from_event(Event.SUPPLIER_OFF, k)
                poss_i_states_tuples.append((a_i, change_i_state, event_probs[index])) # Event SUPPLY_OFF
            
    m.addConstr(gp.quicksum(x[j_state_obj.get_repr_key(), repr(list(a))] for a in action_space) - gp.quicksum(pij*x[state_i.get_repr_key(), repr(list(a_i2))] for (a_i2, state_i, pij) in poss_i_states_tuples) == 0)
poss_states = copy.deepcopy(poss_states_new)

sa_keys = []
for state_i in poss_states:
    for a in action_space:
        sa_keys.append((str(MState(state_i[0], sourcingEnv.n_suppliers, np.array(state_i[1]), np.array(state_i[2])) ), repr(list(a))))

poss_states_objs = [MState(state[0], sourcingEnv.n_suppliers, state[1], state[2]) for state in poss_states]

# 
m.addConstr(gp.quicksum(sourcingEnv.compute_event_arrival_time(a, state_obj = state_i)*x[state_i.get_repr_key(), repr(list(a))] for state_i in poss_states_objs for a in action_space) == 1)



<gurobi.Constr *Awaiting Model Update*>

In [48]:
# m.setObjective(gp.quicksum( cost_calc_expected_di(sourcingEnv, a, custom_state = state) * x[str(state), repr(list(a))] for state in poss_states_objs for a in action_space) )

In [49]:
m.write("model_lp_2source.lp")




In [50]:
m.optimize()


Gurobi Optimizer version 9.5.0 build v9.5.0rc5 (linux64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 183 rows, 54 columns and 3436 nonzeros
Model fingerprint: 0xc0c4d89f
Coefficient statistics:
  Matrix range     [2e-01, 1e+02]
  Objective range  [2e-01, 1e+02]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 174 rows and 8 columns
Presolve time: 0.02s
Presolved: 9 rows, 46 columns, 238 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    2.0270270e-03   6.959459e-02   0.000000e+00      0s
       5    5.6250000e-01   0.000000e+00   0.000000e+00      0s

Solved in 5 iterations and 0.03 seconds (0.00 work units)
Optimal objective  5.625000000e-01


In [51]:
m.printStats()


Statistics for modelMDP:
  Linear constraint matrix    : 183 Constrs, 54 Vars, 3436 NZs
  Matrix coefficient range    : [ 0.182482, 105.714 ]
  Objective coefficient range : [ 0.214286, 106.117 ]
  Variable bound range        : [ 0, 0 ]
  RHS coefficient range       : [ 1, 1 ]


In [52]:
m.printAttr('x')


    Variable            x 
-------------------------
var_x..[-2, 0, 0, 1, 1]..[0 0]    0.0194444 


In [53]:
# Optimal Policy 
# for state in poss_states:
#     for a in action_space:
#         guro_var = m.getVarByName('x-' + str(state) +"-" + str(a))
#         print(guro_var)

In [54]:
# Optimal Policy 
for state in poss_states:
    for a in action_space:
        guro_var = m.getVarByName('x-' + str(state) +"-" + str(a))
        if guro_var is not None and guro_var.X > 0:
            print(guro_var)

In [55]:
m

<gurobi.Model Continuous instance MDP: 183 constrs, 54 vars, Parameter changes: CSQueueTimeout=120.0, ServerTimeout=10, TokenServer=10.162.183.44, CSIdleTimeout=60>