Import packages.

In [1]:
import gym
import numpy as np
import dual_sourcing

Set up configurations for the environment.

In [2]:
CONFIG = {'Lr': 5, 'Le': 1, 'cr': 100, 'ce': 105, 'lambda': 10,
          'h': 1, 'b': 19, 'starting_state': [0]*7, 'max_order': 20, 'max_inventory': 1000}

Make an instance of the environment.

In [3]:
env = gym.make('DualSourcing-v0', config=CONFIG)

Print the environment settings.

In [4]:
print(env.state)
print(env.action_space)
print(env.observation_space)

[0 0 0 0 0 0 0]
MultiDiscrete([21 21])
MultiDiscrete([  21   21   21   21   21   21 1000])


Test the step function.

In [5]:
env.seed(0)
env.state = [0] * 7
print(env.step([8, 2]))
print('***')
env.state = [5, 4, 5, 6, 6, 2, 2]
print(env.step([3, 1]))

(array([  0,   0,   0,   0,   8,   2, -10]), 0, 10, {})
***
(array([ 4,  5,  6,  6,  3,  1, -2]), -812, 11, {})


Evaluate an estimate of the value function for a certain policy by doing multiple episodes of simulation.

In [62]:
def evaluate(env, n_episodes, numiters, policy, *args):
    # policy: policy function
    # env: gym environment
    # n_episodes: number of total episodes to run (outer iteration)
    # numiters: number of time steps (inner iteration)
    
    av_reward = np.zeros(n_episodes)
    
    for i in range(n_episodes):
        av_r = 0
        env.reset() # reset environment
        for t in range(numiters):
            action = policy(*args) # use a constant action
            state, reward, demand, info = env.step(action)
            av_r = av_r + reward
        av_reward[i] = av_r / numiters
        
    return np.mean(av_reward), np.std(av_reward) # return average reward and std

In [49]:
def constant_policy(env):
    return np.array([0, 10])

In [52]:
evaluate(constant_policy, env, 100, 1000)

(-1764.5975599999997, 699.6435250348754)

In [60]:
def TBS(env, Q, Se):
    ip = np.sum(env.state[1:env.Le]) + np.sum(env.state[env.Lr+1:])
    return Q, max(0, ip-Se)

In [67]:
evaluate(env, 100, 1000, TBS, env, 5, 3)

(-48426.34678, 1150.3982653473936)