In [6]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from collections import defaultdict
import os
import subprocess
from tqdm import trange
from copy import deepcopy

from env import Scenario, AMoD, CascadedQLearning
from util import mat2str, dictsum, moving_average

plt.style.use('ggplot')
CPLEXPATH = "C:/Program Files/ibm/ILOG/CPLEX_Studio1210/opl/bin/x64_win64/"

## Scenario 1 <br>

Given a 2x4 grid:

| 0 | 2 | 4 | 6 |<br>
| 1 | 3 | 5 | 7 | <br>

<br>

We assume the demand is generated according to K=2 periodic patterns. That is:


- K = 1 --> people go from 0 to 7 and from 6 to 1;
- K = 2 --> people go from 7 to 0 and from 1 to 6;

In [2]:
scenario = Scenario()

In [3]:
env = AMoD(scenario)

In [4]:
# dummy agent
agent = CascadedQLearning(env=env)
num_nodes = len(agent.nodes)

In [5]:
# Test Episodes
test_episodes = 100
epochs = trange(test_episodes) # build tqdm iterator for loop visualization
max_steps = 100 # maximum length of episode
np.random.seed(10)

# book-keeping variables
test_rewards = []
test_revenue = []
test_served_demand = []
test_rebalancing_cost = []
test_operating_cost = []


for episode in epochs:
    try:
        obs = env.reset()
        episode_reward = 0
        episode_revenue = 0
        episode_served_demand = 0
        episode_rebalancing_cost = 0
        episode_operating_cost = 0
        for step in range(max_steps):
            # Execure MPC
            action = env.MPC_exact(CPLEXPATH=CPLEXPATH)    

            # Take step
            new_obs, reward, done, info = env.step(action)

            episode_reward += reward # update sum of rewards over episode
            episode_revenue += info['revenue']
            episode_served_demand += info['served_demand']
            episode_rebalancing_cost += info['rebalancing_cost']
            episode_operating_cost += info['operating_cost']
            obs = new_obs

            # end episode if conditions reached
            if done:
                break

        epochs.set_description(f"Episode {episode+1} | Reward: {episode_reward:.2f} | Revenue: {episode_revenue:.2f} | ServedDemand: {episode_served_demand:.2f} \
        | Reb. Cost: {episode_operating_cost:.2f}")
        #Adding the total reward and reduced epsilon values
        test_rewards.append(episode_reward)
        test_revenue.append(episode_revenue)
        test_served_demand.append(episode_served_demand)
        test_rebalancing_cost.append(episode_rebalancing_cost)
        test_operating_cost.append(episode_operating_cost)
    except KeyboardInterrupt:
        break

Episode 100 | Reward: 4777.29 | Revenue: 5206.29 | ServedDemand: 715.00         | Reb. Cost: 429.00: 100%|██████████| 100/100 [03:08<00:00,  1.89s/it]


In [7]:
# Plot results
print("Average Performance: \n")
print(f'Avg Reward: {np.mean(test_rewards):.2f}')
print(f'Total Revenue: {np.mean(test_revenue):.2f}')
print(f'Total Served Demand: {np.mean(test_served_demand):.2f}')
print(f'Total Operating Cost: {np.mean(test_operating_cost):.2f}')

Average Performance: 

Avg Reward: 5032.50
Total Revenue: 5472.30
Total Served Demand: 730.22
Total Operating Cost: 439.80


## Scenario 2

Given a 2x4 grid:

| 0 | 2 | 4 | 6 |<br>
| 1 | 3 | 5 | 7 | <br>

<br>

We assume the demand is generated according to an unbalanced demand pattern (customers move from left to right). That is:


- K = {1,2} --> people go from 1 to 6 and from 0 to 7;

In [8]:
scenario = Scenario(demand_input = {(1,6):2, (0,7):2, 'default':0.1})

In [9]:
env = AMoD(scenario)

In [10]:
# dummy agent
agent = CascadedQLearning(env=env)
num_nodes = len(agent.nodes)

In [11]:
# Test Episodes
test_episodes = 100
epochs = trange(test_episodes) # build tqdm iterator for loop visualization
max_steps = 100 # maximum length of episode
np.random.seed(10)

# book-keeping variables
test_rewards = []
test_revenue = []
test_served_demand = []
test_rebalancing_cost = []
test_operating_cost = []


for episode in epochs:
    try:
        obs = env.reset()
        episode_reward = 0
        episode_revenue = 0
        episode_served_demand = 0
        episode_rebalancing_cost = 0
        episode_operating_cost = 0
        for step in range(max_steps):
            # Execure MPC
            action = env.MPC_exact(CPLEXPATH=CPLEXPATH)    

            # Take step
            new_obs, reward, done, info = env.step(action)

            episode_reward += reward # update sum of rewards over episode
            episode_revenue += info['revenue']
            episode_served_demand += info['served_demand']
            episode_rebalancing_cost += info['rebalancing_cost']
            episode_operating_cost += info['operating_cost']
            obs = new_obs

            # end episode if conditions reached
            if done:
                break

        epochs.set_description(f"Episode {episode+1} | Reward: {episode_reward:.2f} | Revenue: {episode_revenue:.2f} | ServedDemand: {episode_served_demand:.2f} \
        | Reb. Cost: {episode_operating_cost:.2f}")
        #Adding the total reward and reduced epsilon values
        test_rewards.append(episode_reward)
        test_revenue.append(episode_revenue)
        test_served_demand.append(episode_served_demand)
        test_rebalancing_cost.append(episode_rebalancing_cost)
        test_operating_cost.append(episode_operating_cost)
    except KeyboardInterrupt:
        break

Episode 100 | Reward: 3213.98 | Revenue: 3646.58 | ServedDemand: 538.00         | Reb. Cost: 432.60: 100%|██████████| 100/100 [03:09<00:00,  1.90s/it]


In [12]:
# Plot results
print("Average Performance: \n")
print(f'Avg Reward: {np.mean(test_rewards):.2f}')
print(f'Total Revenue: {np.mean(test_revenue):.2f}')
print(f'Total Served Demand: {np.mean(test_served_demand):.2f}')
print(f'Total Rebalancing Cost: {np.mean(test_operating_cost):.2f}')

Average Performance: 

Avg Reward: 3032.34
Total Revenue: 3459.56
Total Served Demand: 525.39
Total Rebalancing Cost: 427.31
