In [1]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from collections import defaultdict
import os
import subprocess
from tqdm import trange
from copy import deepcopy

from env import Scenario, AMoD, CascadedQLearning
from util import mat2str, dictsum, moving_average

plt.style.use('ggplot')

## Scenario 1

Given a 2x4 grid:

| 0 | 2 | 4 | 6 |<br>
| 1 | 3 | 5 | 7 | <br>

<br>

We assume the demand is generated according to K=2 periodic patterns. That is:


- K = 1 --> people go from 0 to 7 and from 6 to 1;
- K = 2 --> people go from 7 to 0 and from 1 to 6;

In [2]:
# Scenario 1: built with no arguments, so whatever defaults Scenario defines apply.
scenario = Scenario()

In [3]:
# Environment built from the scenario; the evaluation loop calls reset()/step() on it.
env = AMoD(scenario)

In [4]:
# Dummy agent: in the random-policy evaluation it is only used for its node
# list and for get_desired_distribution(); the actions themselves are sampled
# at random rather than chosen by the agent.
agent = CascadedQLearning(env=env)
num_nodes = len(agent.nodes)  # one random action is drawn per node at every step

In [5]:
# Test Episodes: evaluate a uniformly random rebalancing policy on `env`.
# Every step samples one random action per node, converts it to a desired
# vehicle distribution, solves the minimal-rebalancing-distance problem with
# CPLEX (oplrun) and applies the resulting flows to the environment.
test_episodes = 100
epochs = trange(test_episodes) # build tqdm iterator for loop visualization
max_steps = 100 # maximum length of episode
np.random.seed(10) # fixed seed so the sampled actions are repeatable

# book-keeping variables: one entry per completed episode
test_rewards = []
test_revenue = []
test_served_demand = []
test_rebalancing_cost = []

# Solver configuration does not change between steps, so build it once.
cwd = os.getcwd().replace('\\','/')
modPath = cwd+'/mod/'
OPTPath = cwd+'/OPT/Random/S1/'
os.makedirs(OPTPath, exist_ok=True) # create-if-missing without a separate exists() check
modfile = modPath+'minRebDistRebOnly.mod'
# NOTE(review): machine-specific install location; adjust for the local CPLEX setup.
CPLEXPATH = "C:/Program Files/ibm/ILOG/CPLEX_Studio1210/opl/bin/x64_win64/"
my_env = os.environ.copy()
my_env["LD_LIBRARY_PATH"] = CPLEXPATH

for episode in epochs:
    try:
        obs = env.reset()
        episode_reward = 0
        episode_revenue = 0
        episode_served_demand = 0
        episode_rebalancing_cost = 0
        for step in range(max_steps):
            # Execute random policy: one integer action in [0, 4) per node.
            # Drawn one call at a time so the seeded random stream is unchanged.
            action_rl = [np.random.randint(low=0, high=4) for _ in range(num_nodes)]
            # get actual vehicle distributions vi (i.e. (x1*x2*..*xn)*num_vehicles)
            v_d = agent.get_desired_distribution(action_rl)

            # Solve ILP - Minimal Distance Problem
            t = env.time
            accTuple = [(n,env.acc[n][t]) for n in env.acc]
            accRLTuple = [(n, int(v_d_n)) for n, v_d_n in enumerate(v_d)]
            edgeAttr = [(i,j,env.G.edges[i,j]['time']) for i,j in env.G.edges]
            # Files are named after the time step only (not the episode).
            datafile = OPTPath + f'data_{t}.dat'
            resfile = OPTPath + f'res_{t}.dat'
            out_file = OPTPath + f'out_{t}.dat'
            with open(datafile,'w') as file:
                file.write('path="'+resfile+'";\r\n')
                file.write('edgeAttr='+mat2str(edgeAttr)+';\r\n')
                file.write('accInitTuple='+mat2str(accTuple)+';\r\n')
                file.write('accRLTuple='+mat2str(accRLTuple)+';\r\n')
            # Solver stdout goes to a log file; the with-block closes it, so no
            # explicit close() is needed. check_call raises CalledProcessError
            # if oplrun exits with a non-zero status.
            with open(out_file,'w') as output_f:
                subprocess.check_call([CPLEXPATH+"oplrun", modfile, datafile], stdout=output_f, env=my_env)

            # Collect results from file. The parser expects a line of the form
            # flow=[(i,j,f)(i,j,f)...]; edges absent from it default to 0.0.
            flow = defaultdict(float)
            with open(resfile,'r', encoding="utf8") as file:
                for row in file:
                    item = row.strip().strip(';').split('=')
                    if item[0] == 'flow':
                        values = item[1].strip(')]').strip('[(').split(')(')
                        for v in values:
                            if len(v) == 0:
                                continue
                            i,j,f = v.split(',')
                            flow[int(i),int(j)] = float(f)
            action = [flow[i,j] for i,j in env.edges]

            # Take step
            new_obs, reward, done, info = env.step(action)

            episode_reward += reward # update sum of rewards over episode
            # NOTE(review): the recorded runs report Revenue 0.00 for every
            # episode -- confirm that env.step() populates info['revenue'].
            episode_revenue += info['revenue']
            episode_served_demand += info['served_demand']
            episode_rebalancing_cost += info['rebalancing_cost']
            obs = new_obs

            # end episode if conditions reached
            if done:
                break

        # Adjacent f-strings are concatenated, so no source indentation leaks
        # into the progress-bar text (a backslash continuation inside the
        # literal would embed the next line's leading spaces).
        epochs.set_description(
            f"Episode {episode+1} | Reward: {episode_reward:.2f} | "
            f"Revenue: {episode_revenue:.2f} | ServedDemand: {episode_served_demand:.2f} | "
            f"Reb. Cost: {episode_rebalancing_cost:.2f}"
        )
        # Record this episode's totals
        test_rewards.append(episode_reward)
        test_revenue.append(episode_revenue)
        test_served_demand.append(episode_served_demand)
        test_rebalancing_cost.append(episode_rebalancing_cost)
    except KeyboardInterrupt:
        # Allow a manual stop; the episodes recorded so far are kept.
        break

Episode 100 | Reward: 173.16 | Revenue: 0.00 | ServedDemand: 80.00 | Reb. Cost: 186.00: 100%|██████████| 100/100 [03:38<00:00,  2.19s/it]


In [6]:
# Report results: each figure is the mean over episodes of the per-episode total.
print("Average Performance: \n")
summary_rows = (
    ('Avg Reward', test_rewards),
    ('Total Revenue', test_revenue),
    ('Total Served Demand', test_served_demand),
    ('Total Rebalancing Cost', test_rebalancing_cost),
)
for label, per_episode in summary_rows:
    print(f'{label}: {np.mean(per_episode):.2f}')

Average Performance: 

Avg Reward: 133.96
Total Revenue: 0.00
Total Served Demand: 68.89
Total Rebalancing Cost: 190.98


## Scenario 2

Given a 2x4 grid:

| 0 | 2 | 4 | 6 |<br>
| 1 | 3 | 5 | 7 | <br>

<br>

We assume the demand is generated according to an unbalanced demand pattern (customers move from left to right). That is:


- K = {1,2} --> people go from 1 to 6 and from 0 to 7;

In [7]:
# Scenario 2: origin-destination pairs (1,6) and (0,7) get the value 2 and every
# other pair falls back to 'default' = 0.1 (presumably demand rates; the exact
# units are defined by Scenario -- TODO confirm).
scenario = Scenario(demand_input = {(1,6):2, (0,7):2, 'default':0.1})

In [8]:
# Rebuild the environment so it uses the Scenario 2 demand defined above.
env = AMoD(scenario)

In [9]:
# Dummy agent bound to the new environment: in the random-policy evaluation it
# is only used for its node list and for get_desired_distribution().
agent = CascadedQLearning(env=env)
num_nodes = len(agent.nodes)  # one random action is drawn per node at every step

In [10]:
# Test Episodes: evaluate a uniformly random rebalancing policy on `env`.
# Every step samples one random action per node, converts it to a desired
# vehicle distribution, solves the minimal-rebalancing-distance problem with
# CPLEX (oplrun) and applies the resulting flows to the environment.
test_episodes = 100
epochs = trange(test_episodes) # build tqdm iterator for loop visualization
max_steps = 100 # maximum length of episode
np.random.seed(10) # fixed seed so the sampled actions are repeatable

# book-keeping variables: one entry per completed episode
test_rewards = []
test_revenue = []
test_served_demand = []
test_rebalancing_cost = []

# Solver configuration does not change between steps, so build it once.
cwd = os.getcwd().replace('\\','/')
modPath = cwd+'/mod/'
OPTPath = cwd+'/OPT/Random/S2/'
os.makedirs(OPTPath, exist_ok=True) # create-if-missing without a separate exists() check
modfile = modPath+'minRebDistRebOnly.mod'
# NOTE(review): machine-specific install location; adjust for the local CPLEX setup.
CPLEXPATH = "C:/Program Files/ibm/ILOG/CPLEX_Studio1210/opl/bin/x64_win64/"
my_env = os.environ.copy()
my_env["LD_LIBRARY_PATH"] = CPLEXPATH

for episode in epochs:
    try:
        obs = env.reset()
        episode_reward = 0
        episode_revenue = 0
        episode_served_demand = 0
        episode_rebalancing_cost = 0
        for step in range(max_steps):
            # Execute random policy: one integer action in [0, 4) per node.
            # Drawn one call at a time so the seeded random stream is unchanged.
            action_rl = [np.random.randint(low=0, high=4) for _ in range(num_nodes)]
            # get actual vehicle distributions vi (i.e. (x1*x2*..*xn)*num_vehicles)
            v_d = agent.get_desired_distribution(action_rl)

            # Solve ILP - Minimal Distance Problem
            t = env.time
            accTuple = [(n,env.acc[n][t]) for n in env.acc]
            accRLTuple = [(n, int(v_d_n)) for n, v_d_n in enumerate(v_d)]
            edgeAttr = [(i,j,env.G.edges[i,j]['time']) for i,j in env.G.edges]
            # Files are named after the time step only (not the episode).
            datafile = OPTPath + f'data_{t}.dat'
            resfile = OPTPath + f'res_{t}.dat'
            out_file = OPTPath + f'out_{t}.dat'
            with open(datafile,'w') as file:
                file.write('path="'+resfile+'";\r\n')
                file.write('edgeAttr='+mat2str(edgeAttr)+';\r\n')
                file.write('accInitTuple='+mat2str(accTuple)+';\r\n')
                file.write('accRLTuple='+mat2str(accRLTuple)+';\r\n')
            # Solver stdout goes to a log file; the with-block closes it, so no
            # explicit close() is needed. check_call raises CalledProcessError
            # if oplrun exits with a non-zero status.
            with open(out_file,'w') as output_f:
                subprocess.check_call([CPLEXPATH+"oplrun", modfile, datafile], stdout=output_f, env=my_env)

            # Collect results from file. The parser expects a line of the form
            # flow=[(i,j,f)(i,j,f)...]; edges absent from it default to 0.0.
            flow = defaultdict(float)
            with open(resfile,'r', encoding="utf8") as file:
                for row in file:
                    item = row.strip().strip(';').split('=')
                    if item[0] == 'flow':
                        values = item[1].strip(')]').strip('[(').split(')(')
                        for v in values:
                            if len(v) == 0:
                                continue
                            i,j,f = v.split(',')
                            flow[int(i),int(j)] = float(f)
            action = [flow[i,j] for i,j in env.edges]

            # Take step
            new_obs, reward, done, info = env.step(action)

            episode_reward += reward # update sum of rewards over episode
            # NOTE(review): the recorded runs report Revenue 0.00 for every
            # episode -- confirm that env.step() populates info['revenue'].
            episode_revenue += info['revenue']
            episode_served_demand += info['served_demand']
            episode_rebalancing_cost += info['rebalancing_cost']
            obs = new_obs

            # end episode if conditions reached
            if done:
                break

        # Adjacent f-strings are concatenated, so no source indentation leaks
        # into the progress-bar text (a backslash continuation inside the
        # literal would embed the next line's leading spaces).
        epochs.set_description(
            f"Episode {episode+1} | Reward: {episode_reward:.2f} | "
            f"Revenue: {episode_revenue:.2f} | ServedDemand: {episode_served_demand:.2f} | "
            f"Reb. Cost: {episode_rebalancing_cost:.2f}"
        )
        # Record this episode's totals
        test_rewards.append(episode_reward)
        test_revenue.append(episode_revenue)
        test_served_demand.append(episode_served_demand)
        test_rebalancing_cost.append(episode_rebalancing_cost)
    except KeyboardInterrupt:
        # Allow a manual stop; the episodes recorded so far are kept.
        break

Episode 100 | Reward: 70.27 | Revenue: 0.00 | ServedDemand: 33.00         | Reb. Cost: 196.20: 100%|██████████| 100/100 [03:39<00:00,  2.20s/it]


In [11]:
# Report results: each figure is the mean over episodes of the per-episode total.
print("Average Performance: \n")
summary_rows = (
    ('Avg Reward', test_rewards),
    ('Total Revenue', test_revenue),
    ('Total Served Demand', test_served_demand),
    ('Total Rebalancing Cost', test_rebalancing_cost),
)
for label, per_episode in summary_rows:
    print(f'{label}: {np.mean(per_episode):.2f}')

Average Performance: 

Avg Reward: 52.44
Total Revenue: 0.00
Total Served Demand: 30.66
Total Rebalancing Cost: 204.47
