In [1]:
from __future__ import absolute_import
from __future__ import print_function

import datetime
import os
from shutil import copyfile

import memory
import training_simulation
import visualization
from generator import TrafficGenerator
from memory import Memory
from model import TrainModel
from training_simulation import Simulation
from utils import import_train_configuration, set_sumo, set_train_path
from visualization import Visualization
  

In [2]:
# Build every training component from 'training_settings.ini' and run the
# whole training session, one simulated episode at a time.
#
# NOTE: the instances are intentionally bound to the names `Memory`,
# `Visualization` and `Simulation`, because later cells use
# `Visualization.save_data_and_plot(...)` and `Simulation.<...>_store`.
# Those names shadow the classes imported in the first cell, so the classes
# are reached through their modules (`memory.Memory`, ...). That way this
# cell can be re-run on its own without hitting
# "TypeError: 'Memory' object is not callable".
config = import_train_configuration(config_file='training_settings.ini')
sumo_cmd = set_sumo(config['gui'], config['sumocfg_file_name'], config['max_steps'])
path = set_train_path(config['models_path_name'])

Model = TrainModel(
    config['num_layers'],
    config['width_layers'],
    config['batch_size'],
    config['learning_rate'],
    input_dim=config['num_states'],
    output_dim=config['num_actions']
)

Memory = memory.Memory(
    config['memory_size_max'],
    config['memory_size_min']
)

TrafficGen = TrafficGenerator(
    config['max_steps'],
    config['n_cars_generated']
)

Visualization = visualization.Visualization(
    path,
    dpi=96
)

Simulation = training_simulation.Simulation(
    Model,
    Memory,
    TrafficGen,
    sumo_cmd,
    config['gamma'],
    config['max_steps'],
    config['green_duration'],
    config['yellow_duration'],
    config['num_states'],
    config['num_actions'],
    config['training_epochs']
)

total_episodes = config['total_episodes']
timestamp_start = datetime.datetime.now()

for episode in range(total_episodes):
    print('\n----- Episode', episode + 1, 'of', total_episodes)
    # Exploration rate for the epsilon-greedy policy: 1.0 on the first
    # episode, decreasing linearly by 1/total_episodes per episode.
    epsilon = 1.0 - (episode / total_episodes)
    # `run` returns two durations, reported below in seconds.
    simulation_time, training_time = Simulation.run(episode, epsilon)
    print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's')

print("\n----- Start time:", timestamp_start)
print("----- End time:", datetime.datetime.now())
print("----- Session info saved at:", path)


----- Episode 1 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -3386 - Epsilon: 1.0
Training...
Simulation time: 10.6 s - Training time: 0.0 s - Total: 10.6 s

----- Episode 2 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -2952 - Epsilon: 0.99
Training...
Simulation time: 11.9 s - Training time: 153.3 s - Total: 165.2 s

----- Episode 3 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -2544 - Epsilon: 0.98
Training...
Simulation time: 6.1 s - Training time: 150.1 s - Total: 156.2 s

----- Episode 4 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -2919 - Epsilon: 0.97
Training...
Simulation time: 11.1 s - Training time: 147.9 s - Total: 159.0 s

----- Episode 5 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -2871 - Epsilon: 0.96
Training...
Simulation time: 11.3 s - Training time: 143.3 s - Total: 154.6 s

----- Episode 6 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -4096 - Epsilon: 0.95
Training...
Simulation time: 12.5

Simulating...
Total reward: -1632 - Epsilon: 0.53
Training...
Simulation time: 16.9 s - Training time: 141.9 s - Total: 158.8 s

----- Episode 49 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -1685 - Epsilon: 0.52
Training...
Simulation time: 16.1 s - Training time: 146.8 s - Total: 162.9 s

----- Episode 50 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -1832 - Epsilon: 0.51
Training...
Simulation time: 17.9 s - Training time: 146.9 s - Total: 164.8 s

----- Episode 51 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -1724 - Epsilon: 0.5
Training...
Simulation time: 20.2 s - Training time: 143.1 s - Total: 163.3 s

----- Episode 52 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -1591 - Epsilon: 0.49
Training...
Simulation time: 18.2 s - Training time: 146.8 s - Total: 165.0 s

----- Episode 53 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -1726 - Epsilon: 0.48
Training...
Simulation time: 17.9 s - Training time: 147.9 s - Total: 1

Simulating...
Total reward: -1485 - Epsilon: 0.06
Training...
Simulation time: 27.4 s - Training time: 146.6 s - Total: 174.0 s

----- Episode 96 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -1377 - Epsilon: 0.05
Training...
Simulation time: 28.7 s - Training time: 144.3 s - Total: 173.0 s

----- Episode 97 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -1291 - Epsilon: 0.04
Training...
Simulation time: 26.9 s - Training time: 138.8 s - Total: 165.7 s

----- Episode 98 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -1413 - Epsilon: 0.03
Training...
Simulation time: 26.9 s - Training time: 147.6 s - Total: 174.5 s

----- Episode 99 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -1710 - Epsilon: 0.02
Training...
Simulation time: 27.7 s - Training time: 146.0 s - Total: 173.7 s

----- Episode 100 of 100
 Retrying in 1 seconds
Simulating...
Total reward: -1235 - Epsilon: 0.01
Training...
Simulation time: 29.0 s - Training time: 144.6 s - Total:

In [3]:
# Store the trained model in the session folder created for this run
# (presumably written under `path` -- see TrainModel.save_model).
Model.save_model(path)

# Keep a copy of the settings next to the model so the run can be traced back
# to its configuration. `copyfile` returns the destination path, which the
# notebook displays as this cell's output.
settings_file = 'training_settings.ini'
copyfile(src=settings_file, dst=os.path.join(path, settings_file))

'/home/vysyakh/Downloads/new model/First_Model-20200418T085809Z-001/First_Model/TLCS/models/model_16/training_settings.ini'

In [4]:
# Export each per-episode series collected by the simulation. Every entry is
# (attribute on `Simulation`, output file name, y-axis label); the x-axis is
# always the episode.
plot_specs = [
    ('reward_store', 'reward', 'Cumulative negative reward'),
    ('cumulative_wait_store', 'delay', 'Cumulative delay (s)'),
    ('avg_queue_length_store', 'queue', 'Average queue length (vehicles)'),
]

for store_attr, plot_name, y_label in plot_specs:
    Visualization.save_data_and_plot(
        data=getattr(Simulation, store_attr),
        filename=plot_name,
        xlabel='Episode',
        ylabel=y_label
    )