In [None]:
# !pip install stable-baselines3
# print(gym.__version__)

## Import relevant libraries

In [37]:
# import gym_cityflow
# from gym_cityflow.envs import Cityflow
import gym
import sys
import os
import random
import cityflow
import numpy as np
import json

from gym import spaces
from stable_baselines3 import A2C
from stable_baselines3 import DQN
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env

## Create gym environment for Cityflow

In [38]:
'''
action space = num of lighPhases


                |
                |
                N
                |
                |
------W----[][][][][]-------E-------
                |
                |
                S
                |
                |


go straight (W/E), (N/S)
straight + right (N), (S), (E), (W)
right only (W/E), (N/S)

total 8 phases
################################################################

observation space = [for each intersection[for each phase [num of incoming vehicles for each phase, avg speed for that phase]]]

################################################################

needs to be a way to extract number of lightPhases for each intersection (done, roadnet file)

'''

'\naction space = num of lighPhases\n\n\n                |\n                |\n                N\n                |\n                |\n------W----[][][][][]-------E-------\n                |\n                |\n                S\n                |\n                |\n\n\ngo straight (W/E), (N/S)\nstraight + right (N), (S), (E), (W)\nright only (W/E), (N/S)\n\ntotal 8 phases\n################################################################\n\nobservation space = [for each intersection[for each phase [num of incoming vehicles for each phase, avg speed for that phase]]]\n\n################################################################\n\nneeds to be a way to extract number of lightPhases for each intersection (done, roadnet file)\n\n'

In [158]:
class cityFlowGym(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, config_path, episodeSteps):
        
        # edit config_path to change replayLogFile name for each step
#         with open(config_path, "r+") as jsonFile:
#             data = json.load(jsonFile)
#             fileName = data["replayLogFile"].split(".")[0]
#             data["replayLogFile"] = fileName + "_" + str(replayNumber) + ".txt"

#             jsonFile.seek(0)  # rewind
#             json.dump(data, jsonFile)
        
        # creating Cityflow engine
        self.engine = cityflow.Engine(config_path, thread_num=1)
        
        #open cityflow config file into dict
        self.configDict = json.load(open(config_path))
        #open cityflow roadnet file into dict
        self.roadnetDict = json.load(open(self.configDict['dir'] + self.configDict['roadnetFile']))
        self.flowDict = json.load(open(self.configDict['dir'] + self.configDict['flowFile']))
        
        #steps per episode
#         self.steps_per_episode = episodeSteps
        self.isDone = False
        self.currStep = 0
        self.maxStep = episodeSteps
        self.minLighphaseTime = 0 #approximate realistic time
        self.maxLighphaseTime = 60 #approximate realistic time
        self.maxSpeed = self.flowDict[0]["vehicle"]["maxSpeed"]
        self.info = {}
        self.maxLightPhase = 0
        
        # create dict of controllable intersections and number of light phases
        self.intersections = {}
        for i in range(len(self.roadnetDict['intersections'])):
            # check if intersection is controllable
            if self.roadnetDict['intersections'][i]['virtual'] == False:
                currIntersection = self.roadnetDict['intersections'][i]
                
                # change maxLightPhase if needed for observationSpace definition
                if self.maxLightPhase < len(currIntersection['trafficLight']['lightphases']):
                    self.maxLightPhase = len(currIntersection['trafficLight']['lightphases'])
                
                # create incoming roadlink key that contains all incoming road lane id for each roadLink index
                roadLinkIntersectionDict = {}
                for roadLinkIndex, roadLink in enumerate(currIntersection["roadLinks"]):
                    startRoad = currIntersection["roadLinks"][roadLinkIndex]["startRoad"]
                    roadLinkSet = set()
                    for laneLinkIndex, laneLink in enumerate(currIntersection["roadLinks"][roadLinkIndex]["laneLinks"]):
                        tempLaneLink = startRoad + "_" + str(laneLink["startLaneIndex"])
                        roadLinkSet.add(tempLaneLink)
                    roadLinkIntersectionDict[roadLinkIndex] = roadLinkSet
                    
                # using the roadLinkIntersectionDict, we create the incoming road lanes and store it to each lightphase    
                lightPhaseRoadLaneIntersectionDict = {}
                for lightPhase in range(len(currIntersection['trafficLight']['lightphases'])):
                    availableRoadLinks = set()
                    for roadLinkIndex in currIntersection['trafficLight']['lightphases'][lightPhase]["availableRoadLinks"]:
                        availableRoadLinks.update(roadLinkIntersectionDict[roadLinkIndex])
                    lightPhaseRoadLaneIntersectionDict[lightPhase] = availableRoadLinks
                
#                 # for each roadLink in intersection store incoming lanes, outgoing lanes and direction in lists
#                 incomingLanes = []
#                 outgoingLanes = []
#                 directions = []
#                 for j in range(len(self.roadnetDict['intersections'][i]['roadLinks'])):
#                     incomingRoads = []
#                     outgoingRoads = []
#                     directions.append(self.roadnetDict['intersections'][i]['roadLinks'][j]['direction'])
#                     for k in range(len(self.roadnetDict['intersections'][i]['roadLinks'][j]['laneLinks'])):
#                         incomingRoads.append(self.roadnetDict['intersections'][i]['roadLinks'][j]['startRoad'] + 
#                                             '_' + 
#                                             str(self.roadnetDict['intersections'][i]['roadLinks'][j]['laneLinks'][k]['startLaneIndex']))
#                         outgoingRoads.append(self.roadnetDict['intersections'][i]['roadLinks'][j]['endRoad'] + 
#                                             '_' + 
#                                             str(self.roadnetDict['intersections'][i]['roadLinks'][j]['laneLinks'][k]['endLaneIndex']))
#                     incomingLanes.append(incomingRoads)
#                     outgoingLanes.append(outgoingRoads)

                # add intersection to dict where key = intersection_id
                # value = no of lightPhases, incoming lane names, outgoing lane names, directions for each lane group
                self.intersections[self.roadnetDict['intersections'][i]['id']] = { 
#                     "lightPhases" : len(self.roadnetDict['intersections'][i]['trafficLight']['lightphases']),
                    "lightPhases" : lightPhaseRoadLaneIntersectionDict,
                    "incomingRoadLinks" : roadLinkIntersectionDict
#                     "incomingLanes" : incomingLanes,
#                     "outgoingLanes" : outgoingLanes,
#                     "directions" : directions
                }
        
        #setup intersectionNames list for agent actions
        self.intersectionNames = []
        for key in self.intersections:
            self.intersectionNames.append(key)
        
        #define action space (num of lightPhases for each intersection) MultiDiscrete()
        actionSpaceArray = []
        upperBound = []
        for intersection in self.intersections:
#             lightphaseSpace = spaces.Discrete(len(self.intersections[intersection]["lightPhases"]))
#             lightphaseDurationSpace = spaces.Box(low = 0, high=60, shape=(1,), dtype=np.int32) # will +1 to get actual time
#             actionSpaceArray.append([lightphaseSpace.n, lightphaseDurationSpace.high[0]])

#             lightphaseSpace = spaces.Discrete(len(self.intersections[intersection]["lightPhases"]))
#             lightphaseDurationSpace = spaces.Box(low = 0, high=60, shape=(1,), dtype=np.int32) # will +1 to get actual time
#             actionSpaceArray.append([lightphaseSpace, lightphaseDurationSpace])

            
            upperBound.append(len(self.intersections[intersection]["lightPhases"]))
            upperBound.append(self.maxLighphaseTime)
#             lightphaseDurationSpace = self.minLighphaseTime = 0 #approximate realistic time
        

        self.action_space = spaces.Box(
            np.array([0 for space in range(len(upperBound))]).astype(np.int32),
            np.array(upperBound).astype(np.int32),
            dtype = np.int32
        )
        
        # define observation space
        '''
        (for 1 intersection) for 1 light phase = spaces.Dict({"numVehicles" : box(0, np.inf, (1,), dtype=np.int32),
                            "numWaitingVehicles" : box(0, np.inf, (1,), dtype=np.int32),
                             "avgSpeed" : box(0, np.inf, (1,), dtype=np.float32)})
                             
        (for 1 intersection) for n light phase = spaces.Dict({"numVehicles" : box(0, np.inf, (n,), dtype=np.int32),
                            "numWaitingVehicles" : box(0, np.inf, (1,), dtype=np.int32),
                             "avgSpeed" : box(0, np.inf, (n,), dtype=np.float32)})
                             
        (for n intersection) for n light phase = spaces.Dict({"intersection_1_1" : 
                                                    spaces.Dict({"numVehicles" : box(0, np.inf, (n,), dtype=np.int32),
                                                    "numWaitingVehicles" : box(0, np.inf, (1,), dtype=np.int32),
                                                     "avgSpeed" : box(0, np.inf, (n,), dtype=np.float32)})})
        '''
#         4th Iteration
        numOfIntersections = len(self.intersectionNames)
        observationSpace = {
                "numVehicles" : spaces.Box(0, np.inf, (numOfIntersections, self.maxLightPhase), dtype=np.int32),
                "numWaitingVehicles" : spaces.Box(0, np.inf, (numOfIntersections, self.maxLightPhase), dtype=np.int32),
                "avgSpeed" : spaces.Box(0, np.inf, (numOfIntersections, self.maxLightPhase), dtype=np.float32)

            }
        
        self.observation_space = spaces.Dict(observationSpace)
        
        
#         Applies for 3rd iteration
#         observationSpace = {}
#         for intersection in self.intersections:
#             numLightphases = len(self.intersections[intersection]["lightPhases"])
#             intersectionDict = spaces.Dict(
#                 {
#                     "numVehicles" : spaces.Box(0, np.inf, (numLightphases,), dtype=np.int32),
#                     "numWaitingVehicles" : spaces.Box(0, np.inf, (numLightphases,), dtype=np.int32),
#                     "avgSpeed" : spaces.Box(0, np.inf, (numLightphases,), dtype=np.float32)
                    
#                 }
#             )
#             observationSpace[intersection] = intersectionDict
        
#         self.observation_space = spaces.Dict(observationSpace)
        
        # create dictionary linking roadlane id to roadlinks for each intersection
#         for intersection in self.intersections:
#             roadLinkIntersectionDict = {}
#             for roadLinkIndex, roadLink in enumerate(self.intersections[intersection]["roadLinks"]):
#                 startRoad = self.intersections[intersection]["roadLinks"][roadLinkIndex]["startRoad"]
#                 roadLinkSet = set()
#                 for laneLinkIndex, laneLink in enumerate(self.intersections[intersection]["roadLinks"]["laneLinks"]):
#                     tempLaneLink = startRoad + "_" + str(laneLink["startLaneIndex"])
#                     roadLinkSet.add(tempLaneLink)
#                 roadLinkIntersectionDict[roadLinkIndex] = roadLinkSet
#             self.intersection[intersection]["incomingRoadLinks"] = roadLinkIntersectionDict
            
                
#         self.end_time = 3600
#         self.time_range = np.arange(self.start_time, self.end_time, self.step_len)
#         self.num_steps = len(self.time_range)
#         self.observation_space = gym.spaces.Box(
#             low=0, high=1, shape=(self.engine.get_num_of_intersections(), 2), dtype=np.float32)
#         self.action_space = gym.spaces.Discrete(self.engine.get_num_of_phases())
#         self.intersection_id = 0

    def step(self, action):
        
        # space.multidiscrete(tuple(space.discrete (lightPhase), space.box(duration of lightphase)) (for each intersection))
        actionArr = np.ndarray.tolist(action)
        minTimer = -1
        negRewards = False
        for intersection in range(len(self.intersectionNames)):
            
            # check if lightphase in intersection
            if actionArr[intersection*2] <= len(self.intersections[self.intersectionNames[intersection]]["lightPhases"]):
                self.engine.set_tl_phase(self.intersectionNames[intersection], int(actionArr[intersection*2]))
                if actionArr[1+intersection*2] < minTimer:
                    minTimer = intersection[1]
                    
            else:
                negRewards = True
#         for curr, intersection in enumerate(actionArr):
#             self.engine.set_tl_phase(self.intersectionNames[curr], intersection[0])
#             if intersection[1] < minTimer:
#                 minTimer = intersection[1]
                
        # let scenario run for minTimer long
        for second in range(minTimer):
            self.engine.next_step()
        
#         obs = np.concatenate((self.engine.get_lane_vehicle_count().reshape(-1, 1), self.engine.get_lane_speed().reshape(-1, 1)), axis=1) / 100
#         reward = self.engine.get_reward()
        obs = self.get_observation()
        reward = self.get_reward(obs, negRewards)
        self.isDone = self.currStep >= self.maxStep
        info = {}
        self.currStep += 1
        return obs, reward, self.isDone, info
    
    def get_observation(self):
        # get waiting vehicles for each lane first (key = laneID, value = numVehiclesWaiting)
        vehiclesWaitingByLaneDict = self.engine.get_lane_waiting_vehicle_count()
#         print(vehiclesWaitingByLaneDict)
        
        # get all vehicles speed (key = vehId, value = speed)
        vehiclesSpeedDict = self.engine.get_vehicle_speed()
#         print(vehiclesSpeedDict)
        
        # get all vehicles for each lane (key = laneId, value = [vehId])
        vehicleLaneDict = self.engine.get_lane_vehicles()
#         print(vehicleLaneDict)
        
        # create observation space for number of waiting vehicles and avgSpeed of moving+waiting vehicles of each lane
        # , for each lightphase
        
        # create for each roadlane first
        '''
        for roadLaneDict = {intersectionId : 
                                {roadLaneId: 
                                    {"numVehicles" : int,
                                     "numWaitingVehicles" : int,
                                     "avgSpeed" : float
                                                
                                    }
                
                                }
        
                            }
        '''
        roadLaneDict = {}
        for intersectionId, intersectionValue in self.intersections.items():
            roadLaneByIntersectionDict = {}
            
            # get a set of all incoming roadlanes for the intersection
            roadLaneByIntersectionSet = set()
            for roadLane in intersectionValue["incomingRoadLinks"].values():
                roadLaneByIntersectionSet.update(roadLane)
            
            # for each roadlane, find out num of waiting vehicles, and vehicles (waiting + nonwaiting) with speed
            for roadLane in roadLaneByIntersectionSet:
                tempRoadLaneDict = {}
                tempVehiclesArr = vehicleLaneDict[roadLane]
                tempRoadLaneDict["numVehicles"] = len(tempVehiclesArr)
                tempRoadLaneDict["numWaitingVehicles"] = vehiclesWaitingByLaneDict[roadLane]
                if len(tempVehiclesArr) == 0:
                    tempRoadLaneDict["avgSpeed"] = 0
                else:
                    tempAvgSpeed = 0
                    for vehicle in tempVehiclesArr:
                        tempAvgSpeed += vehiclesSpeedDict[vehicle]
                    tempRoadLaneDict["avgSpeed"] = tempAvgSpeed/len(tempVehiclesArr)
                
                roadLaneByIntersectionDict[roadLane] = tempRoadLaneDict
                
            # add intersection to roadLaneDict
            roadLaneDict[intersectionId] = roadLaneByIntersectionDict
        
        
        
        # define observation space
        '''
        1st iteration
        (for 1 intersection) for 1 light phase = spaces.Dict({"numVehicles" : box(0, np.inf, (1,), dtype=np.int32),
                            "numWaitingVehicles" : box(0, np.inf, (1,), dtype=np.int32),
                             "avgSpeed" : box(0, np.inf, (1,), dtype=np.float32)})
        2nd iteration                     
        (for 1 intersection) for n light phase = spaces.Dict({"numVehicles" : box(0, np.inf, (n,), dtype=np.int32),
                            "numWaitingVehicles" : box(0, np.inf, (n,), dtype=np.int32),
                             "avgSpeed" : box(0, np.inf, (n,), dtype=np.float32)})
        
        3rd iteration 
        (for n intersection) for n light phase = spaces.Dict({"intersection_1_1" : 
                                                    spaces.Dict({"numVehicles" : box(0, np.inf, (n,), dtype=np.int32),
                                                    "numWaitingVehicles" : box(0, np.inf, (n,), dtype=np.int32),
                                                     "avgSpeed" : box(0, np.inf, (n,), dtype=np.float32)})})
                                                     
        3rd definition of observation space does not work
        
        4th iteration
        (for m intersection) for n light phase = spaces.Dict({"numVehicles" : box(0, np.inf, (m,n), dtype=np.int32),
                                                    "numWaitingVehicles" : box(0, np.inf, (m,n), dtype=np.int32),
                                                     "avgSpeed" : box(0, np.inf, (m,n), dtype=np.float32)})})
                                                     
                                                     
        '''
        # observation for 4th iteration
        observationSpace = {
                "numVehicles" : [],
                "numWaitingVehicles" : [],             
                "avgSpeed" : []
            }
        
        # for each intersection 
        for intersectionId, intersectionValue in self.intersections.items():
            numVehiclesArr = []
            numWaitingVehiclesArr = []             
            avgSpeedArr = []
            
            # for each lightphase in the intersection
            for lightPhase, roadLaneArr in intersectionValue["lightPhases"].items():
                totalVehiclesByPhase = 0
                waitingVehiclesByPhase = 0
                totalSpeedByPhase = 0
                
                # for each roadLane (availableRoadLinks) in each lightphase
                for roadLaneId in roadLaneArr:
                    totalVehiclesByPhase += roadLaneDict[intersectionId][roadLaneId]["numVehicles"]
                    waitingVehiclesByPhase += roadLaneDict[intersectionId][roadLaneId]["numWaitingVehicles"]
                    totalSpeedByPhase += (roadLaneDict[intersectionId][roadLaneId]["numVehicles"] * 
                                          roadLaneDict[intersectionId][roadLaneId]["avgSpeed"])
                    
                # error checking if theres no vehicles in the lane to eliminate divide by zero error
                if totalVehiclesByPhase == 0:
                    avgSpeedByPhase = 0
                else:
                    avgSpeedByPhase = totalSpeedByPhase/totalVehiclesByPhase
                    
                numVehiclesArr.append(totalVehiclesByPhase)
                numWaitingVehiclesArr.append(waitingVehiclesByPhase)
                avgSpeedArr.append(avgSpeedByPhase)
                
            # convert to np array
            tempNumVehicles = np.array(numVehiclesArr)
            tempNumWaitingVehicles = np.array(numWaitingVehiclesArr)
            tempAvgSpeed = np.array(avgSpeedArr)
            
            observationSpace["numVehicles"].append(tempNumVehicles)
            observationSpace["numWaitingVehicles"].append(tempNumWaitingVehicles)
            observationSpace["avgSpeed"].append(tempAvgSpeed)
                
        # convert to np array
        observationSpace["numVehicles"] = np.array(observationSpace["numVehicles"])
        observationSpace["numWaitingVehicles"] = np.array(observationSpace["numWaitingVehicles"])
        observationSpace["avgSpeed"] = np.array(observationSpace["avgSpeed"])
        
#         print(observationSpace)
        return observationSpace
        
        
        # 3rd iteration (to be used for reference)
        # create obs dict to return function call
#         observationSpace = {}
        
#         # for each intersection 
#         for intersectionId, intersectionValue in self.intersections.items():
#             intersectionObsDict = {
#                 "numVehicles" : [],
#                 "numWaitingVehicles" : [],             
#                 "avgSpeed" : []
#             }
            
#             # for each lightphase in the intersection
#             for lightPhase, roadLaneArr in intersectionValue["lightPhases"].items():
#                 totalVehiclesByPhase = 0
#                 waitingVehiclesByPhase = 0
#                 totalSpeedByPhase = 0
                
#                 # for each roadLane (availableRoadLinks) in each lightphase
#                 for roadLaneId in roadLaneArr:
#                     totalVehiclesByPhase += roadLaneDict[intersectionId][roadLaneId]["numVehicles"]
#                     waitingVehiclesByPhase += roadLaneDict[intersectionId][roadLaneId]["numWaitingVehicles"]
#                     totalSpeedByPhase += (roadLaneDict[intersectionId][roadLaneId]["numVehicles"] * 
#                                           roadLaneDict[intersectionId][roadLaneId]["avgSpeed"])
                    
#                 # error checking if theres no vehicles in the lane to eliminate divide by zero error
#                 if totalVehiclesByPhase == 0:
#                     avgSpeedByPhase = 0
#                 else:
#                     avgSpeedByPhase = totalSpeedByPhase/totalVehiclesByPhase
                    
#                 intersectionObsDict["numVehicles"].append(totalVehiclesByPhase)
#                 intersectionObsDict["numWaitingVehicles"].append(waitingVehiclesByPhase)
#                 intersectionObsDict["avgSpeed"].append(avgSpeedByPhase)
                
#                 # convert to np array
#                 intersectionObsDict["numVehicles"] = np.array(intersectionObsDict["numVehicles"])
#                 ntersectionObsDict["numWaitingVehicles"] = np.array(intersectionObsDict["numWaitingVehicles"])
#                 ntersectionObsDict["avgSpeed"] = np.array(intersectionObsDict["avgSpeed"])
                
#             observationSpace[intersectionId] = intersectionObsDict
                        
#         return observationSpace
        
    def get_reward(self, observationSpace, negRewards):
        
        # if model returns lightphase that is not possible
        if negRewards == True:
            return -np.inf
        # aggregate total reward from all intersections
        totalReward = 0
        
        # numVehicles reward calc
        for intersection in observationSpace["numVehicles"]:
            for veh in intersection:
                
                totalReward -= veh
#                 print("veh", totalReward)
                
        # numWaitingVehicles reward calc
        for intersection in observationSpace["numWaitingVehicles"]:
            for waitingVeh in intersection:
                if waitingVeh == 0:
                        totalReward += 100
                else:
                    totalReward -= waitingVeh*100
#                 print("waitingVeh", totalReward)
                    
                        
        # avgSpeed reward calc
        for intersection in observationSpace["numWaitingVehicles"]:
            for speed in intersection:
                
                #reward if speed is
                if (speed-5) >= self.maxSpeed:
                    totalReward += 50
                else:
                    totalReward -= ((self.maxSpeed - speed)/self.maxSpeed)*50
#                 print("speed", totalReward)
        
            
        return totalReward/len(self.intersectionNames)
            
    def reset(self):
#         self.engine.reset()
        self.currStep = 0
        obs = self.get_observation()
#         print("obs", obs)
        self.isDone = False
        return obs
    

    def render(self, mode='human', close=False):
        pass

In [159]:
# cityflowEnv = DummyVecEnv([lambda: cityFlowGym("sample_data/jacob_config.json", 3600)])

In [160]:
cityflowEnv = cityFlowGym("sample_data/jacob_config.json", 3600)

In [161]:
log_path = os.path.join('Training', 'Logs')

In [162]:
model = PPO("MultiInputPolicy", cityflowEnv, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [163]:
model.learn(total_timesteps=1000000)

Logging to Training/Logs/PPO_8
-----------------------------
| time/              |      |
|    fps             | 1487 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 665           |
|    iterations           | 2             |
|    time_elapsed         | 6             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 1.1109281e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.25e+07      |
|    n_upda

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 823           |
|    iterations           | 11            |
|    time_elapsed         | 27            |
|    total_timesteps      | 22528         |
| train/                  |               |
|    approx_kl            | 1.1670985e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.85         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.22e+07      |
|    n_updates            | 100           |
|    policy_gradient_loss | 0.000106      |
|    std                  | 1.01          |
|    value_loss           | 4.45e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 778           |
|    iterations           | 20            |
|    time_elapsed         | 52            |
|    total_timesteps      | 40960         |
| train/                  |               |
|    approx_kl            | 4.0279992e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.24e+07      |
|    n_updates            | 190           |
|    policy_gradient_loss | 0.000164      |
|    std                  | 1             |
|    value_loss           | 4.43e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 701           |
|    iterations           | 29            |
|    time_elapsed         | 84            |
|    total_timesteps      | 59392         |
| train/                  |               |
|    approx_kl            | 2.8982438e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.19e+07      |
|    n_updates            | 280           |
|    policy_gradient_loss | -0.000341     |
|    std                  | 0.999         |
|    value_loss           | 4.42e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 666           |
|    iterations           | 38            |
|    time_elapsed         | 116           |
|    total_timesteps      | 77824         |
| train/                  |               |
|    approx_kl            | 2.7823553e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.83         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.17e+07      |
|    n_updates            | 370           |
|    policy_gradient_loss | -0.000174     |
|    std                  | 0.997         |
|    value_loss           | 4.36e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 677           |
|    iterations           | 47            |
|    time_elapsed         | 142           |
|    total_timesteps      | 96256         |
| train/                  |               |
|    approx_kl            | 1.7998391e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.83         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.17e+07      |
|    n_updates            | 460           |
|    policy_gradient_loss | -1.69e-05     |
|    std                  | 0.995         |
|    value_loss           | 4.34e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 688           |
|    iterations           | 56            |
|    time_elapsed         | 166           |
|    total_timesteps      | 114688        |
| train/                  |               |
|    approx_kl            | 1.1635711e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | 3.58e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.18e+07      |
|    n_updates            | 550           |
|    policy_gradient_loss | -7.93e-05     |
|    std                  | 1             |
|    value_loss           | 4.33e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 695           |
|    iterations           | 65            |
|    time_elapsed         | 191           |
|    total_timesteps      | 133120        |
| train/                  |               |
|    approx_kl            | 1.9824365e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | -2.38e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.18e+07      |
|    n_updates            | 640           |
|    policy_gradient_loss | -1.1e-05      |
|    std                  | 1             |
|    value_loss           | 4.32e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 700           |
|    iterations           | 74            |
|    time_elapsed         | 216           |
|    total_timesteps      | 151552        |
| train/                  |               |
|    approx_kl            | 1.1699187e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.85         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.15e+07      |
|    n_updates            | 730           |
|    policy_gradient_loss | -7.2e-05      |
|    std                  | 1             |
|    value_loss           | 4.31e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 703           |
|    iterations           | 83            |
|    time_elapsed         | 241           |
|    total_timesteps      | 169984        |
| train/                  |               |
|    approx_kl            | 4.9580412e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.86         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.17e+07      |
|    n_updates            | 820           |
|    policy_gradient_loss | 4.9e-05       |
|    std                  | 1.01          |
|    value_loss           | 4.35e+07      |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 705          |
|    iterations           | 92           |
|    time_elapsed         | 267          |
|    total_timesteps      | 188416       |
| train/                  |              |
|    approx_kl            | 6.390619e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.86        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.19e+07     |
|    n_updates            | 910          |
|    policy_gradient_loss | -0.000122    |
|    std                  | 1.01         |
|    value_loss           | 4.33e+07     |
------------------------------------------
-------------------------------------------
| rollout/

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.6e+03     |
|    ep_rew_mean          | 1.44e+06    |
| time/                   |             |
|    fps                  | 700         |
|    iterations           | 101         |
|    time_elapsed         | 295         |
|    total_timesteps      | 206848      |
| train/                  |             |
|    approx_kl            | 9.47871e-06 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.86       |
|    explained_variance   | 1.79e-07    |
|    learning_rate        | 0.0003      |
|    loss                 | 2.11e+07    |
|    n_updates            | 1000        |
|    policy_gradient_loss | 0.000516    |
|    std                  | 1.01        |
|    value_loss           | 4.32e+07    |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 700          |
|    iterations           | 110          |
|    time_elapsed         | 321          |
|    total_timesteps      | 225280       |
| train/                  |              |
|    approx_kl            | 8.516072e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.86        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.16e+07     |
|    n_updates            | 1090         |
|    policy_gradient_loss | -0.00179     |
|    std                  | 1.01         |
|    value_loss           | 4.31e+07     |
------------------------------------------
-------------------------------------------
| rollout/

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 700          |
|    iterations           | 119          |
|    time_elapsed         | 347          |
|    total_timesteps      | 243712       |
| train/                  |              |
|    approx_kl            | 7.786206e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.86        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.13e+07     |
|    n_updates            | 1180         |
|    policy_gradient_loss | -4.45e-05    |
|    std                  | 1.01         |
|    value_loss           | 4.24e+07     |
------------------------------------------
------------------------------------------
| rollout/ 

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 700           |
|    iterations           | 128           |
|    time_elapsed         | 374           |
|    total_timesteps      | 262144        |
| train/                  |               |
|    approx_kl            | 3.9742794e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.85         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.08e+07      |
|    n_updates            | 1270          |
|    policy_gradient_loss | -0.000327     |
|    std                  | 1.01          |
|    value_loss           | 4.23e+07      |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 700          |
|    iterations           | 137          |
|    time_elapsed         | 400          |
|    total_timesteps      | 280576       |
| train/                  |              |
|    approx_kl            | 1.405098e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.86        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.12e+07     |
|    n_updates            | 1360         |
|    policy_gradient_loss | -0.000113    |
|    std                  | 1.01         |
|    value_loss           | 4.22e+07     |
------------------------------------------
----------------------------------------
| rollout/   

--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 3.6e+03        |
|    ep_rew_mean          | 1.44e+06       |
| time/                   |                |
|    fps                  | 700            |
|    iterations           | 146            |
|    time_elapsed         | 426            |
|    total_timesteps      | 299008         |
| train/                  |                |
|    approx_kl            | 1.47322135e-05 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -2.86          |
|    explained_variance   | 0              |
|    learning_rate        | 0.0003         |
|    loss                 | 2.15e+07       |
|    n_updates            | 1450           |
|    policy_gradient_loss | -0.0001        |
|    std                  | 1.01           |
|    value_loss           | 4.21e+07       |
--------------------------------------------
----------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 700          |
|    iterations           | 155          |
|    time_elapsed         | 452          |
|    total_timesteps      | 317440       |
| train/                  |              |
|    approx_kl            | 8.175848e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.85        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.15e+07     |
|    n_updates            | 1540         |
|    policy_gradient_loss | 0.000144     |
|    std                  | 1.01         |
|    value_loss           | 4.25e+07     |
------------------------------------------
-------------------------------------------
| rollout/

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 701          |
|    iterations           | 164          |
|    time_elapsed         | 478          |
|    total_timesteps      | 335872       |
| train/                  |              |
|    approx_kl            | 8.332427e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.86        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.11e+07     |
|    n_updates            | 1630         |
|    policy_gradient_loss | -6.52e-05    |
|    std                  | 1.01         |
|    value_loss           | 4.24e+07     |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 701          |
|    iterations           | 173          |
|    time_elapsed         | 505          |
|    total_timesteps      | 354304       |
| train/                  |              |
|    approx_kl            | 8.859788e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.86        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.11e+07     |
|    n_updates            | 1720         |
|    policy_gradient_loss | -0.000165    |
|    std                  | 1.01         |
|    value_loss           | 4.22e+07     |
------------------------------------------
-------------------------------------------
| rollout/

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 693          |
|    iterations           | 182          |
|    time_elapsed         | 537          |
|    total_timesteps      | 372736       |
| train/                  |              |
|    approx_kl            | 3.428242e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.86        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.12e+07     |
|    n_updates            | 1810         |
|    policy_gradient_loss | -0.00049     |
|    std                  | 1.01         |
|    value_loss           | 4.21e+07     |
------------------------------------------
------------------------------------------
| rollout/ 

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 680           |
|    iterations           | 191           |
|    time_elapsed         | 574           |
|    total_timesteps      | 391168        |
| train/                  |               |
|    approx_kl            | 2.7366652e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.85         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.97e+07      |
|    n_updates            | 1900          |
|    policy_gradient_loss | -0.000196     |
|    std                  | 1             |
|    value_loss           | 4.14e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 666           |
|    iterations           | 200           |
|    time_elapsed         | 614           |
|    total_timesteps      | 409600        |
| train/                  |               |
|    approx_kl            | 5.6434947e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.85         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.99e+07      |
|    n_updates            | 1990          |
|    policy_gradient_loss | -5.29e-05     |
|    std                  | 1.01          |
|    value_loss           | 4.13e+07      |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 666          |
|    iterations           | 209          |
|    time_elapsed         | 642          |
|    total_timesteps      | 428032       |
| train/                  |              |
|    approx_kl            | 8.598785e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.83        |
|    explained_variance   | 1.79e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.05e+07     |
|    n_updates            | 2080         |
|    policy_gradient_loss | -6.64e-05    |
|    std                  | 0.996        |
|    value_loss           | 4.12e+07     |
------------------------------------------
-------------------------------------------
| rollout/

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 666           |
|    iterations           | 218           |
|    time_elapsed         | 669           |
|    total_timesteps      | 446464        |
| train/                  |               |
|    approx_kl            | 7.4661803e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.06e+07      |
|    n_updates            | 2170          |
|    policy_gradient_loss | -4.72e-05     |
|    std                  | 1             |
|    value_loss           | 4.11e+07      |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 666          |
|    iterations           | 227          |
|    time_elapsed         | 697          |
|    total_timesteps      | 464896       |
| train/                  |              |
|    approx_kl            | 9.512849e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.84        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.08e+07     |
|    n_updates            | 2260         |
|    policy_gradient_loss | -8.31e-05    |
|    std                  | 0.999        |
|    value_loss           | 4.1e+07      |
------------------------------------------
-------------------------------------------
| rollout/

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 665           |
|    iterations           | 236           |
|    time_elapsed         | 726           |
|    total_timesteps      | 483328        |
| train/                  |               |
|    approx_kl            | 2.0907319e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.09e+07      |
|    n_updates            | 2350          |
|    policy_gradient_loss | 0.00297       |
|    std                  | 1             |
|    value_loss           | 4.14e+07      |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 665          |
|    iterations           | 245          |
|    time_elapsed         | 754          |
|    total_timesteps      | 501760       |
| train/                  |              |
|    approx_kl            | 3.451656e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.84        |
|    explained_variance   | 1.79e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.09e+07     |
|    n_updates            | 2440         |
|    policy_gradient_loss | 7e-05        |
|    std                  | 1            |
|    value_loss           | 4.13e+07     |
------------------------------------------
------------------------------------------
| rollout/ 

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 664           |
|    iterations           | 254           |
|    time_elapsed         | 782           |
|    total_timesteps      | 520192        |
| train/                  |               |
|    approx_kl            | 2.9500807e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.03e+07      |
|    n_updates            | 2530          |
|    policy_gradient_loss | -6.96e-05     |
|    std                  | 1             |
|    value_loss           | 4.11e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 663           |
|    iterations           | 263           |
|    time_elapsed         | 811           |
|    total_timesteps      | 538624        |
| train/                  |               |
|    approx_kl            | 1.1855707e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2e+07         |
|    n_updates            | 2620          |
|    policy_gradient_loss | -0.000103     |
|    std                  | 1             |
|    value_loss           | 4.06e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 663           |
|    iterations           | 272           |
|    time_elapsed         | 839           |
|    total_timesteps      | 557056        |
| train/                  |               |
|    approx_kl            | 1.9490253e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.86         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0003        |
|    loss                 | 2e+07         |
|    n_updates            | 2710          |
|    policy_gradient_loss | -6.4e-06      |
|    std                  | 1.01          |
|    value_loss           | 4.03e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 662           |
|    iterations           | 281           |
|    time_elapsed         | 868           |
|    total_timesteps      | 575488        |
| train/                  |               |
|    approx_kl            | 3.4497934e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.86         |
|    explained_variance   | 1.79e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.99e+07      |
|    n_updates            | 2800          |
|    policy_gradient_loss | -2.28e-05     |
|    std                  | 1.01          |
|    value_loss           | 4.02e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 662           |
|    iterations           | 290           |
|    time_elapsed         | 896           |
|    total_timesteps      | 593920        |
| train/                  |               |
|    approx_kl            | 1.7659127e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.86         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.05e+07      |
|    n_updates            | 2890          |
|    policy_gradient_loss | -0.000114     |
|    std                  | 1.01          |
|    value_loss           | 4.01e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 662           |
|    iterations           | 299           |
|    time_elapsed         | 923           |
|    total_timesteps      | 612352        |
| train/                  |               |
|    approx_kl            | 1.3783225e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.87         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.99e+07      |
|    n_updates            | 2980          |
|    policy_gradient_loss | -8.65e-05     |
|    std                  | 1.02          |
|    value_loss           | 4e+07         |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 663           |
|    iterations           | 308           |
|    time_elapsed         | 951           |
|    total_timesteps      | 630784        |
| train/                  |               |
|    approx_kl            | 2.9312214e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.86         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.99e+07      |
|    n_updates            | 3070          |
|    policy_gradient_loss | 1.99e-06      |
|    std                  | 1.01          |
|    value_loss           | 4.04e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 663           |
|    iterations           | 317           |
|    time_elapsed         | 979           |
|    total_timesteps      | 649216        |
| train/                  |               |
|    approx_kl            | 4.5256456e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.87         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.04e+07      |
|    n_updates            | 3160          |
|    policy_gradient_loss | 0.000392      |
|    std                  | 1.02          |
|    value_loss           | 4.03e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 662           |
|    iterations           | 326           |
|    time_elapsed         | 1007          |
|    total_timesteps      | 667648        |
| train/                  |               |
|    approx_kl            | 1.3641838e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.88         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2e+07         |
|    n_updates            | 3250          |
|    policy_gradient_loss | 2.24e-06      |
|    std                  | 1.02          |
|    value_loss           | 4.02e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 662           |
|    iterations           | 335           |
|    time_elapsed         | 1035          |
|    total_timesteps      | 686080        |
| train/                  |               |
|    approx_kl            | 4.6699424e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.86         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2e+07         |
|    n_updates            | 3340          |
|    policy_gradient_loss | -0.000121     |
|    std                  | 1.01          |
|    value_loss           | 4.01e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 662           |
|    iterations           | 344           |
|    time_elapsed         | 1063          |
|    total_timesteps      | 704512        |
| train/                  |               |
|    approx_kl            | 1.2005097e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.88         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.99e+07      |
|    n_updates            | 3430          |
|    policy_gradient_loss | -9e-05        |
|    std                  | 1.02          |
|    value_loss           | 3.94e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 662           |
|    iterations           | 353           |
|    time_elapsed         | 1091          |
|    total_timesteps      | 722944        |
| train/                  |               |
|    approx_kl            | 4.6691275e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.88         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2e+07         |
|    n_updates            | 3520          |
|    policy_gradient_loss | -4.99e-05     |
|    std                  | 1.02          |
|    value_loss           | 3.93e+07      |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 661          |
|    iterations           | 362          |
|    time_elapsed         | 1121         |
|    total_timesteps      | 741376       |
| train/                  |              |
|    approx_kl            | 9.406591e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.87        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.94e+07     |
|    n_updates            | 3610         |
|    policy_gradient_loss | -6.61e-05    |
|    std                  | 1.01         |
|    value_loss           | 3.91e+07     |
------------------------------------------
-------------------------------------------
| rollout/

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 661           |
|    iterations           | 371           |
|    time_elapsed         | 1149          |
|    total_timesteps      | 759808        |
| train/                  |               |
|    approx_kl            | 5.0959934e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.87         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.97e+07      |
|    n_updates            | 3700          |
|    policy_gradient_loss | -5.15e-05     |
|    std                  | 1.02          |
|    value_loss           | 3.9e+07       |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 661          |
|    iterations           | 380          |
|    time_elapsed         | 1176         |
|    total_timesteps      | 778240       |
| train/                  |              |
|    approx_kl            | 3.659079e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.87        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.94e+07     |
|    n_updates            | 3790         |
|    policy_gradient_loss | -2.03e-05    |
|    std                  | 1.02         |
|    value_loss           | 3.89e+07     |
------------------------------------------
-------------------------------------------
| rollout/

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 661           |
|    iterations           | 389           |
|    time_elapsed         | 1203          |
|    total_timesteps      | 796672        |
| train/                  |               |
|    approx_kl            | 2.4916953e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.87         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.95e+07      |
|    n_updates            | 3880          |
|    policy_gradient_loss | 6.38e-05      |
|    std                  | 1.02          |
|    value_loss           | 3.94e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 662           |
|    iterations           | 398           |
|    time_elapsed         | 1231          |
|    total_timesteps      | 815104        |
| train/                  |               |
|    approx_kl            | 1.5993137e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.87         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.96e+07      |
|    n_updates            | 3970          |
|    policy_gradient_loss | 0.000593      |
|    std                  | 1.02          |
|    value_loss           | 3.92e+07      |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 662          |
|    iterations           | 407          |
|    time_elapsed         | 1258         |
|    total_timesteps      | 833536       |
| train/                  |              |
|    approx_kl            | 8.612202e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.87        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.98e+07     |
|    n_updates            | 4060         |
|    policy_gradient_loss | 3.68e-05     |
|    std                  | 1.02         |
|    value_loss           | 3.91e+07     |
------------------------------------------
------------------------------------------
| rollout/ 

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 662           |
|    iterations           | 416           |
|    time_elapsed         | 1286          |
|    total_timesteps      | 851968        |
| train/                  |               |
|    approx_kl            | 1.1080498e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.87         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.85e+07      |
|    n_updates            | 4150          |
|    policy_gradient_loss | -7.53e-05     |
|    std                  | 1.02          |
|    value_loss           | 3.84e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 662           |
|    iterations           | 425           |
|    time_elapsed         | 1314          |
|    total_timesteps      | 870400        |
| train/                  |               |
|    approx_kl            | 1.8940045e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.87         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.93e+07      |
|    n_updates            | 4240          |
|    policy_gradient_loss | -0.000143     |
|    std                  | 1.02          |
|    value_loss           | 3.83e+07      |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 661          |
|    iterations           | 434          |
|    time_elapsed         | 1342         |
|    total_timesteps      | 888832       |
| train/                  |              |
|    approx_kl            | 5.429407e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.87        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.92e+07     |
|    n_updates            | 4330         |
|    policy_gradient_loss | -5.3e-05     |
|    std                  | 1.02         |
|    value_loss           | 3.82e+07     |
------------------------------------------
------------------------------------------
| rollout/ 

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 661           |
|    iterations           | 443           |
|    time_elapsed         | 1371          |
|    total_timesteps      | 907264        |
| train/                  |               |
|    approx_kl            | 1.3338635e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.87         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.94e+07      |
|    n_updates            | 4420          |
|    policy_gradient_loss | -8.38e-05     |
|    std                  | 1.02          |
|    value_loss           | 3.81e+07      |
-------------------------------------------
--------------------------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.6e+03     |
|    ep_rew_mean          | 1.44e+06    |
| time/                   |             |
|    fps                  | 661         |
|    iterations           | 452         |
|    time_elapsed         | 1399        |
|    total_timesteps      | 925696      |
| train/                  |             |
|    approx_kl            | 1.69079e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.87       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 1.89e+07    |
|    n_updates            | 4510        |
|    policy_gradient_loss | -0.000124   |
|    std                  | 1.02        |
|    value_loss           | 3.8e+07     |
-----------------------------------------
-------------------------------------------
| rollout/                |     

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 661          |
|    iterations           | 461          |
|    time_elapsed         | 1428         |
|    total_timesteps      | 944128       |
| train/                  |              |
|    approx_kl            | 7.578463e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.88        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.88e+07     |
|    n_updates            | 4600         |
|    policy_gradient_loss | 0.000111     |
|    std                  | 1.02         |
|    value_loss           | 3.84e+07     |
------------------------------------------
-------------------------------------------
| rollout/

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 660           |
|    iterations           | 470           |
|    time_elapsed         | 1457          |
|    total_timesteps      | 962560        |
| train/                  |               |
|    approx_kl            | 1.1038559e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.88         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.9e+07       |
|    n_updates            | 4690          |
|    policy_gradient_loss | 0.00051       |
|    std                  | 1.02          |
|    value_loss           | 3.83e+07      |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | 1.44e+06     |
| time/                   |              |
|    fps                  | 660          |
|    iterations           | 479          |
|    time_elapsed         | 1485         |
|    total_timesteps      | 980992       |
| train/                  |              |
|    approx_kl            | 8.416071e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.88        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.92e+07     |
|    n_updates            | 4780         |
|    policy_gradient_loss | -0.00011     |
|    std                  | 1.02         |
|    value_loss           | 3.82e+07     |
------------------------------------------
-------------------------------------------
| rollout/

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | 1.44e+06      |
| time/                   |               |
|    fps                  | 660           |
|    iterations           | 488           |
|    time_elapsed         | 1513          |
|    total_timesteps      | 999424        |
| train/                  |               |
|    approx_kl            | 5.2695104e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.89         |
|    explained_variance   | -2.38e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.92e+07      |
|    n_updates            | 4870          |
|    policy_gradient_loss | 0.000642      |
|    std                  | 1.02          |
|    value_loss           | 3.81e+07      |
-------------------------------------------
--------------------------------

<stable_baselines3.ppo.ppo.PPO at 0x7f1d505f4e20>

In [154]:
episodes = 10
for episode in range(1, episodes+1):
    state = cityflowEnv.reset()
#     cityflowEnv.engine.reset()
    done = False
    score = 0
    step = 0
    
    while not done:
#         env.render()
        action = cityflowEnv.action_space.sample()
        n_state, reward, done, info = cityflowEnv.step(action)
#         print(n_state, reward, done, info)
        
#         print(reward)
#         avg_reward = 0
#         for intersection in reward:
#             avg_reward += intersection[1]
#         avg_reward /= len(reward)
        score += reward
        step +=1
#         break
    score /= step
        
    print('Episode:{} Score:{} with {} steps'.format(episode, score, step))

Episode:1 Score:400.0 with 3601 steps
Episode:2 Score:400.0 with 3601 steps
Episode:3 Score:400.0 with 3601 steps
Episode:4 Score:400.0 with 3601 steps
Episode:5 Score:400.0 with 3601 steps
Episode:6 Score:400.0 with 3601 steps
Episode:7 Score:400.0 with 3601 steps
Episode:8 Score:400.0 with 3601 steps
Episode:9 Score:400.0 with 3601 steps
Episode:10 Score:400.0 with 3601 steps


In [155]:
# It will check your custom environment and output additional warnings if needed
check_env(cityflowEnv)

AssertionError: Error while checking key=avgSpeed: The observation returned by the `reset()` method does not match the given observation space

In [63]:
# actionSpaceArray = []

# lightphaseSpace = spaces.Discrete(8)
# lightphaseDurationSpace = spaces.Box(low = 0, high=60, shape=(1,), dtype=np.float32)
# # intersection1 = gym.spaces.Tuple((lightphaseSpace.n, lightphaseDurationSpace[0]))
# actionSpaceArray.append([lightphaseSpace.n, lightphaseDurationSpace.high[0]])
# lightphaseSpace = spaces.Discrete(3)
# lightphaseDurationSpace = spaces.Box(low = 10, high=20, shape=(1,), dtype=np.int32)
# # intersection2 = gym.spaces.Tuple((lightphaseSpace.n, lightphaseDurationSpace[0]))
# actionSpaceArray.append([lightphaseSpace.n, lightphaseDurationSpace.high[0]])

# action_space = spaces.MultiDiscrete(actionSpaceArray)




# for intersection in self.intersections:
# lightphaseSpace = spaces.Discrete(8)
# lightphaseDurationSpace = spaces.Box(low = 10, high=60, shape=(1,), dtype=np.int32)
# actionSpaceArray.append([lightphaseSpace, lightphaseDurationSpace]) # tuple for each intersection (lightphase next, duration)
# lightphaseSpace = spaces.Discrete(3)
# lightphaseDurationSpace = spaces.Box(low = 10, high=20, shape=(1,), dtype=np.int32)
# actionSpaceArray.append([lightphaseSpace, lightphaseDurationSpace]) # tuple for each intersection (lightphase next, duration)

# actionSpaceArray.append([i for i in range(8)])
# actionSpaceArray.append([i for i in range(10,61)])
# actionSpaceArray.append([i for i in range(3)])
# actionSpaceArray.append([i for i in range(10,21)])
# action_space = spaces.MultiDiscrete(actionSpaceArray) # multidiscrete for all intersections

# lightphaseSpace = spaces.Discrete(8)
# lightphaseDurationSpace = spaces.Box(low = 10, high=60, shape=(1,), dtype=np.int32)
# actionSpaceArray.append(gym.spaces.MultiDiscrete([lightphaseSpace, lightphaseDurationSpace]))
# lightphaseSpace = spaces.Discrete(3)
# lightphaseDurationSpace = spaces.Box(low = 10, high=20, shape=(1,), dtype=np.int32)
# actionSpaceArray.append(gym.spaces.MultiDiscrete([lightphaseSpace, lightphaseDurationSpace]))

# action_space = spaces.MultiDiscrete(np.array(actionSpaceArray))

# action_space = spaces.MultiDiscrete(actionSpaceArray) # multidiscrete for all intersections


action_space = spaces.Box(
            np.array([0, 0, 0, 0]).astype(np.int32),
            np.array([8, 60, 3, 10]).astype(np.int32),
            dtype=np.int32
        )


In [64]:
# np.ndarray.tolist(action_space.sample())

[5, 1, 0, 5]

In [55]:
env = gym.make(
    "cityflow-v0",
    configPath="sample_data/jacob_config.json"
    ,episodeSteps=3600
)

In [56]:
env.getRoadnet()

Roadnet is from path: sample_data/jacob_flow.json


In [4]:
# vec_env = DummyVecEnv([lambda: env])

In [5]:
# Check action space
print(type(env.action_space))
print(type(env.observation_space))
print(env.action_space)

<class 'gym.spaces.multi_discrete.MultiDiscrete'>
<class 'gym.spaces.dict.Dict'>
MultiDiscrete([8])


In [6]:
print(env.action_space.sample())

[4]


In [7]:
print(env.actionSpaceArray)

[8]


In [14]:
print(env.intersections)

{'intersection_1_1': [[8], [['road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4'], ['road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1'], ['road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6'], ['road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_6', 'road_1_0_1_6', 'road_1_0_1_6', 'road_1_0_1_6', 'road

In [24]:
print(env.intersections["intersection_1_1"][1])
print(len(env.intersections["intersection_1_1"][1]))

[['road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4'], ['road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1'], ['road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6'], ['road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_5', 'road_1_0_1_6', 'road_1_0_1_6', 'road_1_0_1_6', 'road_1_0_1_6', 'road_1_0_1_6', 'road_1_0_1_6', 

In [32]:
print(env.intersections["intersection_1_1"][1][0])
print(len(env.intersections["intersection_1_1"][1][0]))
print()
print(env.intersections["intersection_1_1"][1][1])
print(len(env.intersections["intersection_1_1"][1][1]))
print()
print(env.intersections["intersection_1_1"][1][2])
print(len(env.intersections["intersection_1_1"][1][2]))

['road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_2', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_3', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4', 'road_0_1_0_4']
21

['road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_0', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1', 'road_0_1_0_1']
14

['road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_5', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6', 'road_0_1_0_6']
14


In [33]:
print(env.intersections["intersection_1_1"][2][0])
print(len(env.intersections["intersection_1_1"][2][0]))
print()
print(env.intersections["intersection_1_1"][2][1])
print(len(env.intersections["intersection_1_1"][2][1]))
print()
print(env.intersections["intersection_1_1"][2][2])
print(len(env.intersections["intersection_1_1"][2][2]))

['road_1_1_0_0', 'road_1_1_0_1', 'road_1_1_0_2', 'road_1_1_0_3', 'road_1_1_0_4', 'road_1_1_0_5', 'road_1_1_0_6', 'road_1_1_0_0', 'road_1_1_0_1', 'road_1_1_0_2', 'road_1_1_0_3', 'road_1_1_0_4', 'road_1_1_0_5', 'road_1_1_0_6', 'road_1_1_0_0', 'road_1_1_0_1', 'road_1_1_0_2', 'road_1_1_0_3', 'road_1_1_0_4', 'road_1_1_0_5', 'road_1_1_0_6']
21

['road_1_1_1_0', 'road_1_1_1_1', 'road_1_1_1_2', 'road_1_1_1_3', 'road_1_1_1_4', 'road_1_1_1_5', 'road_1_1_1_6', 'road_1_1_1_0', 'road_1_1_1_1', 'road_1_1_1_2', 'road_1_1_1_3', 'road_1_1_1_4', 'road_1_1_1_5', 'road_1_1_1_6']
14

['road_1_1_3_0', 'road_1_1_3_1', 'road_1_1_3_2', 'road_1_1_3_3', 'road_1_1_3_4', 'road_1_1_3_5', 'road_1_1_3_6', 'road_1_1_3_0', 'road_1_1_3_1', 'road_1_1_3_2', 'road_1_1_3_3', 'road_1_1_3_4', 'road_1_1_3_5', 'road_1_1_3_6']
14


In [22]:
print(env.intersections["intersection_1_1"][3])
print(len(env.intersections["intersection_1_1"][3]))

[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]
12


In [29]:
env.flowDict

[{'vehicle': {'length': 5.0,
   'width': 2.0,
   'maxPosAcc': 2.0,
   'maxNegAcc': 4.5,
   'usualPosAcc': 2.0,
   'usualNegAcc': 4.5,
   'minGap': 2.5,
   'maxSpeed': 16.67,
   'headwayTime': 1.5},
  'route': ['road_0_1_0', 'road_1_1_0'],
  'interval': 5.0,
  'startTime': 0,
  'endTime': -1},
 {'vehicle': {'length': 5.0,
   'width': 2.0,
   'maxPosAcc': 2.0,
   'maxNegAcc': 4.5,
   'usualPosAcc': 2.0,
   'usualNegAcc': 4.5,
   'minGap': 2.5,
   'maxSpeed': 16.67,
   'headwayTime': 1.5},
  'route': ['road_2_1_2', 'road_1_1_2'],
  'interval': 5.0,
  'startTime': 0,
  'endTime': -1},
 {'vehicle': {'length': 5.0,
   'width': 2.0,
   'maxPosAcc': 2.0,
   'maxNegAcc': 4.5,
   'usualPosAcc': 2.0,
   'usualNegAcc': 4.5,
   'minGap': 2.5,
   'maxSpeed': 16.67,
   'headwayTime': 1.5},
  'route': ['road_1_0_1', 'road_1_1_1'],
  'interval': 5.0,
  'startTime': 0,
  'endTime': -1},
 {'vehicle': {'length': 5.0,
   'width': 2.0,
   'maxPosAcc': 2.0,
   'maxNegAcc': 4.5,
   'usualPosAcc': 2.0,
   'usu

In [30]:
len(env.flowDict)

12

In [None]:
for i in range(len(env.intersections["intersection_1_1"][1])):
    print("New i " + str(i))
    for j in range(len(env.intersections["intersection_1_1"][1][i])):
        print("Start Road " + env.intersections["intersection_1_1"][1][i][j] + 
              " End Road " + env.intersections["intersection_1_1"][2][i][j])

In [None]:
# # disable print temporarily
# # iterate environment a lttle bit to test env
# actionInterval = 10


# for i in range(10):
#     if i % actionInterval == 0:
#         testAction = []
#         for i in range(0, 16):
#             n = random.randint(0, 8)
#             testAction.append(n)
#     observation, reward, done, debug = env.step(action=testAction)
#     if done:
#         break

# observation, reward, done, debug = env.step(action=testAction)
# print(observation)
# print(reward)

# observation = env.reset()
# print(observation)

In [None]:
episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0
    step = 0
    
    while not done:
#         env.render()
        step += 1
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        
        
        print(reward)
        avg_reward = 0
        for intersection in reward:
            avg_reward += intersection[1]
        avg_reward /= len(reward)
        score += avg_reward
        
    print('Episode:{} Score:{}'.format(episode, score))

In [None]:
print(n_state)

In [11]:
cityflowEnv = Cityflow("sample_data/jacob_config.json", 3600)

In [6]:
log_path = os.path.join('Training', 'Logs')

In [9]:
model = PPO("MultiInputPolicy", cityflowEnv, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


TypeError: empty(): argument 'size' must be tuple of SymInts, but found element of type numpy.ndarray at pos 2

In [None]:
print(type(env.observation_space.spaces))

## Simulation of Junction

In [149]:
import cityflow
eng_conf = cityflow.Engine("sample_data/jacob_config.json", thread_num=1)

In [150]:
for i in range(10000):
    eng_conf.next_step()