In [None]:
# !pip uninstall tensorflow

## Import relevant libraries

In [1]:
# import gym_cityflow
# from gym_cityflow.envs import Cityflow
import gym
import sys
import os
import random
import cityflow
import numpy as np
import json

from gym import spaces
from stable_baselines3 import A2C
from stable_baselines3 import DQN
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env

## Create gym environment for Cityflow

In [2]:
'''
action space = num of lighPhases


                |
                |
                N
                |
                |
------W----[][][][][]-------E-------
                |
                |
                S
                |
                |


go straight (W/E), (N/S)
straight + right (N), (S), (E), (W)
right only (W/E), (N/S)

total 8 phases
################################################################

observation space = [for each intersection[for each phase [num of incoming vehicles for each phase, avg speed for that phase]]]

################################################################

needs to be a way to extract number of lightPhases for each intersection (done, roadnet file)

'''

'\naction space = num of lighPhases\n\n\n                |\n                |\n                N\n                |\n                |\n------W----[][][][][]-------E-------\n                |\n                |\n                S\n                |\n                |\n\n\ngo straight (W/E), (N/S)\nstraight + right (N), (S), (E), (W)\nright only (W/E), (N/S)\n\ntotal 8 phases\n################################################################\n\nobservation space = [for each intersection[for each phase [num of incoming vehicles for each phase, avg speed for that phase]]]\n\n################################################################\n\nneeds to be a way to extract number of lightPhases for each intersection (done, roadnet file)\n\n'

In [3]:
class cityFlowGym(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, config_path, episodeSteps):
        
        # edit config_path to change replayLogFile name for each step
#         with open(config_path, "r+") as jsonFile:
#             data = json.load(jsonFile)
#             fileName = data["replayLogFile"].split(".")[0]
#             data["replayLogFile"] = fileName + "_" + str(replayNumber) + ".txt"

#             jsonFile.seek(0)  # rewind
#             json.dump(data, jsonFile)
        
        # creating Cityflow engine
        self.engine = cityflow.Engine(config_path, thread_num=1)
        
        #open cityflow config file into dict
        self.configDict = json.load(open(config_path))
        #open cityflow roadnet file into dict
        self.roadnetDict = json.load(open(self.configDict['dir'] + self.configDict['roadnetFile']))
        self.flowDict = json.load(open(self.configDict['dir'] + self.configDict['flowFile']))
        
        #steps per episode
#         self.steps_per_episode = episodeSteps
        self.isDone = False
        self.currStep = 0
        self.maxStep = episodeSteps
        self.minLighphaseTime = 0 #approximate realistic time
        self.maxLighphaseTime = 60 #approximate realistic time
        self.maxSpeed = self.flowDict[0]["vehicle"]["maxSpeed"]
        self.info = {}
        self.maxLightPhase = 0
        
        # create dict of controllable intersections and number of light phases
        self.intersections = {}
        for i in range(len(self.roadnetDict['intersections'])):
            # check if intersection is controllable
            if self.roadnetDict['intersections'][i]['virtual'] == False:
                currIntersection = self.roadnetDict['intersections'][i]
                
                # change maxLightPhase if needed for observationSpace definition
                if self.maxLightPhase < len(currIntersection['trafficLight']['lightphases']):
                    self.maxLightPhase = len(currIntersection['trafficLight']['lightphases'])
                
                # create incoming roadlink key that contains all incoming road lane id for each roadLink index
                roadLinkIntersectionDict = {}
                for roadLinkIndex, roadLink in enumerate(currIntersection["roadLinks"]):
                    startRoad = currIntersection["roadLinks"][roadLinkIndex]["startRoad"]
                    roadLinkSet = set()
                    for laneLinkIndex, laneLink in enumerate(currIntersection["roadLinks"][roadLinkIndex]["laneLinks"]):
                        tempLaneLink = startRoad + "_" + str(laneLink["startLaneIndex"])
                        roadLinkSet.add(tempLaneLink)
                    roadLinkIntersectionDict[roadLinkIndex] = roadLinkSet
                    
                # using the roadLinkIntersectionDict, we create the incoming road lanes and store it to each lightphase    
                lightPhaseRoadLaneIntersectionDict = {}
                for lightPhase in range(len(currIntersection['trafficLight']['lightphases'])):
                    availableRoadLinks = set()
                    for roadLinkIndex in currIntersection['trafficLight']['lightphases'][lightPhase]["availableRoadLinks"]:
                        availableRoadLinks.update(roadLinkIntersectionDict[roadLinkIndex])
                    lightPhaseRoadLaneIntersectionDict[lightPhase] = availableRoadLinks

                # add intersection to dict where key = intersection_id
                # value = no of lightPhases, incoming lane names, outgoing lane names, directions for each lane group
                self.intersections[self.roadnetDict['intersections'][i]['id']] = { 
                    "lightPhases" : lightPhaseRoadLaneIntersectionDict,
                    "incomingRoadLinks" : roadLinkIntersectionDict
                }
        
        #setup intersectionNames list for agent actions
        self.intersectionNames = []
        for key in self.intersections:
            self.intersectionNames.append(key)
        
        #define action space (num of lightPhases for each intersection) MultiDiscrete()
        actionSpaceArray = []
        upperBound = []
        for intersection in self.intersections:
            upperBound.append(len(self.intersections[intersection]["lightPhases"]))
            upperBound.append(self.maxLighphaseTime)
#             lightphaseDurationSpace = self.minLighphaseTime = 0 #approximate realistic time
        

        self.action_space = spaces.Box(
            np.array([0 for space in range(len(upperBound))]).astype(np.int32),
            np.array(upperBound).astype(np.int32),
            dtype = np.int32
        )
        
        # define observation space
        numOfIntersections = len(self.intersectionNames)
        observationSpace = {
                "numVehicles" : spaces.Box(0, np.inf, (numOfIntersections, self.maxLightPhase), dtype=np.int32),
                "numWaitingVehicles" : spaces.Box(0, np.inf, (numOfIntersections, self.maxLightPhase), dtype=np.int32),
                "avgSpeed" : spaces.Box(0, np.inf, (numOfIntersections, self.maxLightPhase), dtype=np.float32)

            }
        
        self.observation_space = spaces.Dict(observationSpace)

    def step(self, action):
        actionArr = np.ndarray.tolist(action)
        minTimer = -1
        negRewards = False # true means set negative infinity if lightphase action given is not part of the maxlightphases
        for intersection in range(len(self.intersectionNames)):
            # check if lightphase in intersection
            if actionArr[intersection*2] <= len(self.intersections[self.intersectionNames[intersection]]["lightPhases"]):
                self.engine.set_tl_phase(self.intersectionNames[intersection], int(actionArr[intersection*2]))
                if actionArr[1+intersection*2] < minTimer:
                    minTimer = intersection[1]
                    
            else:
                negRewards = True
                
        # let scenario run for minTimer long
        if minTimer == -1:
            minTimer = 10
        
        for second in range(minTimer):
            self.engine.next_step()
        
        obs = self.get_observation()
        reward = self.get_reward(obs, negRewards)
        self.isDone = self.currStep >= self.maxStep
        info = {}
        self.currStep += 1
        return obs, reward, self.isDone, info
    
    def get_observation(self):
        # get waiting vehicles for each lane first (key = laneID, value = numVehiclesWaiting)
        vehiclesWaitingByLaneDict = self.engine.get_lane_waiting_vehicle_count()
#         print(vehiclesWaitingByLaneDict)
        
        # get all vehicles speed (key = vehId, value = speed)
        vehiclesSpeedDict = self.engine.get_vehicle_speed()
#         print(vehiclesSpeedDict)
        
        # get all vehicles for each lane (key = laneId, value = [vehId])
        vehicleLaneDict = self.engine.get_lane_vehicles()
#         print(vehicleLaneDict)
        
        # create observation space for number of waiting vehicles and avgSpeed of moving+waiting vehicles of each lane
        # , for each lightphase
        
        # create for each roadlane first
        '''
        for roadLaneDict = {intersectionId : 
                                {roadLaneId: 
                                    {"numVehicles" : int,
                                     "numWaitingVehicles" : int,
                                     "avgSpeed" : float
                                                
                                    }
                
                                }
        
                            }
        '''
        roadLaneDict = {}
        for intersectionId, intersectionValue in self.intersections.items():
            roadLaneByIntersectionDict = {}
            
            # get a set of all incoming roadlanes for the intersection
            roadLaneByIntersectionSet = set()
            for roadLane in intersectionValue["incomingRoadLinks"].values():
                roadLaneByIntersectionSet.update(roadLane)
            
            # for each roadlane, find out num of waiting vehicles, and vehicles (waiting + nonwaiting) with speed
            for roadLane in roadLaneByIntersectionSet:
                tempRoadLaneDict = {}
                tempVehiclesArr = vehicleLaneDict[roadLane]
                tempRoadLaneDict["numVehicles"] = len(tempVehiclesArr)
                tempRoadLaneDict["numWaitingVehicles"] = vehiclesWaitingByLaneDict[roadLane]
                if len(tempVehiclesArr) == 0:
                    tempRoadLaneDict["avgSpeed"] = 0
                else:
                    tempAvgSpeed = 0
                    for vehicle in tempVehiclesArr:
                        tempAvgSpeed += vehiclesSpeedDict[vehicle]
                    tempRoadLaneDict["avgSpeed"] = tempAvgSpeed/len(tempVehiclesArr)
                
                roadLaneByIntersectionDict[roadLane] = tempRoadLaneDict
                
            # add intersection to roadLaneDict
            roadLaneDict[intersectionId] = roadLaneByIntersectionDict
        
        
        
        # define observation space
        observationSpace = {
                "numVehicles" : [],
                "numWaitingVehicles" : [],             
                "avgSpeed" : []
            }
        
        # for each intersection 
        for intersectionId, intersectionValue in self.intersections.items():
            numVehiclesArr = []
            numWaitingVehiclesArr = []             
            avgSpeedArr = []
            
            # for each lightphase in the intersection
            for lightPhase, roadLaneArr in intersectionValue["lightPhases"].items():
                totalVehiclesByPhase = 0
                waitingVehiclesByPhase = 0
                totalSpeedByPhase = 0
                
                # for each roadLane (availableRoadLinks) in each lightphase
                for roadLaneId in roadLaneArr:
                    totalVehiclesByPhase += roadLaneDict[intersectionId][roadLaneId]["numVehicles"]
                    waitingVehiclesByPhase += roadLaneDict[intersectionId][roadLaneId]["numWaitingVehicles"]
                    totalSpeedByPhase += (roadLaneDict[intersectionId][roadLaneId]["numVehicles"] * 
                                          roadLaneDict[intersectionId][roadLaneId]["avgSpeed"])
                    
                # error checking if theres no vehicles in the lane to eliminate divide by zero error
                if totalVehiclesByPhase == 0:
                    avgSpeedByPhase = 0
                else:
                    avgSpeedByPhase = totalSpeedByPhase/totalVehiclesByPhase
                    
                numVehiclesArr.append(totalVehiclesByPhase)
                numWaitingVehiclesArr.append(waitingVehiclesByPhase)
                avgSpeedArr.append(avgSpeedByPhase)
                
            # convert to np array
            tempNumVehicles = np.array(numVehiclesArr)
            tempNumWaitingVehicles = np.array(numWaitingVehiclesArr)
            tempAvgSpeed = np.array(avgSpeedArr)
            
            observationSpace["numVehicles"].append(tempNumVehicles)
            observationSpace["numWaitingVehicles"].append(tempNumWaitingVehicles)
            observationSpace["avgSpeed"].append(tempAvgSpeed)
                
        # convert to np array
        observationSpace["numVehicles"] = np.array(observationSpace["numVehicles"])
        observationSpace["numWaitingVehicles"] = np.array(observationSpace["numWaitingVehicles"])
        observationSpace["avgSpeed"] = np.array(observationSpace["avgSpeed"])

        return observationSpace
        
    def get_reward(self, observationSpace, negRewards):
        
        # if model returns lightphase that is not possible
        if negRewards == True:
            return -np.inf
        # aggregate total reward from all intersections
        totalReward = 0
        
        # numVehicles reward calc
        for intersection in observationSpace["numVehicles"]:
            for veh in intersection:
                
                totalReward -= veh
#                 print("veh", totalReward)
                
        # numWaitingVehicles reward calc
        for intersection in observationSpace["numWaitingVehicles"]:
            for waitingVeh in intersection:
                if waitingVeh == 0:
                        totalReward += 100
                else:
                    totalReward -= waitingVeh*100
#                 print("waitingVeh", totalReward)
                    
                        
        # avgSpeed reward calc
        for intersection in observationSpace["numWaitingVehicles"]:
            for speed in intersection:
                
                #reward if speed is
                if (speed-5) >= self.maxSpeed:
                    totalReward += 50
                else:
                    totalReward -= ((self.maxSpeed - speed)/self.maxSpeed)*50
#                 print("speed", totalReward)
        
            
        return totalReward/len(self.intersectionNames)
            
    def reset(self):
#         self.engine.reset()
        self.currStep = 0
        obs = self.get_observation()
#         print("obs", obs)
        self.isDone = False
        return obs
    

    def render(self, mode='human', close=False):
        pass

In [4]:
# cityflowEnv = DummyVecEnv([lambda: cityFlowGym("sample_data/jacob_config.json", 3600)])

In [5]:
cityflowEnv = cityFlowGym("sample_data/jacob_config.json", 3600)

In [6]:
log_path = os.path.join('Training', 'Logs')

In [15]:
model = PPO("MultiInputPolicy", cityflowEnv, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [16]:
model.learn(total_timesteps=1000000)

Logging to Training/Logs/PPO_3
-----------------------------
| time/              |      |
|    fps             | 13   |
|    iterations      | 1    |
|    time_elapsed    | 152  |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | -1.8e+08     |
| time/                   |              |
|    fps                  | 9            |
|    iterations           | 2            |
|    time_elapsed         | 449          |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 8.469215e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.84        |
|    explained_variance   | -5.96e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 3.48e+11     |
|    n_updates            | 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.6e+03      |
|    ep_rew_mean          | -1.81e+08    |
| time/                   |              |
|    fps                  | 1            |
|    iterations           | 11           |
|    time_elapsed         | 11801        |
|    total_timesteps      | 22528        |
| train/                  |              |
|    approx_kl            | 3.070454e-08 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.84        |
|    explained_variance   | 1.79e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 3.46e+11     |
|    n_updates            | 100          |
|    policy_gradient_loss | -5.81e-06    |
|    std                  | 1            |
|    value_loss           | 7.08e+11     |
------------------------------------------
-------------------------------------------
| rollout/

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | -1.81e+08     |
| time/                   |               |
|    fps                  | 1             |
|    iterations           | 20            |
|    time_elapsed         | 38200         |
|    total_timesteps      | 40960         |
| train/                  |               |
|    approx_kl            | 5.3085387e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 3.52e+11      |
|    n_updates            | 190           |
|    policy_gradient_loss | -2.71e-05     |
|    std                  | 1             |
|    value_loss           | 7.05e+11      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 3.6e+03       |
|    ep_rew_mean          | -1.81e+08     |
| time/                   |               |
|    fps                  | 0             |
|    iterations           | 29            |
|    time_elapsed         | 74625         |
|    total_timesteps      | 59392         |
| train/                  |               |
|    approx_kl            | 4.9744267e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.84         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 3.55e+11      |
|    n_updates            | 280           |
|    policy_gradient_loss | -2.35e-05     |
|    std                  | 1             |
|    value_loss           | 7.05e+11      |
-------------------------------------------
--------------------------------

KeyboardInterrupt: 

In [17]:
models_dir = "models/PPO"
model.save(f"{models_dir}")



In [None]:
episodes = 1
for episode in range(1, episodes+1):
    state = cityflowEnv.reset()
#     cityflowEnv.engine.reset()
    done = False
    score = 0
    step = 0
    
    while not done:
#         env.render()
        action = cityflowEnv.action_space.sample()
        n_state, reward, done, info = cityflowEnv.step(action)
        print("action", action)
        print("n_state", n_state)
        print("reward", reward)
        print("done", done)
        print("info", info)
        print("stepCount", cityflowEnv.currStep)
        score += reward
#         break
    score /= step
        
    print('Episode:{} Score:{} with {} steps'.format(episode, score, step))

In [46]:
state = cityflowEnv.reset()

In [9]:
action = cityflowEnv.action_space.sample()
print(action)

[ 3 53]


In [44]:
n_state, reward, done, info = cityflowEnv.step(action)

print("n_state", n_state)
print("reward", reward)
print("done", done)
print("info", info)
print("stepCount", cityflowEnv.currStep)

n_state {'numVehicles': array([[120,  16, 120,  16, 120,  16, 120,  16]]), 'numWaitingVehicles': array([[ 98,   0, 100,   0,  99,   0, 100,   0]]), 'avgSpeed': array([[ 2.33717185, 12.918325  ,  2.11277213, 12.918325  ,  2.18425764,
        12.918325  ,  2.04359524, 12.918325  ]])}
reward -39844.0
done False
info {}
stepCount 26


In [45]:
# print("n_state", n_state)
# print("reward", reward)
# print("done", done)
# print("info", info)
# print("stepCount", cityflowEnv.currStep)

In [None]:
# It will check your custom environment and output additional warnings if needed
check_env(cityflowEnv)

In [None]:
# actionSpaceArray = []

# lightphaseSpace = spaces.Discrete(8)
# lightphaseDurationSpace = spaces.Box(low = 0, high=60, shape=(1,), dtype=np.float32)
# # intersection1 = gym.spaces.Tuple((lightphaseSpace.n, lightphaseDurationSpace[0]))
# actionSpaceArray.append([lightphaseSpace.n, lightphaseDurationSpace.high[0]])
# lightphaseSpace = spaces.Discrete(3)
# lightphaseDurationSpace = spaces.Box(low = 10, high=20, shape=(1,), dtype=np.int32)
# # intersection2 = gym.spaces.Tuple((lightphaseSpace.n, lightphaseDurationSpace[0]))
# actionSpaceArray.append([lightphaseSpace.n, lightphaseDurationSpace.high[0]])

# action_space = spaces.MultiDiscrete(actionSpaceArray)




# for intersection in self.intersections:
# lightphaseSpace = spaces.Discrete(8)
# lightphaseDurationSpace = spaces.Box(low = 10, high=60, shape=(1,), dtype=np.int32)
# actionSpaceArray.append([lightphaseSpace, lightphaseDurationSpace]) # tuple for each intersection (lightphase next, duration)
# lightphaseSpace = spaces.Discrete(3)
# lightphaseDurationSpace = spaces.Box(low = 10, high=20, shape=(1,), dtype=np.int32)
# actionSpaceArray.append([lightphaseSpace, lightphaseDurationSpace]) # tuple for each intersection (lightphase next, duration)

# actionSpaceArray.append([i for i in range(8)])
# actionSpaceArray.append([i for i in range(10,61)])
# actionSpaceArray.append([i for i in range(3)])
# actionSpaceArray.append([i for i in range(10,21)])
# action_space = spaces.MultiDiscrete(actionSpaceArray) # multidiscrete for all intersections

# lightphaseSpace = spaces.Discrete(8)
# lightphaseDurationSpace = spaces.Box(low = 10, high=60, shape=(1,), dtype=np.int32)
# actionSpaceArray.append(gym.spaces.MultiDiscrete([lightphaseSpace, lightphaseDurationSpace]))
# lightphaseSpace = spaces.Discrete(3)
# lightphaseDurationSpace = spaces.Box(low = 10, high=20, shape=(1,), dtype=np.int32)
# actionSpaceArray.append(gym.spaces.MultiDiscrete([lightphaseSpace, lightphaseDurationSpace]))

# action_space = spaces.MultiDiscrete(np.array(actionSpaceArray))

# action_space = spaces.MultiDiscrete(actionSpaceArray) # multidiscrete for all intersections


action_space = spaces.Box(
            np.array([0, 0, 0, 0]).astype(np.int32),
            np.array([8, 60, 3, 10]).astype(np.int32),
            dtype=np.int32
        )


In [None]:
# np.ndarray.tolist(action_space.sample())

In [None]:
env = gym.make(
    "cityflow-v0",
    configPath="sample_data/jacob_config.json"
    ,episodeSteps=3600
)

In [None]:
env.getRoadnet()

In [None]:
# vec_env = DummyVecEnv([lambda: env])

In [None]:
# Check action space
print(type(env.action_space))
print(type(env.observation_space))
print(env.action_space)

In [None]:
print(env.action_space.sample())

In [None]:
print(env.actionSpaceArray)

In [None]:
print(env.intersections)

In [None]:
print(env.intersections["intersection_1_1"][1])
print(len(env.intersections["intersection_1_1"][1]))

In [None]:
print(env.intersections["intersection_1_1"][1][0])
print(len(env.intersections["intersection_1_1"][1][0]))
print()
print(env.intersections["intersection_1_1"][1][1])
print(len(env.intersections["intersection_1_1"][1][1]))
print()
print(env.intersections["intersection_1_1"][1][2])
print(len(env.intersections["intersection_1_1"][1][2]))

In [None]:
print(env.intersections["intersection_1_1"][2][0])
print(len(env.intersections["intersection_1_1"][2][0]))
print()
print(env.intersections["intersection_1_1"][2][1])
print(len(env.intersections["intersection_1_1"][2][1]))
print()
print(env.intersections["intersection_1_1"][2][2])
print(len(env.intersections["intersection_1_1"][2][2]))

In [None]:
print(env.intersections["intersection_1_1"][3])
print(len(env.intersections["intersection_1_1"][3]))

In [None]:
env.flowDict

In [None]:
len(env.flowDict)

In [None]:
for i in range(len(env.intersections["intersection_1_1"][1])):
    print("New i " + str(i))
    for j in range(len(env.intersections["intersection_1_1"][1][i])):
        print("Start Road " + env.intersections["intersection_1_1"][1][i][j] + 
              " End Road " + env.intersections["intersection_1_1"][2][i][j])

In [None]:
# # disable print temporarily
# # iterate environment a lttle bit to test env
# actionInterval = 10


# for i in range(10):
#     if i % actionInterval == 0:
#         testAction = []
#         for i in range(0, 16):
#             n = random.randint(0, 8)
#             testAction.append(n)
#     observation, reward, done, debug = env.step(action=testAction)
#     if done:
#         break

# observation, reward, done, debug = env.step(action=testAction)
# print(observation)
# print(reward)

# observation = env.reset()
# print(observation)

In [None]:
episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0
    step = 0
    
    while not done:
#         env.render()
        step += 1
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        
        
        print(reward)
        avg_reward = 0
        for intersection in reward:
            avg_reward += intersection[1]
        avg_reward /= len(reward)
        score += avg_reward
        
    print('Episode:{} Score:{}'.format(episode, score))

In [None]:
print(n_state)

In [None]:
cityflowEnv = Cityflow("sample_data/jacob_config.json", 3600)

In [None]:
log_path = os.path.join('Training', 'Logs')

In [None]:
model = PPO("MultiInputPolicy", cityflowEnv, verbose=1, tensorboard_log=log_path)

In [None]:
print(type(env.observation_space.spaces))

## Simulation of Junction

In [None]:
import cityflow
eng_conf = cityflow.Engine("sample_data/jacob_config.json", thread_num=1)

In [None]:
for i in range(10000):
    eng_conf.next_step()