In [1]:
%matplotlib widget

In [2]:
import matplotlib.pyplot as plt

# Using a Neural Network to drive a car through a racetrack

## Creating the environment

To have a neural network try to navigate through a racetrack, we need to use a principle called "reinforcement learning". Because it involves a ridiculous amount of complicated math and algorithms, we are using a library called stable_baselines3 to train the network. However, it needs a specific environment to work with. Therefore we need to use a tool called Gymnasium to create so-called action and observation spaces as well as an environment class.

In [3]:
import numpy as np
import os
from RacingSimulator_notebook import Simulator, Racetrack
from gymnasium import Env, spaces

class Racingenvironment(Env):
    """Gymnasium environment that wraps the racing simulator for stable_baselines3.

    Observations are declared as 60 float32 values in [0, 1]. Actions are two
    float32 values in [-1, 1]: the first is converted to a velocity, the second
    to a steering angle before being handed to the simulator.
    """

    metadata = {"render_modes": ["human"], "render_fps": 30}

    #scaling from the normalised action range [-1, 1] to the values passed to the simulator
    VELOCITY_SCALE = 50.0 #velocity = (action[0] + 1) * VELOCITY_SCALE, i.e. 0 to 100
    STEERING_SCALE = 5.0 #steering = action[1] * STEERING_SCALE, i.e. -5 to 5

    #get the environment
    def __init__(self, simulator):
        """Store the simulator and declare the observation and action spaces.

        Args:
            simulator: object providing ``reset()`` and ``step(velocity, steering)``.
        """
        super().__init__()

        #get our simulator
        self.simulator = simulator

        #now we create our spaces
        #0 to 1 represents the interval of possible distances
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(60,), dtype=np.float32)
        #first element of the array contains the velocity, second is the steering angle
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)

    #make the reset function compatible
    def reset(self, seed=None, options=None):
        """Reset the simulator and return ``(observation, info)``.

        ``seed`` is forwarded to ``Env.reset``; ``options`` is accepted for API
        compatibility and not used.
        """
        super().reset(seed=seed)

        obs = self.simulator.reset()
        #np.array always copies and also accepts plain sequences
        return np.array(obs, dtype=np.float32), {}

    #make the step function compatible
    def step(self, action):
        """Apply one action and return ``(obs, reward, terminated, truncated, info)``.

        Args:
            action: sequence of two numbers, ``[velocity_command, steering_command]``.
        """
        velocity = (float(action[0]) + 1.0) * self.VELOCITY_SCALE
        steering = float(action[1]) * self.STEERING_SCALE

        obs, reward, done = self.simulator.step(velocity, steering)

        #some stuff Gym needs: plain Python float/bool instead of possible numpy scalars
        terminated = bool(done)
        truncated = False #this wrapper never cuts an episode short itself
        info = {}

        return np.array(obs, dtype=np.float32), float(reward), terminated, truncated, info

Let's check if our environment works

In [4]:
from stable_baselines3.common.env_checker import check_env

#load tracks: each entry is the json file name (looked up in the current working
#directory) followed by the three positional values passed on to Racetrack unchanged
track1, track2, track3 = (
    Racetrack(os.path.join(os.getcwd(), json_name), *extra_args)
    for json_name, *extra_args in (
        ('track1.json', 500, 100, 0),
        ('track2.json', 200, 375, 0),
        ('track3.json', 400, 500, 0),
    )
)

#load the simulator with all three tracks
all_tracks = [track1, track2, track3]
simulator = Simulator(all_tracks)

#wrap it in our environment and let stable_baselines3 validate it
env = Racingenvironment(simulator)
check_env(env)

  if not hasattr(np, "object"):


## Creating and training the neural network

### SAC-agent
First we create a neural network to take in the current observations and calculate the optimal velocity and steering angle. Our agent will be the "Soft Actor-Critic" (SAC) and our neural network will be just a few layers of dense neurons.

In [5]:
from stable_baselines3 import PPO, SAC
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv

#the parameters for SAC-training
sac_steps = 250_000 #total number of timesteps to train for

sac_lr = 3e-4
sac_buffer_size = 1_000_000
sac_learning_starts = 100
sac_batch_size = 256
sac_tau = 5e-3
sac_gamma = 0.99
sac_train_freq = 1

#switch: set to False to skip the SAC training cell
sac_training = True

In [6]:
#train only when the switch is on; it is cleared at the end so that re-running
#this cell does not start a second training run
if sac_training:

    #create the neural network (policy)
    #SAC is an off-policy algorithm: its critic is configured under the key "qf"
    policy_kwargs = dict(net_arch=dict(
        pi=[128, 128, 64], #actor network
        qf=[256, 256, 128] #critic network
    ))

    #create a vectorenvironment, necessary for training with an agent on multiple environments
    vec_env = DummyVecEnv([lambda: Racingenvironment(simulator)])

    #create the model with the agent and network and train it
    #the hyperparameters and the network defined above are passed in explicitly,
    #otherwise SAC silently falls back to its own defaults
    model = SAC(
        "MlpPolicy",
        vec_env,
        learning_rate=sac_lr,
        buffer_size=sac_buffer_size,
        learning_starts=sac_learning_starts,
        batch_size=sac_batch_size,
        tau=sac_tau,
        gamma=sac_gamma,
        train_freq=sac_train_freq,
        policy_kwargs=policy_kwargs,
        verbose=1,
        device="cpu",
    )
    model.learn(total_timesteps=sac_steps)

    #save the trained network
    model.save("SAC_250k_mult_tracks")

    del model
    sac_training = False

Using cpu device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 91       |
|    time_elapsed    | 3        |
|    total_timesteps | 313      |
| train/             |          |
|    actor_loss      | -6.87    |
|    critic_loss     | 152      |
|    ent_coef        | 0.939    |
|    ent_coef_loss   | -0.198   |
|    learning_rate   | 0.0003   |
|    n_updates       | 212      |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 76       |
|    time_elapsed    | 10       |
|    total_timesteps | 806      |
| train/             |          |
|    actor_loss      | -14.1    |
|    critic_loss     | 129      |
|    ent_coef        | 0.817    |
|    ent_coef_loss   | -0.586   |
|    learning_rate   | 0.0003   |
|    n_updates       | 705      |
---------------------------------
-------------------------------

### PPO-agent
We can also use a different agent.

In [7]:
#parameters for PPO-training
ppo_steps = 250_000 #total number of timesteps to train for

ppo_lr = 3e-4
ppo_n_steps = 2_048
ppo_batchsize = 64
ppo_n_epochs = 10
ppo_gamma = 0.99
ppo_gae_lamda = 0.95
ppo_clip_range = 0.2

#switch: set to True to run the PPO training cell
ppo_training = False

In [8]:
#train only when the switch is on; it is cleared at the end so that re-running
#this cell does not start a second training run
if ppo_training:

    #create the neural network (policy)
    policy_kwargs = dict(net_arch=dict(
        pi=[64, 64], #actor network
        vf=[128, 64] #critic network
    ))

    #create a vectorenvironment, necessary for training with an agent on multiple environments
    vec_env = DummyVecEnv([lambda: Racingenvironment(simulator)])

    #create the model with the agent and network and train it
    #the hyperparameters and the network defined above are passed in explicitly,
    #otherwise PPO silently falls back to its own defaults
    model = PPO(
        "MlpPolicy",
        vec_env,
        learning_rate=ppo_lr,
        n_steps=ppo_n_steps,
        batch_size=ppo_batchsize,
        n_epochs=ppo_n_epochs,
        gamma=ppo_gamma,
        gae_lambda=ppo_gae_lamda,
        clip_range=ppo_clip_range,
        policy_kwargs=policy_kwargs,
        verbose=1,
        device="cpu",
    )
    model.learn(total_timesteps=ppo_steps)

    #save the trained network
    model.save("PPO_250k")

    del model
    ppo_training = False

## Let the trained neural network drive the car

Now we reload our trained network and use it to navigate through the track (hopefully) without crashing. But because the notebook doesn't work properly with live plots, we will end the notebook here, switch to VS, and use the following bit of code.

In [9]:
# The replay script below is wrapped in a string literal, so this cell does not
# run it; the cell only evaluates to (and displays) the string. It is meant to
# be copied into a standalone .py file.
# NOTE(review): the script loads "SAC_test", while the training cells in this
# notebook save "SAC_250k_mult_tracks" and "PPO_250k" - confirm the file name.
# NOTE(review): after an episode ends the script calls simulator.reset() and
# discards its return value, so the next model.predict() still receives the
# observation from the finished episode - consider `obs, _ = env.reset()`.
"""
import time
import os
from stable_baselines3 import SAC

from RacingSimulator import Racetrack, Simulator, LiveVisualizer
from environment import Racingenvironment

if __name__ == "__main__":
    #load the racetrack
    track=Racetrack(os.path.join(os.getcwd(), 'track1.json'), 500, 100, 0)

    #load the environment
    simulator = Simulator([track])
    env = Racingenvironment(simulator)

    #load the model
    model = SAC.load("SAC_test")
    obs, _ = env.reset()
    
    #load the visualizer
    visualizer = LiveVisualizer(simulator)

    done = truncated = False

    while True:
        #feed the observation-data into the neural network
        action, states = model.predict(obs, deterministic = True)#deterministic ensures that the most probable action is always taken

        #let's drive
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        visualizer.update(obs, reward, done)
        time.sleep(0.1)
        if terminated or truncated:
            simulator.reset()

"""

'\nimport time\nimport os\nfrom stable_baselines3 import SAC\n\nfrom RacingSimulator import Racetrack, Simulator, LiveVisualizer\nfrom environment import Racingenvironment\n\nif __name__ == "__main__":\n    #load the racetrack\n    track=Racetrack(os.path.join(os.getcwd(), \'track1.json\'), 500, 100, 0)\n\n    #load the environment\n    simulator = Simulator([track])\n    env = Racingenvironment(simulator)\n\n    #load the model\n    model = SAC.load("SAC_test")\n    obs, _ = env.reset()\n\n    #load the visualizer\n    visualizer = LiveVisualizer(simulator)\n\n    done = truncated = False\n\n    while True:\n        #feed the observation-data into the neural network\n        action, states = model.predict(obs, deterministic = True)#deterministic ensures that the most probable action is always taken\n\n        #let\'s drive\n        obs, reward, terminated, truncated, info = env.step(action)\n        done = terminated or truncated\n        visualizer.update(obs, reward, done)\n        