# CarRacing-v2

https://gymnasium.farama.org/environments/box2d/car_racing/

In [1]:
import os
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [2]:
import gymnasium as gym

# Baseline: drive with uniformly random actions to see what an untrained agent scores.
env = gym.make("CarRacing-v2")

episodes = 5
for episode in range(1, episodes + 1):
    observation, info = env.reset()
    score = 0
    episode_over = False

    # An episode ends as soon as the env reports termination or truncation.
    while not episode_over:
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)
        score += reward
        episode_over = terminated or truncated
    print(f"Episode: {episode} Score: {score}")
env.close()

Episode: 1 Score: -35.691318327974834
Episode: 2 Score: -34.93150684931565
Episode: 3 Score: -35.27508090614943
Episode: 4 Score: -31.972789115646755
Episode: 5 Score: -38.11074918566834


In [3]:
# Repeats the env.close() call that already ends the previous cell.
env.close()

In [4]:
# Inspect the action space; the output below shows a 3-element float32 Box.
env.action_space 

Box([-1.  0.  0.], 1.0, (3,), float32)

In [5]:
# Draw one random action, the same call the random-agent loop above uses.
env.action_space.sample()

array([0.41892081, 0.42488927, 0.60337555], dtype=float32)

In [6]:
# Inspect the observation space; the output below shows uint8 values in [0, 255] with shape (96, 96, 3).
env.observation_space

Box(0, 255, (96, 96, 3), uint8)

In [7]:
# Draw one random observation from the observation space.
# NOTE(review): this prints the whole array repr; displaying only .shape / .dtype
# would keep the cell output short.
env.observation_space.sample()

array([[[106,  49, 132],
        [212,  58,  82],
        [161,  41, 211],
        ...,
        [ 71, 114, 207],
        [252, 174,  83],
        [150,  56,  56]],

       [[ 16, 207,  43],
        [ 59, 162,  29],
        [194,  63, 145],
        ...,
        [ 54, 192, 126],
        [139, 174,  84],
        [179, 115, 193]],

       [[149,  14,  99],
        [214, 140,  83],
        [104,  45,  76],
        ...,
        [ 41,  32, 161],
        [ 75, 226, 199],
        [233,  84,  72]],

       ...,

       [[136,   4, 241],
        [  4, 197, 184],
        [ 86, 201, 170],
        ...,
        [ 81, 118, 247],
        [145, 247,  33],
        [130,  23,  86]],

       [[239, 142,  43],
        [ 28, 202, 155],
        [159, 211,  19],
        ...,
        [145, 210, 164],
        [ 33, 228, 126],
        [  5,  48,  15]],

       [[153, 144,  44],
        [189, 225, 163],
        [246, 183,  66],
        ...,
        [160,  47, 241],
        [ 85, 100,  28],
        [ 79, 177,  31]]

## Training

In [8]:
log_path = os.path.join('Training', 'Logs_2') # Path: Training/Logs_2 (passed to PPO as tensorboard_log below)
log_path

'Training\\Logs_2'

In [9]:
# Build the vectorized training environment and the PPO agent.
# DummyVecEnv takes a list of callables that each create an environment, so the
# factory constructs the env itself instead of closing over the `env` name that
# is rebound on the very same line.
env = DummyVecEnv([lambda: gym.make("CarRacing-v2")])
# Observations are 96x96x3 uint8 images, so use the CNN policy; "MlpPolicy"
# would treat the flattened pixels as an unstructured feature vector.
model = PPO("CnnPolicy", env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [19]:
# Train the agent; progress is written to a run directory under log_path.
TOTAL_TIMESTEPS = 100_000
model.learn(total_timesteps=TOTAL_TIMESTEPS)

Logging to Training\Logs_2\PPO_2
-----------------------------
| time/              |      |
|    fps             | 15   |
|    iterations      | 1    |
|    time_elapsed    | 132  |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 13           |
|    iterations           | 2            |
|    time_elapsed         | 296          |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0043052514 |
|    clip_fraction        | 0.035        |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.92        |
|    explained_variance   | 0.0802       |
|    learning_rate        | 0.0003       |
|    loss                 | 0.705        |
|    n_updates            | 110          |
|    policy_gradient_loss | -0.00194     |
|    std                  | 0.893        |
|    value_loss           

<stable_baselines3.ppo.ppo.PPO at 0x22ea5ec04c0>

In [20]:
# Location under Training/Saved Models used by the save and load cells below.
PPO_Path = os.path.join('Training', 'Saved Models', 'PPO_Model_CarRacing')

In [21]:
# Persist the trained model to PPO_Path.
model.save(PPO_Path)

In [13]:
# Drop the in-memory model so the next cell has to reload it from disk.
# NOTE(review): execution counts In [13]/In [14] on this and the next cell predate
# the training/saving cells (In [19]-In [21]) — confirm the cells run in file order.
del model

In [14]:
# Reload the saved model from PPO_Path and attach the current `env` to it.
model = PPO.load(PPO_Path, env=env)

Wrapping the env in a VecTransposeImage.


## Testing and Evaluation

In [22]:
# Evaluate the trained policy over 10 rendered episodes.
env = gym.make("CarRacing-v2", render_mode='human')
try:
    # evaluate_policy returns (mean_reward, std_reward); keep it so the result
    # is actually reported instead of being discarded mid-cell.
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
finally:
    # Close the render window even when evaluation is interrupted.
    env.close()
print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")



KeyboardInterrupt: 

In [23]:
# predict() returns a pair whose first element is the action; index 0 keeps only that.
# `observation` is whatever the most recently executed rollout loop left behind.
model.predict(observation)[0]

array([-0.6043579 ,  0.05329743,  0.        ], dtype=float32)

In [24]:
# Open a CarRacing environment with render_mode='human'.
env = gym.make("CarRacing-v2", render_mode='human')

In [27]:
# Close the environment opened in the previous cell.
# NOTE(review): the execution counts (In [27] here, In [26] on the loop cell below)
# suggest this was run after the loop, although it precedes it in the file.
env.close()

In [26]:
# Roll out the trained policy for a few rendered episodes.
# The environment is created inside this cell so it does not depend on an `env`
# left open (or already closed) by earlier cells, and it is closed in `finally`
# so an interrupt does not leave the render window behind.
episodes = 5
env = gym.make("CarRacing-v2", render_mode='human')
try:
    for episode in range(1, episodes + 1):
        observation, info = env.reset()
        terminated = False
        truncated = False
        score = 0

        # An episode is over once the env reports termination or truncation.
        while not (terminated or truncated):
            action, _ = model.predict(observation)
            observation, reward, terminated, truncated, info = env.step(action)
            score += reward
        print(f"Episode: {episode} Score: {score}")
finally:
    env.close()

KeyboardInterrupt: 

In [29]:
# Rebind `env` to a CartPole-v1 environment.
# NOTE(review): `model` as built in this notebook is the CarRacing PPO agent; the
# discrete action shown in the next cell's output suggests a different (CartPole)
# model was loaded when these cells ran — confirm before relying on them.
env = gym.make("CartPole-v1")

In [34]:
# reset() returns an (observation, info) pair; keep the observation and ask the
# model for an action on it.
obs, reset_info = env.reset()
model.predict(obs)

(array(1, dtype=int64), None)

In [35]:
# Keep the predicted action and discard the second element of the returned pair.
action, _ = model.predict(obs)

In [36]:
# Sample a random action from the current env's action space (the output below is a plain integer).
env.action_space.sample()

0

In [37]:
# Apply the predicted action; the output is the (observation, reward, terminated, truncated, info) tuple.
env.step(action)

(array([-0.03060195,  0.1461695 , -0.02986192, -0.29166082], dtype=float32),
 1.0,
 False,
 False,
 {})

In [39]:
# Directory of a single run inside log_path, passed to TensorBoard below.
# NOTE(review): the training output above reports logging to PPO_2, not PPO_4 —
# confirm which run is intended.
training_log_path = os.path.join(log_path, 'PPO_4')

In [44]:
# Launch TensorBoard on the selected run via a shell command ({...} interpolates the Python variable).
# NOTE(review): the ^C in the output suggests the command was stopped with an interrupt.
!tensorboard --logdir={training_log_path}

^C
