In [1]:
from __future__ import print_function

import gzip
import os
import pickle
import time

import pygame
import gymnasium as gym
import numpy as np

# --- Configuration ---
# Where recorded demonstrations are stored (a gzip-compressed pickle).
DATA_DIR = 'data'
DATA_FILE = 'data.gzip'

# Window geometry: the native 96x96 game frame is upscaled by an integer factor.
GAME_WIDTH = 96
GAME_HEIGHT = 96
SCALE_FACTOR = 5  # change this to taste
SCREEN_WIDTH, SCREEN_HEIGHT = GAME_WIDTH * SCALE_FACTOR, GAME_HEIGHT * SCALE_FACTOR
FPS = 30  # frame-rate cap handed to the pygame clock

# --- Mutable session state (rebound inside rollout() via `global`) ---
agent_action = np.zeros(3, dtype=np.float32)  # [steer, gas, brake]
acceleration_pressed = restart_train = exit_train = pause_train = False


def _blit_observation(screen, obs):
    """Scale one environment frame to the window size and draw it on ``screen``."""
    if obs is None:
        return
    # Frames are (row, col, channel); make_surface wants (x, y, channel).
    surf = pygame.surfarray.make_surface(obs.transpose(1, 0, 2))
    up = pygame.transform.scale(surf, (SCREEN_WIDTH, SCREEN_HEIGHT))
    screen.blit(up, (0, 0))


def _load_observations(path):
    """Return the transitions previously saved at ``path``, or ``[]`` if none are usable.

    An unreadable file is moved aside to ``path + '.corrupt'`` rather than left in
    place, because the session later overwrites ``path`` when it saves.
    """
    if not os.path.exists(path):
        return []
    try:
        # NOTE: pickle can execute arbitrary code on load; only open files you recorded yourself.
        with gzip.open(path, 'rb') as f:
            observations = pickle.load(f)
        print(f"Loaded {len(observations)} existing observations.")
        return observations
    except Exception as err:
        print("Corrupt data file—starting fresh.")
        backup_path = path + '.corrupt'
        try:
            os.replace(path, backup_path)
            print(f"Unreadable file kept as {backup_path} ({err!r}).")
        except OSError as move_err:
            print(f"Could not back up unreadable file: {move_err}")
        return []


def rollout(env):
    """Drive ``env`` from the keyboard and record every transition to disk.

    Opens a pygame window, shows the upscaled observation each frame and maps
    the arrow keys onto the module-level ``agent_action`` vector
    ``[steer, gas, brake]``. Each step appends
    ``(prev_obs, action, next_obs, reward, done)`` to the list loaded from
    ``DATA_DIR/DATA_FILE``; the combined list is written back when the session
    ends. The stored ``action`` is the vector actually sent to the environment,
    i.e. a pressed brake is recorded as ``0.2``, not ``1.0``.

    Controls: Left/Right steer, Up gas, Down brake, Space pause, Enter restart
    the episode, Esc (or closing the window) exit. The session also stops by
    itself once more than 20 episodes have finished.

    Saving and cleanup run in a ``finally`` block, so transitions collected so
    far are still written if the loop is interrupted or raises.

    :param env: a Gymnasium environment following the 5-tuple ``step`` API
        whose observations are H x W x 3 image arrays.
    """
    global agent_action, restart_train, exit_train, pause_train, acceleration_pressed

    pygame.init()
    observations_list_path = os.path.join(DATA_DIR, DATA_FILE)
    # Bound before the try so the finally block can always inspect it.
    observations = []

    try:
        screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
        pygame.display.set_caption("Car Racing (v3) - Manual Training")
        clock = pygame.time.Clock()
        font = pygame.font.Font(None, 36)

        # — Reset state & load old data —
        agent_action = np.zeros(env.action_space.shape[0], dtype=np.float32)
        acceleration_pressed = False
        exit_train = False
        pause_train = False
        restart_train = False

        observations = _load_observations(observations_list_path)

        # — Start first episode —
        current_obs, info = env.reset()
        total_reward = 0.0
        episode_count = 1
        running = True

        print("Manual data collection started.")
        print("Controls: ←/→ steer, ↑ gas, ↓ brake, Space pause, Enter restart, Esc exit")

        while running:
            # — Input handling —
            for evt in pygame.event.get():
                if evt.type == pygame.QUIT:
                    exit_train = True
                    running = False

                if evt.type == pygame.KEYDOWN:
                    if evt.key == pygame.K_ESCAPE:
                        exit_train = True
                        running = False
                    elif evt.key in (pygame.K_RETURN, pygame.K_KP_ENTER):
                        restart_train = True
                    elif evt.key == pygame.K_SPACE:
                        pause_train = not pause_train
                        print("Paused" if pause_train else "Resumed")

                    # Driving keys. Steering clears the gas so a single key press
                    # yields a one-hot style action; gas clears the brake.
                    if evt.key == pygame.K_UP:
                        acceleration_pressed = True
                        agent_action[1], agent_action[2] = 1.0, 0.0
                    elif evt.key == pygame.K_DOWN:
                        agent_action[2] = 1.0
                    elif evt.key == pygame.K_LEFT:
                        agent_action[0], agent_action[1] = -1.0, 0.0
                    elif evt.key == pygame.K_RIGHT:
                        agent_action[0], agent_action[1] = +1.0, 0.0

                if evt.type == pygame.KEYUP:
                    if evt.key == pygame.K_UP:
                        acceleration_pressed = False
                        agent_action[1] = 0.0
                    elif evt.key == pygame.K_DOWN:
                        agent_action[2] = 0.0
                    elif evt.key in (pygame.K_LEFT, pygame.K_RIGHT):
                        agent_action[0] = 0.0
                        # Resume the gas that steering suppressed if Up is still held.
                        agent_action[1] = 1.0 if acceleration_pressed else 0.0

            # — Pause screen: keep showing the last frame, do not step the env —
            if pause_train:
                _blit_observation(screen, current_obs)
                txt = font.render("Paused", True, (255, 255, 0))
                rect = txt.get_rect(center=(SCREEN_WIDTH//2, SCREEN_HEIGHT//2))
                screen.blit(txt, rect)
                pygame.display.flip()
                clock.tick(FPS)
                time.sleep(0.1)
                continue

            # — Step the env —
            action = agent_action.copy()
            if action[2] > 0:
                action[2] = 0.2  # fixed brake intensity

            prev = current_obs
            current_obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated

            observations.append((prev, action, current_obs, reward, done))
            total_reward += reward

            # — Restart logic —
            if restart_train:
                print(f"Episode {episode_count} restart. Reward so far: {total_reward:.2f}")
                restart_train = False
                current_obs, info = env.reset()
                total_reward = 0.0
                agent_action.fill(0)
                acceleration_pressed = False
                continue

            # — Episode done —
            if done:
                print(f"Episode {episode_count} done. Reward: {total_reward:.2f}. Obs: {len(observations)}")
                episode_count += 1
                total_reward = 0.0

                if episode_count > 20:
                    print("20+ episodes reached; exiting.")
                    exit_train = True
                    running = False
                else:
                    current_obs, info = env.reset()
                    agent_action.fill(0)
                    acceleration_pressed = False

            # — Render frame —
            _blit_observation(screen, current_obs)
            pygame.display.flip()
            clock.tick(FPS)
    finally:
        # — Save and cleanup: runs on normal exit, on exceptions and on interrupts —
        try:
            if observations:
                os.makedirs(DATA_DIR, exist_ok=True)
                print(f"Saving {len(observations)} observations to {observations_list_path}...")
                with gzip.open(observations_list_path, 'wb') as f:
                    pickle.dump(observations, f)
                print("Save complete.")
            else:
                print("No observations to save.")
        finally:
            try:
                env.close()
            finally:
                pygame.quit()
                print("Session ended.")


# Create the continuous-action CarRacing environment and start a recording session.
env_id = 'CarRacing-v3'
try:
    env = gym.make(env_id, continuous=True)
except Exception as e:
    print(f"Error creating {env_id}: {e}")
    print("Install gymnasium[box2d] if needed.")
    # `exit` is a helper meant for the interactive shell; raising SystemExit
    # stops this cell/script explicitly and keeps the original error chained.
    raise SystemExit(1) from e

print(f"Env: {env_id}")
print(f"Action space: {env.action_space}")
print(f"Obs space:   {env.observation_space}")
print(f"ACTIONS = {env.action_space.shape[0]}")
rollout(env)


pygame 2.6.1 (SDL 2.28.4, Python 3.10.16)
Hello from the pygame community. https://www.pygame.org/contribute.html
Env: CarRacing-v3
Action space: Box([-1.  0.  0.], 1.0, (3,), float32)
Obs space:   Box(0, 255, (96, 96, 3), uint8)
ACTIONS = 3
Loaded 500632 existing observations.
Manual data collection started.
Controls: ←/→ steer, ↑ gas, ↓ brake, Space pause, Enter restart, Esc exit
Saving 500702 observations to data/data.gzip...
Save complete.
Session ended.


In [2]:
import gzip
import os
import pickle
import random

from torchvision import transforms

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


In [3]:
# --- Paths ---
DATA_DIR = 'data'
DATA_FILE = 'data.gzip'
MODEL_FILE = 'model.pt'

# --- Training hyper-parameters ---
BATCH_SIZE = 32         # samples per mini-batch
EPOCHS = 30             # passes over the training split
TRAIN_VAL_SPLIT = 0.85  # fraction of samples used for training; the rest validates

# Discrete action vocabulary; a sample's class label is its index in this list.
actions_set = [
    [0, 0, 0],   # no action
    [-1, 0, 0],  # left
    [1, 0, 0],   # right
    [0, 1, 0],   # acceleration
    [0, 0, 1],   # brake
]

# Image preprocessing shared by training and evaluation.
data_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Grayscale(num_output_channels=1),
    transforms.Pad(padding=(12, 12, 12, 0)),
    transforms.CenterCrop(size=84),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0,), std=(1,)),
])

In [4]:
def read_data(data_dir=None, data_file=None, action_set=None):
    """Load recorded transitions and turn their actions into class labels.

    The file is expected to hold a list of
    ``(state, action, next_state, reward, done)`` tuples. The list is shuffled,
    each action is matched against ``action_set`` and transitions whose action
    matches no entry are dropped.

    The brake component is normalised to 0/1 before matching: the recorder
    stores the brake intensity it actually applied (0.2), which would otherwise
    never equal the ``[0, 0, 1]`` entry and silently discard every brake sample.

    :param data_dir: directory of the data file; defaults to ``DATA_DIR``.
    :param data_file: file name inside ``data_dir``; defaults to ``DATA_FILE``.
    :param action_set: list of ``[steer, gas, brake]`` prototypes; defaults to
        the module-level ``actions_set``.
    :return: ``(states, act_classes)`` - the kept states as one array and, for
        each of them, the index of its action in ``action_set``.
    :raises ValueError: if the file contains no transitions.
    """
    data_dir = DATA_DIR if data_dir is None else data_dir
    data_file = DATA_FILE if data_file is None else data_file
    action_set = actions_set if action_set is None else action_set

    path = os.path.join(data_dir, data_file)
    # NOTE: pickle can execute arbitrary code on load; only read files you recorded yourself.
    with gzip.open(path, 'rb') as f:
        data = pickle.load(f)

    if not data:
        raise ValueError("No transitions found in " + path)

    random.shuffle(data)

    # Only states and actions are needed; building arrays for the next states,
    # rewards and done flags would roughly double peak memory for nothing.
    states = np.array([transition[0] for transition in data])
    actions = np.array([transition[1] for transition in data], dtype=np.float32)

    # Any positive brake counts as "brake pressed" (see docstring).
    actions[:, 2] = actions[:, 2] > 0

    act_classes = np.full(len(actions), -1, dtype=int)
    for i, a in enumerate(action_set):
        act_classes[np.all(actions == a, axis=1)] = i

    # drop unsupported actions
    supported = act_classes != -1
    states = states[supported]
    act_classes = act_classes[supported]

    for i, a in enumerate(action_set):
        print("Actions of type {}: {}"
              .format(str(a), str(act_classes[act_classes == i].size)))

    # Slicing (instead of indexing 0..2) keeps this working with < 3 samples.
    print("Total transitions: " + str(len(act_classes)), *act_classes[:3])
    return states, act_classes

In [5]:
def create_datasets():
    """Build the training and validation loaders from the recorded data.

    The samples returned by ``read_data`` are split in order: the first
    ``TRAIN_VAL_SPLIT`` fraction is used for training (reshuffled by the
    loader), the remainder for validation (served in order).

    :return: ``(train_loader, val_loader)``
    """

    class TensorDatasetTransforms(torch.utils.data.TensorDataset):
        """TensorDataset that applies ``data_transform`` to the image on access."""

        def __getitem__(self, index):
            images, *others = self.tensors
            return (data_transform(images[index]), *(t[index] for t in others))

    def make_loader(images, labels, shuffle):
        dataset = TensorDatasetTransforms(torch.tensor(images), torch.tensor(labels))
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=shuffle,
                                           num_workers=2)

    x, y = read_data()
    x = np.moveaxis(x, 3, 1)  # channel first (torch requirement)

    # Index of the first validation sample.
    split = int(len(x) * TRAIN_VAL_SPLIT)

    train_loader = make_loader(x[:split], y[:split], shuffle=True)
    val_loader = make_loader(x[split:], y[split:], shuffle=False)

    return train_loader, val_loader

In [6]:

def create_ex_datasets():
    """Return a loader over the first two samples produced by ``read_data``.

    The loader uses the DataLoader defaults, so it yields one transformed
    sample at a time.
    """

    class TensorDatasetTransforms(torch.utils.data.TensorDataset):
        """TensorDataset that applies ``data_transform`` to the image on access."""

        def __getitem__(self, index):
            images, *others = self.tensors
            return (data_transform(images[index]), *(t[index] for t in others))

    states, labels = read_data()
    states = np.moveaxis(states, 3, 1)  # channel first

    n_examples = 2
    example_set = TensorDatasetTransforms(
        torch.tensor(states[:n_examples]),
        torch.tensor(labels[:n_examples]),
    )
    return torch.utils.data.DataLoader(example_set)

def Net(num_actions=None):
    """Build the convolutional policy network.

    The network takes a batch of single-channel 84x84 images and returns one
    logit per action class. Layer order is kept exactly as before so that
    ``state_dict`` keys (Sequential indices) stay compatible with saved models.

    :param num_actions: number of output logits; defaults to
        ``len(actions_set)``.
    :return: a ``torch.nn.Sequential`` model
    """
    if num_actions is None:
        num_actions = len(actions_set)

    return nn.Sequential(
        nn.Conv2d(1, 32, kernel_size=8, stride=4),   # 84x84 -> 20x20
        nn.BatchNorm2d(32),
        nn.ELU(),
        nn.Dropout2d(0.5),
        nn.Conv2d(32, 64, kernel_size=4, stride=2),  # 20x20 -> 9x9
        nn.BatchNorm2d(64),
        nn.ELU(),
        nn.Dropout2d(0.5),
        nn.Conv2d(64, 64, kernel_size=3, stride=1),  # 9x9 -> 7x7
        nn.ELU(),
        nn.Flatten(),  # built-in replacement for the hand-written view(batch, -1)
        nn.BatchNorm1d(64 * 7 * 7),
        nn.Dropout(),
        nn.Linear(64 * 7 * 7, 120),
        nn.ELU(),
        nn.BatchNorm1d(120),
        nn.Dropout(),
        nn.Linear(120, num_actions),
    )


def train(model):
    """
    Fit ``model`` on the recorded data for ``EPOCHS`` epochs.

    After each epoch the model is evaluated on the validation split and its
    weights are written to ``DATA_DIR/MODEL_FILE`` (overwriting the previous
    epoch's file).

    :param model: the network
    """
    criterion = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters())
    train_loader, val_loader = create_datasets()  # read datasets
    checkpoint_path = os.path.join(DATA_DIR, MODEL_FILE)

    for epoch_number in range(1, EPOCHS + 1):
        print('Epoch {}/{}'.format(epoch_number, EPOCHS))
        train_epoch(model, criterion, adam, train_loader)
        test(model, criterion, val_loader)

        # save model
        torch.save(model.state_dict(), checkpoint_path)


def train_epoch(model, loss_function, optimizer, data_loader):
    """Train for a single epoch and print the epoch's mean loss and accuracy.

    :param model: the network; switched to training mode.
    :param loss_function: callable ``(outputs, labels) -> scalar loss``.
    :param optimizer: optimizer bound to ``model``'s parameters.
    :param data_loader: yields ``(inputs, labels)`` batches; its ``dataset``
        length is used as the denominator of both averages.
    """
    # set model to training mode
    model.train()

    current_loss = 0.0
    # Plain int counter: the previous int-then-tensor accumulator raised
    # AttributeError on `.double()` when the loader produced no batches.
    correct = 0

    # iterate over the training data
    for inputs, labels in data_loader:

        # zero the parameter gradients
        optimizer.zero_grad()

        with torch.set_grad_enabled(True):
            # forward
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            loss = loss_function(outputs, labels)

            # backward
            loss.backward()
            optimizer.step()

        # statistics (loss is a batch mean, so weight it by the batch size)
        current_loss += loss.item() * inputs.size(0)
        correct += int(torch.sum(predictions == labels))

    n_samples = len(data_loader.dataset)
    total_loss = current_loss / n_samples
    total_acc = correct / n_samples

    print('Train Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss, total_acc))


def test(model, loss_function, data_loader):
    """Test over the whole dataset"""

    model.eval()  # set model in evaluation mode

    current_loss = 0.0
    current_acc = 0

    # iterate over the validation data
    for i, (inputs, labels) in enumerate(data_loader):
        # forward
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            loss = loss_function(outputs, labels)

        # statistics
        current_loss += loss.item() * inputs.size(0)
        current_acc += torch.sum(predictions == labels.data)

    total_loss = current_loss / len(data_loader.dataset)
    total_acc = current_acc.double() / len(data_loader.dataset)

    print('Test Loss: {:.4f}; Accuracy: {:.4f}'
          .format(total_loss, total_acc))


In [None]:
# Train a fresh network, then print its raw outputs for two example inputs.
print('Training...')
m = Net()
train(m)
print('Training Done!')

# Inference must run in evaluation mode (dropout off, batch-norm using running
# statistics); set it here explicitly instead of before training, where
# train_epoch() immediately switched the model back to training mode.
m.eval()
x_ex = create_ex_datasets()

print('Outputs of Neural Network are as follows:')

# `example` rather than `input`: a module-level `input` would shadow the
# builtin for every later cell. no_grad avoids building autograd graphs.
with torch.no_grad():
    for i, (example, label) in enumerate(x_ex):
        print("Example:", i + 1)
        output = m(example)
        print(output.tolist()[0])

Training...
Actions of type [0, 0, 0]: 17387
Actions of type [-1, 0, 0]: 1660
Actions of type [1, 0, 0]: 583
Actions of type [0, 1, 0]: 370
Actions of type [0, 0, 1]: 0
Total transitions: 20000 0 0 0
Epoch 1/30
Train Loss: 0.7387; Accuracy: 0.7901
Test Loss: 0.3869; Accuracy: 0.8747
Epoch 2/30
Train Loss: 0.4181; Accuracy: 0.8707
Test Loss: 0.3718; Accuracy: 0.8760
Epoch 3/30
Train Loss: 0.3866; Accuracy: 0.8782
Test Loss: 0.3750; Accuracy: 0.8700
Epoch 4/30
Train Loss: 0.3787; Accuracy: 0.8768
Test Loss: 0.3615; Accuracy: 0.8763
Epoch 5/30
Train Loss: 0.3738; Accuracy: 0.8795
Test Loss: 0.3553; Accuracy: 0.8810
Epoch 6/30
Train Loss: 0.3686; Accuracy: 0.8805
Test Loss: 0.3568; Accuracy: 0.8797
Epoch 7/30
Train Loss: 0.3608; Accuracy: 0.8833
Test Loss: 0.3588; Accuracy: 0.8730
Epoch 8/30
Train Loss: 0.3585; Accuracy: 0.8837
Test Loss: 0.3566; Accuracy: 0.8760
Epoch 9/30
Train Loss: 0.3517; Accuracy: 0.8862
Test Loss: 0.3310; Accuracy: 0.8870
Epoch 10/30
Train Loss: 0.3403; Accuracy: 0.

In [3]:
import os
import numpy as np
import torch
import gymnasium as gym
import pygame


# NOTE(review): this flag is not read or written by any cell visible here —
# presumably a leftover "stop driving" switch; confirm before relying on or removing it.
exit_test = False

In [5]:
# Cell 2: Define the model‐only driving loop (corrected)
def drive_with_model(model):
    """Let ``model`` drive one CarRacing episode in a visible window.

    Each observation goes through ``data_transform`` (the same pipeline used
    for training), the model's highest logit selects an entry of
    ``actions_set`` and that action is sent to the environment until the
    episode terminates or is truncated. The logits and the chosen class are
    printed every step.

    The model is switched to evaluation mode and inputs are moved to the
    device holding its parameters. The environment and pygame are shut down
    even if a step raises.

    :param model: a network accepting a ``(1, 1, 84, 84)`` float tensor and
        returning one logit per entry of ``actions_set``.
    """
    # Inputs have to live on the same device as the weights; callers may have
    # moved the model to CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    # With a batch of one, batch-norm in training mode would fail and dropout
    # would randomise the policy, so force inference mode.
    model.eval()

    env = gym.make("CarRacing-v3", render_mode="human")
    pygame.init()
    try:
        obs, _ = env.reset()            # obs is H×W×C uint8 already
        done = False

        while not done:
            # ——— Preprocess with exactly the same pipeline used in training ———
            tensor = data_transform(obs)                 # → (1,84,84) floats in [0,1]
            tensor = tensor.unsqueeze(0).to(device)      # → (1,1,84,84) batch dimension

            # ——— Forward pass ———
            with torch.no_grad():
                logits = model(tensor)
                choice = int(logits.argmax(dim=1).item())

                # (optional) sanity-check of the network outputs:
                print("Logits:", logits.cpu().numpy().flatten())
                print("Chosen class index:", choice)

            # ——— Map to CarRacing action and step ———
            action = np.array(actions_set[choice], dtype=np.float32)

            # scale brake to be more gentle
            if action[2] > 0:
                action[2] = 0.3

            obs, _, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            env.render()
    finally:
        try:
            env.close()
        finally:
            pygame.quit()


In [6]:
# Load the trained policy and let it drive.
# NOTE: this cell needs Net, data_transform and actions_set from the training
# cells above; run in a kernel where those cells have not been executed it
# fails with "NameError: name 'Net' is not defined".
DATA_DIR = 'data'
DATA_FILE = 'data.gzip'
MODEL_FILE = 'model.pt'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
# map_location lets a checkpoint written on one device (e.g. a GPU) be loaded
# on another (e.g. a CPU-only machine).
checkpoint = torch.load(os.path.join(DATA_DIR, MODEL_FILE),
                        map_location=device,
                        weights_only=True)
model.load_state_dict(checkpoint)
model.eval()
print(f"Loaded policy from {MODEL_FILE} onto {device}.")

drive_with_model(model)

NameError: name 'Net' is not defined