In [1]:
import numpy as np
import os
os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/bin/ffmpeg"  # adjust path if different
import cv2
import matplotlib.pyplot as plt
import imageio
import torch
import torch.nn as nn
import torch.nn.functional as F

# ========== PyTorch Model ==========
class DrivingNN(nn.Module):
    """Small fully connected policy network for the driving agent.

    Takes an input whose last dimension is 8 and produces 2 values squashed
    into [-1, 1] by ``tanh`` (used by the caller as steering and throttle).
    """

    def __init__(self):
        super().__init__()
        # Layers are created in this order on purpose: the order determines
        # both the state_dict layout and the seeded random initialisation.
        self.fc1 = nn.Linear(in_features=8, out_features=32)
        self.fc2 = nn.Linear(in_features=32, out_features=32)
        self.out = nn.Linear(in_features=32, out_features=2)  # [steering, throttle]

    def forward(self, x):
        """Run two ReLU hidden layers followed by a tanh-bounded output layer."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.out(hidden).tanh()

# ========== Environment ==========
class CustomTrackEnv:
    """2-D driving environment on a track defined by a centerline point set.

    The centerline is loaded with ``np.load``, shifted so its mean is at the
    origin and scaled so its largest absolute coordinate is 1. A position is
    "on track" when it lies closer than ``track_radius`` to at least one
    centerline point.

    Observations are 8 float32 values: five ray distances (one per entry of
    ``SENSOR_ANGLES``), the previous ``[steering, throttle]`` action and the
    current speed.
    """

    # Ray directions relative to the car heading, in radians. Shared by
    # get_observation() and render() so the drawn rays match the readings.
    SENSOR_ANGLES = (-np.pi / 4, -np.pi / 9, 0, np.pi / 9, np.pi / 4)
    # Rays are sampled at SENSOR_SAMPLES evenly spaced distances in
    # [0, MAX_SENSOR_RANGE]; a ray that never leaves the track reads the max.
    MAX_SENSOR_RANGE = 3.0
    SENSOR_SAMPLES = 30

    def __init__(self, path_file='path.npy', track_radius=0.2):
        """Load and normalise the centerline, then reset the car.

        Args:
            path_file: Anything ``np.load`` accepts; expected to hold an
                (N, 2) array of track points (assumed from how the points are
                used below; the shape is not validated here).
            track_radius: Half-width of the track in normalised units.

        Raises:
            ValueError: If every point in the file is identical, which leaves
                nothing to scale by.
        """
        raw = np.load(path_file)
        centered = raw - np.mean(raw, axis=0)
        scale = np.max(np.abs(centered))
        if scale == 0:
            # Without this guard the division below would fill the track with
            # NaNs and every later distance test would silently fail.
            raise ValueError("centerline is degenerate: all points are identical")
        self.centerline = centered / scale
        self.track_radius = track_radius
        self.center = np.mean(self.centerline, axis=0)
        self.reset()

    def reset(self):
        """Put the car on the first centerline point and return the observation."""
        self.car_pos = np.copy(self.centerline[0])
        self.car_vel = 0.1
        self.car_angle = np.pi / 2
        self.prev_action = np.array([0.0, 0.0])  # steering, throttle
        self.done = False
        self.passed_checkpoints = np.zeros(len(self.centerline), dtype=bool)
        return self.get_observation()

    def _distances_to(self, point):
        """Return the distance from ``point`` to every centerline point."""
        return np.linalg.norm(self.centerline - point, axis=1)

    def is_on_track(self, point):
        """Return True when ``point`` is within ``track_radius`` of the centerline."""
        return bool(np.any(self._distances_to(point) < self.track_radius))

    def get_observation(self):
        """Return the 8-value float32 observation for the current car state.

        Each ray reports the first sampled distance at which the probe point
        is off track, or ``MAX_SENSOR_RANGE`` if every sample is on track.
        """
        ranges = np.linspace(0, self.MAX_SENSOR_RANGE, self.SENSOR_SAMPLES)
        readings = []
        for offset in self.SENSOR_ANGLES:
            heading = self.car_angle + offset
            direction = np.array([np.cos(heading), np.sin(heading)])
            # All probe points of this ray at once: shape (samples, 2).
            probes = self.car_pos + ranges[:, None] * direction
            # Distance from every probe to every centerline point.
            gaps = np.linalg.norm(
                probes[:, None, :] - self.centerline[None, :, :], axis=2
            )
            off_track = ~np.any(gaps < self.track_radius, axis=1)
            if off_track.any():
                # argmax on a boolean array yields the first True entry.
                readings.append(ranges[np.argmax(off_track)])
            else:
                readings.append(self.MAX_SENSOR_RANGE)
        return np.array(
            [*readings, *self.prev_action, self.car_vel], dtype=np.float32
        )

    def step(self, action):
        """Advance the simulation by one tick.

        Args:
            action: ``[steering, throttle]``; both are clipped to [-1, 1].

        Returns:
            ``(observation, reward, done)``. Leaving the track ends the
            episode with reward -10. Otherwise the reward is
            ``speed + 0.1 * alignment + 2.0 * fraction_of_checkpoints_passed
            + rotation_term`` where the rotation term is +0.2, 0 or -0.2
            according to the sign of the z component of
            ``(car_pos - center) x forward`` (positive means the car is
            circling the track center counter-clockwise).
        """
        steer, throttle = np.clip(action, -1, 1)
        self.prev_action = np.array([steer, throttle])
        self.car_angle += steer * 0.1
        self.car_vel = np.clip(self.car_vel + throttle * 0.01, 0.05, 0.2)
        forward = np.array([np.cos(self.car_angle), np.sin(self.car_angle)])
        self.car_pos += self.car_vel * forward

        if not self.is_on_track(self.car_pos):
            self.done = True
            return self.get_observation(), -10, self.done

        # NOTE(review): the alignment target is the *closest* centerline
        # point, not the next one along the track - confirm this is intended.
        target = self.centerline[self._closest_checkpoint_index()]
        to_target = target - self.car_pos
        gap = np.linalg.norm(to_target)
        if gap > 0:
            directional_alignment = np.dot(to_target / gap, forward)
        else:
            # The car sits exactly on the target, so no direction is defined.
            # Use a neutral 0 instead of letting 0/0 turn the reward into NaN.
            directional_alignment = 0.0

        # Mark every checkpoint within half the track radius as passed; flags
        # that are already set stay set.
        self.passed_checkpoints |= (
            self._distances_to(self.car_pos) < self.track_radius / 2
        )

        r = self.car_pos - self.center
        cross_z = r[0] * forward[1] - r[1] * forward[0]
        rotation_reward = np.sign(cross_z) * 0.2

        obs = self.get_observation()
        progress_bonus = np.sum(self.passed_checkpoints) / len(self.centerline)
        reward = (
            self.car_vel
            + 0.1 * directional_alignment
            + 2.0 * progress_bonus
            + rotation_reward
        )
        return obs, reward, self.done

    def _closest_checkpoint_index(self):
        """Return the index of the centerline point nearest to the car."""
        return int(np.argmin(self._distances_to(self.car_pos)))

    def render(self, title=None):
        """Draw the track, car and sensor rays; return the frame as an array.

        Returns:
            An RGBA ``uint8`` array of shape (height, width, 4) that owns its
            data, so it remains valid after the figure is closed.
        """
        fig, ax = plt.subplots()
        ax.plot(self.centerline[:, 0], self.centerline[:, 1], 'k--', label='Centerline')
        for point in self.centerline:
            ax.add_patch(
                plt.Circle(point, self.track_radius, color='lightgray', alpha=0.3)
            )
        ax.plot(self.car_pos[0], self.car_pos[1], 'ro', label='Car')
        ax.plot(self.center[0], self.center[1], 'gx', label='Center of Mass')

        readings = self.get_observation()[:len(self.SENSOR_ANGLES)]
        for offset, reading in zip(self.SENSOR_ANGLES, readings):
            heading = self.car_angle + offset
            ray = np.array([np.cos(heading), np.sin(heading)])
            tip = self.car_pos + ray * reading
            ax.plot([self.car_pos[0], tip[0]], [self.car_pos[1], tip[1]], color='cyan')

        if title:
            ax.set_title(title)

        ax.set_xlim(-1.5, 1.5)
        ax.set_ylim(-1.5, 1.5)
        ax.set_aspect('equal')
        ax.axis('off')
        ax.legend(loc='upper right')
        fig.canvas.draw()
        # Take the shape from the buffer itself rather than recomputing it
        # from get_width_height(), and copy so the frame does not alias the
        # canvas memory of a figure that is closed on the next line.
        image = np.asarray(fig.canvas.buffer_rgba()).copy()
        plt.close(fig)
        return image

# ========== Evolutionary Loop ==========
def mutate_torch(model, mutation_rate=0.05):
    """Return a mutated copy of ``model``; the original is left untouched.

    The copy is made with ``copy.deepcopy``, so this works for any
    ``nn.Module`` rather than only for one hard-coded architecture, and no
    throw-away random initialisation is performed.

    Args:
        model: The parent network.
        mutation_rate: Standard deviation of the zero-mean Gaussian noise
            added to every parameter element.

    Returns:
        A new module of the same type as ``model`` with perturbed parameters.
    """
    import copy  # function-scope import keeps the file's import block as is

    child = copy.deepcopy(model)
    with torch.no_grad():
        for param in child.parameters():
            param.add_(mutation_rate * torch.randn_like(param))
    return child


if __name__ == '__main__':
    # Evolutionary training loop: score every agent on the track, keep the
    # top scorers as parents, and refill the population with mutated copies.
    generations = 5
    population_size = 20
    mutation_rate = 0.15
    elite_count = 5        # number of top agents used as parents
    max_steps = 200        # episode length cap
    weights_path = "best_agent_weights.pt"
    frames = []

    if os.path.exists(weights_path):
        # SECURITY: weights_only=False unpickles arbitrary objects. It is
        # needed because this script stores the whole module, and newer
        # PyTorch releases refuse that by default. Only load files you
        # created yourself.
        checkpoint = torch.load(weights_path, weights_only=False)
        if isinstance(checkpoint, nn.Module):
            base_agent = checkpoint
        else:
            # Also accept a plain state_dict saved by other tooling.
            base_agent = DrivingNN()
            base_agent.load_state_dict(checkpoint)
        agents = [mutate_torch(base_agent, mutation_rate) for _ in range(population_size)]
        print("Loaded previous agent and generated mutated population.")
    else:
        agents = [DrivingNN() for _ in range(population_size)]

    # One environment is enough: reset() restores all per-episode state, so
    # the track file does not have to be re-read for every agent.
    env = CustomTrackEnv()

    # Parents are not carried over between generations, so a later generation
    # can score worse than an earlier one. Remember the best agent seen.
    champion = None
    champion_score = -np.inf

    for gen in range(generations):
        scores = []
        for i, agent in enumerate(agents):
            obs = env.reset()
            total_reward = 0
            steps = 0
            while not env.done and steps < max_steps:
                obs_tensor = torch.tensor(obs, dtype=torch.float32)
                with torch.no_grad():
                    action = agent(obs_tensor).numpy()
                obs, reward, _ = env.step(action)
                if i == 0:
                    # Only the first agent of each generation is recorded.
                    frames.append(env.render(title=f"Generation {gen + 1}"))
                total_reward += reward
                steps += 1
            scores.append(total_reward)

        # argsort is ascending: the LAST index belongs to the best agent.
        best_indices = np.argsort(scores)[-elite_count:]
        best_agents = [agents[i] for i in best_indices]
        generation_best = scores[best_indices[-1]]
        if generation_best > champion_score:
            champion_score = generation_best
            champion = best_agents[-1]

        agents = []
        for parent in best_agents:
            for _ in range(population_size // len(best_agents)):
                agents.append(mutate_torch(parent, mutation_rate))

        print(f"Generation {gen + 1}, Best Score: {generation_best:.2f}")
        if generation_best > 2000:
            print('we got beyond 2000! finishing early.')
            break

    if champion is not None:
        # The whole module is saved (not just a state_dict) to keep the
        # on-disk format that existing checkpoints of this script use.
        torch.save(champion, weights_path)

    if frames:
        imageio.mimsave("race_learning_custom_direction.mp4", frames, fps=30, codec='libx264')

Loaded previous agent and generated mutated population.
Generation 1, Best Score: 340.60
Generation 2, Best Score: 344.42
Generation 3, Best Score: 338.90
Generation 4, Best Score: 339.13
Generation 5, Best Score: 337.90
