#### INFO
implementirani so okolje, prey, predator, rewards in decision logic, trace, observation 6 sosedov, animacija

Niso vsi parametri (npr. velikost zemljevida in hitrost) enaki kot v članku, ampak mislim, da to ni pomembno. Se pa lahko popravi.

Passive in active forces se mi zdijo malo problematični; ni mi jih uspelo implementirati (v članku so opisani pod "agent dynamics"). Ker aF in aR nista implementirana, tudi decorative reward za laziness ($-0.01|a_F| - 0.1|a_R|$) ni implementiran.

In [1]:
import random
from collections import deque

import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from IPython.display import HTML
from matplotlib.lines import Line2D

In [2]:
class ReplayBuffer:
    """Fixed-capacity FIFO store of experience tuples.

    Once ``capacity`` experiences are held, adding another one silently
    discards the oldest entry.
    """

    def __init__(self, capacity=10000):
        # A bounded deque evicts the oldest element in O(1); the previous
        # list-based version paid O(n) for every ``pop(0)`` once full.
        self.buffer = deque(maxlen=capacity)
        self.capacity = capacity

    def __len__(self):
        """Return the number of experiences currently stored."""
        return len(self.buffer)

    def add(self, experience):
        """Append ``experience``, evicting the oldest entry when full."""
        self.buffer.append(experience)

    def sample(self, batch_size):
        """Return ``batch_size`` distinct experiences chosen at random.

        Raises:
            ValueError: if ``batch_size`` exceeds the number of stored
                experiences (raised by ``random.sample``).
        """
        return random.sample(self.buffer, batch_size)

In [3]:
class ActorNetwork(nn.Module):
    """Three-layer MLP policy head producing actions bounded to [-1, 1].

    Two hidden layers of width 128 with ReLU activations feed a final
    linear layer whose output is squashed by ``tanh``.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_width = 128
        self.fc1 = nn.Linear(input_dim, hidden_width)
        self.fc2 = nn.Linear(hidden_width, hidden_width)
        self.fc3 = nn.Linear(hidden_width, output_dim)

    def forward(self, x):
        """Map a batch of inputs to tanh-bounded continuous actions."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        # tanh keeps every action component inside [-1, 1].
        return torch.tanh(self.fc3(hidden))

In [4]:
class CriticNetwork(nn.Module):
    """Three-layer MLP that maps an input vector to a single scalar value.

    Two hidden layers of width 128 with ReLU activations are followed by
    an unbounded linear output of size 1.
    """

    def __init__(self, input_dim):
        super().__init__()
        hidden_width = 128
        self.fc1 = nn.Linear(input_dim, hidden_width)
        self.fc2 = nn.Linear(hidden_width, hidden_width)
        self.fc3 = nn.Linear(hidden_width, 1)

    def forward(self, x):
        """Return one unbounded scalar per row of ``x``."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [5]:
class PredatorPreyEnvironment:
    """Toroidal 2D arena that owns the agents, replay buffers and renderer.

    Agents are any objects exposing ``type`` ('prey' or 'predator'),
    ``position`` (float array of length 2), ``position_history`` and a
    ``move()`` method; ``direction`` is optional and only used for drawing.
    """

    # Polygon keyword arguments per agent type. Unknown types fall back to a
    # neutral style instead of crashing or reusing another agent's artists.
    _AGENT_STYLES = {
        'prey': {'color': 'b'},
        'predator': {'color': 'orange', 'alpha': 0.8},
    }
    _FALLBACK_STYLE = {'color': 'gray'}
    # One trace opacity for all agents, whether present at start or added later.
    _TRACE_ALPHA = 0.2

    def __init__(self, width=100, height=100):
        self.width = width
        self.height = height
        self.agents = []

        # Replay buffers for both prey and predators
        self.replay_buffer_prey = ReplayBuffer(capacity=10000)
        self.replay_buffer_predator = ReplayBuffer(capacity=10000)

        # Actor and Critic networks (not being used for RL yet, but initialized)
        # NOTE(review): the agents' get_observation() returns (6 + 6) * 4 + 5 = 53
        # features, so input_dim=4 / input_dim=6 will not accept those
        # observations as-is. Only prey networks exist so far. Confirm the
        # intended input layout before wiring up training.
        self.actor_prey = ActorNetwork(input_dim=4, output_dim=2)
        self.critic_prey = CriticNetwork(input_dim=6)
        self.optimizer_actor_prey = optim.Adam(self.actor_prey.parameters(), lr=0.001)
        self.optimizer_critic_prey = optim.Adam(self.critic_prey.parameters(), lr=0.001)

    def is_within_boundaries(self, position):
        """
        Check if a given position is within the environment boundaries.

        The lower edges (0) are inclusive, the upper edges (width/height)
        are exclusive.
        """
        x, y = position
        return 0 <= x < self.width and 0 <= y < self.height

    def apply_boundary_conditions(self, position):
        """
        Handle agent boundary interactions with toroidal wrapping.

        Returns a new float array with x wrapped into [0, width) and
        y wrapped into [0, height).
        """
        x, y = position
        return np.array([x % self.width, y % self.height], dtype=float)

    def add_agent(self, agent):
        """
        Add an agent (prey or predator) to the environment.
        """
        self.agents.append(agent)

    def update(self):
        """
        Advance the simulation by one step.

        Every agent moves and is wrapped onto the torus. Agents closer than
        the minimum separation (6.0 if either is a predator, otherwise 3.0)
        are then nudged apart, and finally all positions are wrapped again
        so that the nudge cannot leave an agent outside the arena.
        """
        for agent in self.agents:
            agent.move()
            agent.position = self.apply_boundary_conditions(agent.position)

        # Ensure agents do not occupy the same position or overlap significantly
        for i, agent in enumerate(self.agents):
            for j, other_agent in enumerate(self.agents):
                if i == j:
                    continue
                direction = agent.position - other_agent.position
                distance = np.linalg.norm(direction)
                # Predators need more space
                involves_predator = 'predator' in (agent.type, other_agent.type)
                min_distance = 6.0 if involves_predator else 3.0
                if distance >= min_distance:
                    continue
                if distance == 0:
                    # Random direction if they are exactly on top of each other
                    direction = np.random.uniform(-1, 1, 2)
                direction = direction / np.linalg.norm(direction)
                # Each agent only moves half of the overlap; the partner is
                # handled when the outer loop reaches it.
                agent.position += direction * (min_distance - distance) / 2

        # The separation step may have pushed agents across an edge. Wrapping
        # only after the whole pass keeps the pairwise distances used above
        # consistent while guaranteeing in-bounds positions afterwards.
        for agent in self.agents:
            agent.position = self.apply_boundary_conditions(agent.position)

    def adjust_positions_for_traces(self, positions):
        """
        Adjust positions to account for toroidal boundaries in traces.
        This prevents lines from being drawn across the plot when agents wrap around.

        Each point is shifted by one arena width/height when it lies more
        than half the arena away from the previous (already adjusted) point.
        An empty input yields an empty ``(0, 2)`` array.
        """
        if len(positions) == 0:
            return np.empty((0, 2))

        adjusted_positions = [positions[0]]
        for raw in positions[1:]:
            prev = adjusted_positions[-1]
            curr = raw.copy()
            dx = curr[0] - prev[0]
            dy = curr[1] - prev[1]

            # Adjust for wrapping in x-direction
            if dx > self.width / 2:
                curr[0] -= self.width
            elif dx < -self.width / 2:
                curr[0] += self.width

            # Adjust for wrapping in y-direction
            if dy > self.height / 2:
                curr[1] -= self.height
            elif dy < -self.height / 2:
                curr[1] += self.height

            adjusted_positions.append(curr)
        return np.array(adjusted_positions)

    def _create_artists(self, ax, agent):
        """Create and register the triangle patch and trace line for ``agent``."""
        style = self._AGENT_STYLES.get(agent.type, self._FALLBACK_STYLE)
        patch = plt.Polygon(self.get_agent_shape(agent), closed=True, **style)
        trace, = ax.plot([], [], color=style['color'], alpha=self._TRACE_ALPHA)
        ax.add_patch(patch)
        return {'patch': patch, 'trace': trace}

    def animate(self, steps=100):
        """
        Animate the simulation.

        Each rendered frame calls ``self.update()``, so producing the
        animation advances the simulation state. Returns an IPython ``HTML``
        object wrapping the JavaScript animation.
        """
        fig, ax = plt.subplots(figsize=(5, 5))
        ax.set_xlim(0, self.width)
        ax.set_ylim(0, self.height)
        ax.set_xticks([])
        ax.set_yticks([])

        # Maps each agent to its patch and trace artists
        agent_artists = {agent: self._create_artists(ax, agent) for agent in self.agents}

        def draw_frame(frame):
            self.update()

            # Remove artists for agents that are no longer in self.agents
            current_agents = set(self.agents)
            for agent in [a for a in agent_artists if a not in current_agents]:
                stale = agent_artists.pop(agent)
                stale['patch'].remove()
                stale['trace'].remove()

            # Update positions and traces
            for agent in self.agents:
                # Agents added since the last frame get their artists lazily
                if agent not in agent_artists:
                    agent_artists[agent] = self._create_artists(ax, agent)

                artist = agent_artists[agent]
                artist['patch'].set_xy(self.get_agent_shape(agent))
                # Unwrap the history so the trace does not jump across the plot
                positions = self.adjust_positions_for_traces(np.array(agent.position_history))
                artist['trace'].set_data(positions[:, 0], positions[:, 1])

            return []

        ani = animation.FuncAnimation(fig, draw_frame, frames=steps, blit=False, interval=50, repeat=False)
        plt.close(fig)  # Close the figure to prevent additional static display
        return HTML(ani.to_jshtml())

    def get_agent_shape(self, agent):
        """
        Return the vertices of a triangle representing the agent's orientation.

        The tip points along ``agent.direction`` (defaulting to +x when the
        attribute is missing or has zero length). The tip lies 4 units ahead
        for predators and 2 units ahead for other agents; the base corners
        are 1 unit behind and 1 unit to either side.
        """
        direction = np.asarray(getattr(agent, 'direction', (1.0, 0.0)), dtype=float)
        norm = np.linalg.norm(direction)
        # A zero vector has no heading; dividing by its norm would give NaNs.
        direction = direction / norm if norm > 0 else np.array([1.0, 0.0])
        perp_direction = np.array([-direction[1], direction[0]])
        size = 4 if agent.type == 'predator' else 2
        front = agent.position + direction * size  # Tip of the triangle
        left = agent.position - direction * 1 + perp_direction * 1  # Triangle base left
        right = agent.position - direction * 1 - perp_direction * 1  # Triangle base right
        return [front, left, right]

    def train(self, batch_size=32):
        """
        Train the actor and critic networks using experiences from replay buffers.

        Not implemented yet: the method has no body and returns ``None``.
        """

In [7]:
class Prey:
    """Prey agent that flees the nearest visible predator, else wanders.

    The agent reads the world through the module-level ``env`` object
    (``env.agents``, ``env.width``, ``env.height``) and writes its
    transitions to ``env.replay_buffer_prey``.
    """

    # Number of recent positions kept for drawing the trace.
    _HISTORY_LENGTH = 5
    # A predator closer than this counts as touching the prey.
    _CONTACT_DISTANCE = 2.0

    def __init__(self, position, speed=1.5, mass=1.0, max_acceleration=1.0, max_angular_velocity=0.5):
        self.position = np.array(position, dtype=float)
        self.speed = speed
        self.mass = mass
        self.type = 'prey'
        # Random initial heading, normalised to unit length
        self.direction = np.random.uniform(-1, 1, 2)
        if np.linalg.norm(self.direction) == 0:
            self.direction = np.array([1.0, 0.0])
        else:
            self.direction = self.direction / np.linalg.norm(self.direction)
        self.acceleration = np.zeros(2)
        self.max_acceleration = max_acceleration
        self.max_angular_velocity = max_angular_velocity
        self.heading_angle = np.arctan2(self.direction[1], self.direction[0])
        self.position_history = [self.position.copy()]

    def apply_fleeing(self, nearest_predator):
        """Adjust direction to flee from the nearest predator.

        Does nothing when ``nearest_predator`` is ``None``. If the predator
        sits exactly on the prey, a random heading is chosen.
        """
        if nearest_predator is None:
            return
        direction_away = self.position - nearest_predator.position
        norm = np.linalg.norm(direction_away)
        if norm > 0:
            self.direction = direction_away / norm
        else:
            self.direction = np.random.uniform(-1, 1, 2)
            self.direction /= np.linalg.norm(self.direction)
        self.heading_angle = np.arctan2(self.direction[1], self.direction[0])

    def get_observation(self, perception_radius=15.0, max_allies=6, max_adversaries=6):
        """
        Compute the observation vector for the agent.
        Includes metric and topological constraints:
        - Perceives agents within a certain radius (metric).
        - At most 6 allies and 6 adversaries (topological).

        Layout (length ``(max_allies + max_adversaries) * 4 + 5``):
        ``max_allies`` slots of 4 values, then ``max_adversaries`` slots of
        4 values, then own x, y, direction x, direction y, speed. Each slot
        holds relative position (x, y) and relative direction (x, y), nearest
        first. Unused slots of each group are zero-filled, so adversary
        features always start at index ``max_allies * 4``.
        """
        allies = []
        adversaries = []
        for agent in env.agents:
            if agent is self:
                continue
            distance = np.linalg.norm(agent.position - self.position)
            # Check if within perception radius
            if distance > perception_radius:
                continue
            group = allies if agent.type == self.type else adversaries
            group.append((distance, agent))

        observation = []
        for group, slots in ((allies, max_allies), (adversaries, max_adversaries)):
            nearest = sorted(group, key=lambda pair: pair[0])[:slots]
            for _, other in nearest:
                relative_position = other.position - self.position
                relative_heading = other.direction - self.direction
                observation.extend([
                    relative_position[0], relative_position[1],
                    relative_heading[0], relative_heading[1],
                ])
            # Pad each group separately. Padding only at the very end would
            # let adversary features slide into unused ally slots.
            observation.extend([0.0] * (4 * (slots - len(nearest))))

        # Add the agent's own state (position, direction, and speed)
        observation.extend([
            self.position[0], self.position[1],
            self.direction[0], self.direction[1],
            self.speed,
        ])

        return np.array(observation, dtype=float)

    def find_nearest_predator(self, perception_radius=15.0):
        """Find the nearest predator within perception range.

        Returns ``None`` when no predator lies within ``perception_radius``.
        """
        in_range = []
        for agent in env.agents:
            if agent.type != 'predator':
                continue
            distance = np.linalg.norm(agent.position - self.position)
            if distance <= perception_radius:
                in_range.append((distance, agent))
        if not in_range:
            return None
        return min(in_range, key=lambda pair: pair[0])[1]

    def _is_touched(self):
        """Return True when any predator is within contact distance."""
        return any(
            agent.type == 'predator'
            and np.linalg.norm(agent.position - self.position) < self._CONTACT_DISTANCE
            for agent in env.agents
        )

    def move(self):
        """Move the prey one step and record the transition.

        The prey flees from the nearest predator in range, otherwise it
        turns by a random angle of at most ``max_angular_velocity``. The
        tuple ``(state, action, reward, next_state)`` is appended to
        ``env.replay_buffer_prey``; the reward is -1.0 when a predator is
        within contact distance after the move and 0.0 otherwise.
        """
        # Capture the observation *before* acting. Previously both state and
        # next_state were taken after the move and were therefore identical.
        state = self.get_observation()

        nearest_predator = self.find_nearest_predator()
        if nearest_predator is not None:
            self.apply_fleeing(nearest_predator)
        else:
            # If no predator is nearby, apply slight random turning.
            self.heading_angle += np.random.uniform(
                -self.max_angular_velocity, self.max_angular_velocity
            )
            self.direction = np.array([np.cos(self.heading_angle), np.sin(self.heading_angle)])

        # Update position
        self.position += self.direction * self.speed
        self.position = self.position % np.array([env.width, env.height])  # Apply toroidal wrapping
        self.position_history.append(self.position.copy())
        if len(self.position_history) > self._HISTORY_LENGTH:
            self.position_history.pop(0)

        reward = -1.0 if self._is_touched() else 0.0

        # Movement penalties. Forward/rotational accelerations are not
        # modelled yet, so both terms are currently zero placeholders.
        aF = 0.0
        aR = 0.0
        reward += -0.01 * abs(aF) - 0.1 * abs(aR)

        action = self.direction * self.speed  # Placeholder action representation
        next_state = self.get_observation()

        env.replay_buffer_prey.add((state, action, reward, next_state))


In [8]:
class Predator:
    """Predator agent that chases the nearest visible prey, else wanders.

    The agent reads the world through the module-level ``env`` object
    (``env.agents``, ``env.width``, ``env.height``) and writes its
    transitions to ``env.replay_buffer_predator``.
    """

    # Number of recent positions kept for drawing the trace.
    _HISTORY_LENGTH = 5
    # A prey closer than this counts as being in contact with the predator.
    _CONTACT_DISTANCE = 2.0

    def __init__(self, position, speed=2.0, mass=1.0, max_acceleration=1.0, max_angular_velocity=0.5):
        self.position = np.array(position, dtype=float)
        self.speed = speed
        self.mass = mass
        self.type = 'predator'
        # Random initial heading, normalised to unit length
        self.direction = np.random.uniform(-1, 1, 2)
        if np.linalg.norm(self.direction) == 0:
            self.direction = np.array([1.0, 0.0])
        else:
            self.direction = self.direction / np.linalg.norm(self.direction)
        self.acceleration = np.zeros(2)
        self.max_acceleration = max_acceleration
        self.max_angular_velocity = max_angular_velocity
        self.heading_angle = np.arctan2(self.direction[1], self.direction[0])
        self.position_history = [self.position.copy()]

    def apply_chasing(self, target_prey):
        """Adjust direction to chase the target prey.

        Does nothing when ``target_prey`` is ``None``. If the prey sits
        exactly on the predator, a random heading is chosen.
        """
        if target_prey is None:
            return
        direction_to_prey = target_prey.position - self.position
        norm = np.linalg.norm(direction_to_prey)
        if norm > 0:
            self.direction = direction_to_prey / norm
        else:
            self.direction = np.random.uniform(-1, 1, 2)
            self.direction /= np.linalg.norm(self.direction)
        self.heading_angle = np.arctan2(self.direction[1], self.direction[0])

    def get_observation(self, perception_radius=15.0, max_allies=6, max_adversaries=6):
        """
        Compute the observation vector for the agent.
        Includes metric and topological constraints:
        - Perceives agents within a certain radius (metric).
        - At most 6 allies and 6 adversaries (topological).

        Layout (length ``(max_allies + max_adversaries) * 4 + 5``):
        ``max_allies`` slots of 4 values, then ``max_adversaries`` slots of
        4 values, then own x, y, direction x, direction y, speed. Each slot
        holds relative position (x, y) and relative direction (x, y), nearest
        first. Unused slots of each group are zero-filled, so adversary
        features always start at index ``max_allies * 4``.
        """
        allies = []
        adversaries = []
        for agent in env.agents:
            if agent is self:
                continue
            distance = np.linalg.norm(agent.position - self.position)
            # Check if within perception radius
            if distance > perception_radius:
                continue
            group = allies if agent.type == self.type else adversaries
            group.append((distance, agent))

        observation = []
        for group, slots in ((allies, max_allies), (adversaries, max_adversaries)):
            nearest = sorted(group, key=lambda pair: pair[0])[:slots]
            for _, other in nearest:
                relative_position = other.position - self.position
                relative_heading = other.direction - self.direction
                observation.extend([
                    relative_position[0], relative_position[1],
                    relative_heading[0], relative_heading[1],
                ])
            # Pad each group separately. Padding only at the very end would
            # let adversary features slide into unused ally slots.
            observation.extend([0.0] * (4 * (slots - len(nearest))))

        # Add the agent's own state (position, direction, and speed)
        observation.extend([
            self.position[0], self.position[1],
            self.direction[0], self.direction[1],
            self.speed,
        ])

        return np.array(observation, dtype=float)

    def find_nearest_prey(self, perception_radius=15.0):
        """Find the nearest prey within perception range.

        Returns ``None`` when no prey lies within ``perception_radius``.
        """
        return self._nearest_prey_within(perception_radius, inclusive=True)

    def _nearest_prey_within(self, radius, inclusive):
        """Return the closest prey nearer than ``radius`` (or at it, if inclusive)."""
        candidates = []
        for agent in env.agents:
            if agent.type != 'prey':
                continue
            distance = np.linalg.norm(agent.position - self.position)
            if distance < radius or (inclusive and distance == radius):
                candidates.append((distance, agent))
        if not candidates:
            return None
        return min(candidates, key=lambda pair: pair[0])[1]

    def move(self):
        """Move the predator one step and record the transition.

        A prey already within contact distance is targeted first; otherwise
        the nearest prey in perception range is chased; otherwise the
        predator turns by a random angle of at most ``max_angular_velocity``.
        The tuple ``(state, action, reward, next_state)`` is appended to
        ``env.replay_buffer_predator``; the reward is 1.0 when a prey was in
        contact at the start of the step and 0.0 otherwise.
        """
        # Capture the observation *before* acting. Previously both state and
        # next_state were taken after the move and were therefore identical.
        state = self.get_observation()

        # Prey within contact distance (strictly closer) take priority.
        contact_target = self._nearest_prey_within(self._CONTACT_DISTANCE, inclusive=False)
        in_contact = contact_target is not None
        target_prey = contact_target if in_contact else self.find_nearest_prey()

        if target_prey is not None:
            self.apply_chasing(target_prey)
        else:
            # No prey available, apply random turning
            self.heading_angle += np.random.uniform(
                -self.max_angular_velocity, self.max_angular_velocity
            )
            self.direction = np.array([np.cos(self.heading_angle), np.sin(self.heading_angle)])

        # Update position
        self.position += self.direction * self.speed
        self.position = self.position % np.array([env.width, env.height])  # Apply toroidal wrapping
        self.position_history.append(self.position.copy())
        if len(self.position_history) > self._HISTORY_LENGTH:
            self.position_history.pop(0)

        reward = 1.0 if in_contact else 0.0

        # Movement penalties. Forward/rotational accelerations are not
        # modelled yet, so both terms are currently zero placeholders.
        aF = 0.0
        aR = 0.0
        reward += -0.01 * abs(aF) - 0.1 * abs(aR)

        action = self.direction * self.speed  # Placeholder action representation
        next_state = self.get_observation()

        env.replay_buffer_predator.add((state, action, reward, next_state))


In [9]:
# Build a square arena, populate it with random agents and animate it.

# Side length of the square map; change for a bigger/smaller map.
width = 70
num_prey = 15
num_predators = 3

# Prey and Predator read this module-level name, so it must be called `env`.
env = PredatorPreyEnvironment(width, width)

# Prey are spawned first, each at a uniformly random position.
for _ in range(num_prey):
    env.add_agent(Prey(position=np.random.uniform(0, width, 2), speed=2, mass=1.0))

# Predators follow; they are slightly faster than the prey.
for _ in range(num_predators):
    env.add_agent(Predator(position=np.random.uniform(0, width, 2), speed=2.5, mass=1.0))

# Render 200 simulation steps; the trailing expression displays the result
# in the notebook.
animation_html = env.animate(steps=200)
animation_html