In [14]:
from collections import deque
import numpy as np

class RollingWindowPolicy:
    """Sliding window of action plans with per-time-step averaging.

    Each stored policy is an indexable sequence of actions. The policy at
    window position ``i`` is treated as having been planned at time step
    ``i``, so its action for time step ``t`` is found at offset ``t - i``.
    """

    def __init__(self, h):
        """Create an empty window that retains at most ``h`` policies."""
        self.h = h  # Horizon; also the maximum number of policies retained
        self.policies = deque()  # Oldest policy on the left, newest on the right

    def add_policy(self, policy):
        """
        Add a new policy to the deque. If the deque already contains `h` policies,
        remove the oldest one to maintain the window size.
        """
        if len(self.policies) == self.h:
            self.policies.popleft()  # Remove the oldest policy if we hit the limit
        self.policies.append(policy)

    def get_averaged_action(self, iteration):
        """
        Return the mean of every stored action that targets ``iteration``.

        The iteration starts at 0. The policy at window position ``i``
        contributes its action at offset ``iteration - i``. Policies at a
        position greater than ``iteration`` are not consulted, and policies
        too short to reach ``iteration`` are skipped.

        Returns:
            The average of the contributing actions, or ``None`` when no
            stored policy provides an action for ``iteration``.

        Raises:
            ValueError: If no policy has been added yet.
        """
        if not self.policies:
            raise ValueError("No policies have been added yet.")

        action_sum = 0
        count = 0

        # Only positions 0..iteration can hold a non-negative offset.
        for i in range(min(iteration + 1, len(self.policies))):
            policy = self.policies[i]
            offset = iteration - i
            # A policy shorter than the requested offset has no action for
            # this step; skipping it avoids an IndexError on late iterations.
            if offset >= len(policy):
                continue
            action_sum += policy[offset]
            count += 1

        return action_sum / count if count > 0 else None

# Demo: fill the window with six plans, then query the first time step.
h = 6  # Window size passed to RollingWindowPolicy
rp = RollingWindowPolicy(h)

# Plan k (k = 1..6) is the five consecutive integers starting at k.
policies = [list(range(first, first + 5)) for first in range(1, 7)]

for policy in policies:
    rp.add_policy(policy)

# Only iteration 0 is evaluated here; the message reports it 1-based.
iteration = 0
averaged_action = rp.get_averaged_action(iteration)
print(f"Iteration {iteration + 1}: Averaged action = {averaged_action}")


Iteration 1: Averaged action = 1.0


In [4]:
import numpy as np

class AveragedRollingPolicy:
    """Window of the most recent ``h`` policies, averaged position by position."""

    def __init__(self, h):
        """Create an empty window holding at most ``h`` policies of ``h`` steps."""
        self.h = h  # Window size and length of the averaged result
        self.policies = []  # Oldest policy first

    def add_policy(self, new_policy):
        """
        Add a new policy and remove the oldest if we exceed the window size.
        """
        if len(self.policies) >= self.h:
            self.policies.pop(0)
        self.policies.append(new_policy)

    def calculate_next_step_policy(self):
        """
        Average the stored policies element-wise over the first ``h`` steps.

        Position ``j`` of the result is the mean of the ``j``-th action of
        every stored policy that has one. No time shift is applied between
        policies. Actions beyond index ``h - 1`` are ignored.

        Returns:
            A float array of length ``h``. Positions that no stored policy
            covers are ``nan``.

        Raises:
            ValueError: If no policy has been added yet.
        """
        if not self.policies:
            raise ValueError("No policies available to calculate the next step.")

        # Per-position running totals and contribution counts.
        next_step_actions = np.zeros(self.h)
        count_actions = np.zeros(self.h)

        for policy in self.policies:
            # zip with range(h) caps the index so a policy longer than the
            # horizon cannot overrun the accumulators.
            for j, action in zip(range(self.h), policy):
                next_step_actions[j] += action
                count_actions[j] += 1

        # Divide only where at least one action was seen; uncovered positions
        # keep the NaN fill instead of triggering a 0/0 RuntimeWarning.
        averaged_policy = np.full(self.h, np.nan)
        np.divide(
            next_step_actions,
            count_actions,
            out=averaged_policy,
            where=count_actions > 0,
        )
        return averaged_policy

# Demo: fill a three-policy window and average it.
h = 3  # Window size passed to AveragedRollingPolicy
arp = AveragedRollingPolicy(h)

# Three plans, each the three consecutive integers starting at 1, 2 and 3.
policies = [[start, start + 1, start + 2] for start in (1, 2, 3)]

for policy in policies:
    arp.add_policy(policy)

# Average the stored plans and show the result.
next_step_policy = arp.calculate_next_step_policy()
print(f"Averaged policy for the next step: {next_step_policy}")

Averaged policy for the next step: [2. 3. 4.]
