In [5]:
import numpy as np
import random

# Step 1: Define the environment
class AntennaArrayEnvironment:
    """Toy steering environment for an antenna array.

    The observation is the tuple ``(current_angle, interference)``. Each step
    moves the steering angle by at most one degree and yields a simulated SNR
    that decreases linearly with ``abs(current_angle)``, so 0 degrees gives
    the highest SNR.
    """

    # Discrete action indices accepted by step(): 0 -> -1 deg, 1 -> 0, 2 -> +1.
    VALID_ACTIONS = (0, 1, 2)

    def __init__(self, angle_range=(-90, 90), snr_target=20):
        """Create the environment.

        Args:
            angle_range: ``(min, max)`` steering angle in degrees, inclusive.
            snr_target: Target Signal-to-Noise Ratio the reward is measured
                against.
        """
        self.angle_range = angle_range  # Range of angles (degrees)
        self.snr_target = snr_target  # Target Signal-to-Noise Ratio (SNR)
        self.current_angle = 0  # Initial angle
        self.interference = random.uniform(5, 15)  # Random interference

    def reset(self):
        """Start a new episode with a random angle and a fresh interference.

        Returns:
            The initial observation ``(current_angle, interference)``.
        """
        self.current_angle = random.uniform(*self.angle_range)
        self.interference = random.uniform(5, 15)
        return self.current_angle, self.interference

    def step(self, action):
        """Apply one steering action and simulate the resulting SNR.

        Args:
            action: Action index: 0 decreases the angle by one degree,
                1 leaves it unchanged, 2 increases it by one degree.

        Returns:
            ``((current_angle, interference), reward, done)`` where ``reward``
            is ``snr - snr_target`` and ``done`` is True when the SNR is
            within 0.5 of the target.

        Raises:
            ValueError: If ``action`` is not 0, 1 or 2.
        """
        if action not in self.VALID_ACTIONS:
            raise ValueError(
                f"action must be one of {self.VALID_ACTIONS}, got {action!r}"
            )

        # Map action {0, 1, 2} to an angle change of {-1, 0, +1} degrees.
        angle_adjustment = int(action) - 1
        # float() keeps the state a plain Python number rather than the NumPy
        # scalar that np.clip returns.
        self.current_angle = float(
            np.clip(self.current_angle + angle_adjustment, *self.angle_range)
        )

        # Simulate SNR based on current angle: pointing error and interference
        # both subtract from the target, plus uniform noise in [-1, 1].
        snr = (
            self.snr_target
            - abs(self.current_angle)
            + random.uniform(-1, 1)
            - self.interference
        )

        # Reward is the signed distance from the target. The original
        # two-branch expression evaluated to this same value on both sides.
        reward = snr - self.snr_target

        # Termination condition: SNR within 0.5 of the target.
        # NOTE(review): with the interference drawn by __init__/reset (5 to 15)
        # and noise limited to [-1, 1], snr is always at least 4 below the
        # target, so this never fires; callers must cap the episode length.
        done = bool(abs(reward) < 0.5)

        return (self.current_angle, self.interference), reward, done

# Step 2: Define the Q-Learning agent
class QLearningAgent:
    """Tabular Q-learning agent with epsilon-greedy action selection.

    States and actions are integer indices into ``q_table``, which has shape
    ``(state_space, action_space)`` and starts at zero.
    """

    def __init__(self, state_space, action_space, learning_rate=0.1, discount_factor=0.99, exploration_rate=1.0, exploration_decay=0.99):
        """Create the agent.

        Args:
            state_space: Number of discrete states (rows of the Q-table).
            action_space: Number of discrete actions (columns of the Q-table).
            learning_rate: Step size applied to each temporal-difference error.
            discount_factor: Weight of the next state's value in the target.
            exploration_rate: Initial probability of picking a random action.
            exploration_decay: Factor ``exploration_rate`` is multiplied by on
                every call to ``decay_exploration``.
        """
        self.state_space = state_space
        self.action_space = action_space
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay
        self.q_table = np.zeros((state_space, action_space))

    def choose_action(self, state):
        """Return an action index for ``state``.

        With probability ``exploration_rate`` a uniformly random action is
        returned; otherwise the action with the highest Q-value (first one on
        ties, as given by ``np.argmax``).
        """
        if random.uniform(0, 1) < self.exploration_rate:
            return random.randint(0, self.action_space - 1)
        # int() so both branches return the same plain Python type.
        return int(np.argmax(self.q_table[state]))

    def update_q_value(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update for an observed transition.

        Args:
            state: Index of the state the action was taken in.
            action: Index of the action taken.
            reward: Reward received for the transition.
            next_state: Index of the resulting state.
            done: True when the transition ended the episode. The target is
                then just ``reward``; a terminal state has no future return
                to bootstrap from. Defaults to False, which reproduces the
                update used when the flag is not supplied.
        """
        td_target = reward
        if not done:
            # Value of the greedy action in the next state.
            td_target += self.discount_factor * np.max(self.q_table[next_state])
        td_error = td_target - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error

    def decay_exploration(self):
        """Multiply ``exploration_rate`` by ``exploration_decay``."""
        self.exploration_rate *= self.exploration_decay

# Step 3: Train the agent and track rewards for each angle
env = AntennaArrayEnvironment()

# The angle range is inclusive at both ends, so -90..90 spans 181 whole-degree
# states. A 180-row table forced +89 and +90 degrees to share one row.
min_angle, max_angle = env.angle_range
n_states = int(max_angle - min_angle) + 1
agent = QLearningAgent(state_space=n_states, action_space=3)


def angle_to_state(angle):
    """Map a steering angle to the Q-table row of its nearest whole degree."""
    return int(np.clip(np.round(angle - min_angle), 0, n_states - 1))


episodes = 500
max_steps = 100  # Limit steps per episode
# Dictionary to track rewards for each angle: the total reward of an episode
# is credited to the (rounded) angle that episode finished on.
angle_rewards = {angle: 0 for angle in range(int(min_angle), int(max_angle) + 1)}

for episode in range(episodes):
    start_angle, _ = env.reset()
    state = angle_to_state(start_angle)
    total_reward = 0

    for _ in range(max_steps):
        action = agent.choose_action(state)
        next_observation, reward, done = env.step(action)
        # Only the angle component of the observation indexes the Q-table.
        next_state = angle_to_state(next_observation[0])

        agent.update_q_value(state, action, reward, next_state)
        state = next_state
        total_reward += reward

        if done:
            break

    # Track total rewards per angle
    int_angle = int(np.round(env.current_angle))  # Convert to nearest integer
    angle_rewards[int_angle] += total_reward
    agent.decay_exploration()

# Step 4: Display rewards for each angle
print("Angle Rewards Summary:")
# Keys are unique, so sorting them alone gives the same ascending-angle order
# as sorting the (angle, reward) pairs.
for angle in sorted(angle_rewards):
    reward = angle_rewards[angle]
    print(f"Angle: {angle}°, Total Reward: {reward:.2f}")


Angle Rewards Summary:
Angle: -90°, Total Reward: -27904.42
Angle: -89°, Total Reward: 0.00
Angle: -88°, Total Reward: -28501.72
Angle: -87°, Total Reward: -27253.82
Angle: -86°, Total Reward: -10181.13
Angle: -85°, Total Reward: 0.00
Angle: -84°, Total Reward: -9573.30
Angle: -83°, Total Reward: -9154.73
Angle: -82°, Total Reward: -36334.94
Angle: -81°, Total Reward: 0.00
Angle: -80°, Total Reward: -27940.41
Angle: -79°, Total Reward: -17124.20
Angle: -78°, Total Reward: -26782.19
Angle: -77°, Total Reward: 0.00
Angle: -76°, Total Reward: -17658.85
Angle: -75°, Total Reward: -18285.66
Angle: -74°, Total Reward: -33670.26
Angle: -73°, Total Reward: -51354.32
Angle: -72°, Total Reward: -16338.52
Angle: -71°, Total Reward: 0.00
Angle: -70°, Total Reward: -7746.96
Angle: -69°, Total Reward: -46756.62
Angle: -68°, Total Reward: 0.00
Angle: -67°, Total Reward: -16464.09
Angle: -66°, Total Reward: -15776.41
Angle: -65°, Total Reward: -23098.92
Angle: -64°, Total Reward: -28902.18
Angle: -63°