# Reinforcement Learning Agent for SASL Sign Learning Simulation

In [None]:

In this tutorial, we simulate an agent learning South African Sign Language (SASL) signs through reinforcement learning (RL).
We define the basic RL components (states, actions, and rewards) and use tabular Q-learning to simulate learning a cyclic sequence of signs.

### Objectives:
- Define states, actions, and rewards
- Implement a Q-learning loop
- Visualise learning progress


In [None]:
# Step 1: Import required libraries
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Step 2: Define environment parameters
#
# Environment model:
#   - states  : hand shapes, indexed 0-4
#   - actions : sign transitions, indexed 0-4
#   - reward  : +1 for the correct transition, 0 otherwise

# Problem dimensions
num_states, num_actions = 5, 5

# Q-learning hyperparameters
learning_rate = 0.1        # step size of each Q-value update
discount_factor = 0.95     # weight given to the next state's best Q-value
exploration_rate = 1.0     # initial probability of picking a random action
exploration_decay = 0.99   # per-episode multiplier applied to the exploration rate
episodes = 100             # number of training episodes

# Q-table: one row per state, one column per action, all zeros initially
q_table = np.zeros(shape=(num_states, num_actions))

# Ideal mapping, kept simple: in state i the correct action is i
correct_action = list(range(num_states))


In [None]:
# Step 3: Q-learning training loop
#
# This cell owns all of its mutable state: it builds a fresh Q-table, decays a
# local copy of the exploration rate, and draws from a seeded generator.
# Re-running the cell (or Restart & Run All) therefore reproduces the same
# reward curve instead of silently continuing from an earlier run.

SEED = 42                   # fixed seed so exploration is reproducible
MAX_STEPS_PER_EPISODE = 10  # max steps per episode

rng = np.random.default_rng(SEED)
q_table = np.zeros((num_states, num_actions))  # start untrained on every run
epsilon = exploration_rate                     # local copy; the configured value is not modified
rewards = []                                   # total reward collected in each episode

for episode in range(episodes):
    # Each episode starts in a random state (upper bound is exclusive).
    state = int(rng.integers(0, num_states))
    total_reward = 0

    for step in range(MAX_STEPS_PER_EPISODE):
        # Epsilon-greedy selection: random action with probability epsilon,
        # otherwise the action with the highest current Q-value.
        if rng.random() < epsilon:
            action = int(rng.integers(0, num_actions))
        else:
            action = int(np.argmax(q_table[state]))

        reward = 1 if action == correct_action[state] else 0
        # The state always advances to the next one in the cycle, whatever the action.
        next_state = (state + 1) % num_states

        old_value = q_table[state, action]
        next_max = np.max(q_table[next_state])

        # Q-learning update:
        #   Q(s, a) <- Q(s, a) + lr * (r + gamma * max_a' Q(s', a') - Q(s, a))
        q_table[state, action] = old_value + learning_rate * (
            reward + discount_factor * next_max - old_value
        )

        state = next_state
        total_reward += reward

    epsilon *= exploration_decay  # explore less as training progresses
    rewards.append(total_reward)


In [None]:
# Step 4: Plot the reward over time

# Draw the per-episode total reward on an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(rewards)
ax.set_xlabel('Episode')
ax.set_ylabel('Total Reward')
ax.set_title('RL Agent Learning Progress')
ax.grid(True)
plt.show()

In [None]:
# Step 5: Inspect the final Q-table

# Header and table are written by one call, separated by a newline.
print("Learned Q-Table:", q_table, sep="\n")
