<a href="https://colab.research.google.com/github/vanessaNjoroge2/AI_Practical-Assignment/blob/main/Learning_Agent_Simulation_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import random

# Environment: a 3x3 grid world whose cells are addressed as (row, col).
grid_size = 3
goal_state = (2, 2)  # the bottom-right cell ends an episode

# Hyperparameters for tabular Q-learning
alpha = 0.5     # learning rate: how far each update moves the old estimate
gamma = 0.9     # discount factor applied to the next state's best value
epsilon = 0.2   # probability of picking a random action (exploration)
episodes = 50   # number of training episodes to run

# Q-table indexed as Q[row, col, action]; every estimate starts at zero.
# Action codes: 0=up, 1=down, 2=left, 3=right
Q = np.zeros(shape=(grid_size, grid_size, 4))

# Function to choose action (epsilon-greedy)
def choose_action(state):
    """Pick an action for ``state`` with an epsilon-greedy policy.

    With probability ``epsilon`` a uniformly random action code in 0..3 is
    returned; otherwise the action with the largest Q-value for ``state`` is
    returned (``np.argmax`` yields the lowest index when values tie).

    Args:
        state: ``(row, col)`` pair indexing the first two axes of ``Q``.

    Returns:
        An integer action code (0=up, 1=down, 2=left, 3=right).
    """
    exploring = random.uniform(0, 1) < epsilon
    if exploring:
        # Explore: ignore the Q-table and sample any of the four actions.
        return random.randint(0, 3)

    # Exploit: take the action currently believed to be best.
    row, col = state
    action_values = Q[row, col]
    return np.argmax(action_values)

# Function to take action and return next state and reward
def step(state, action):
    """Apply ``action`` in ``state`` and report the outcome.

    A move that would leave the grid (and any action code outside 0..3)
    leaves the position unchanged, but still costs the per-move penalty.

    Args:
        state: current ``(row, col)`` position.
        action: action code (0=up, 1=down, 2=left, 3=right).

    Returns:
        A ``(next_state, reward, done)`` tuple: the resulting position, a
        reward of 1 on reaching ``goal_state`` or -0.1 otherwise, and a flag
        that is True exactly when the goal was reached.
    """
    row, col = state
    last_index = grid_size - 1

    # Select the direction first, then move only if a wall is not in the way.
    if action == 0:        # up
        if row > 0:
            row -= 1
    elif action == 1:      # down
        if row < last_index:
            row += 1
    elif action == 2:      # left
        if col > 0:
            col -= 1
    elif action == 3:      # right
        if col < last_index:
            col += 1

    next_state = (row, col)
    done = next_state == goal_state
    if done:
        reward = 1
    else:
        reward = -0.1  # small penalty for each move
    return next_state, reward, done

# Training loop
for episode in range(episodes):
    # Each episode restarts at the top-left corner with fresh counters.
    state, total_reward, steps, done = (0, 0), 0, 0, False

    while not done:
        action = choose_action(state)
        next_state, reward, done = step(state, action)

        # Q-learning (temporal-difference) update: nudge the estimate for the
        # action just taken towards reward + discounted best next-state value.
        x, y = state
        nx, ny = next_state
        td_target = reward + gamma * np.max(Q[nx, ny])
        td_error = td_target - Q[x, y, action]
        Q[x, y, action] += alpha * td_error

        # Advance the agent and the per-episode statistics.
        steps += 1
        total_reward += reward
        state = next_state

    print(f"Episode {episode+1}: Total Reward = {total_reward:.2f}, Steps taken = {steps}")

# Report the learned action values once training is finished.
print("\nTrained Q-Table:")
print(Q)


Episode 1: Total Reward = -2.30, Steps taken = 34
Episode 2: Total Reward = 0.60, Steps taken = 5
Episode 3: Total Reward = 0.70, Steps taken = 4
Episode 4: Total Reward = 0.30, Steps taken = 8
Episode 5: Total Reward = 0.60, Steps taken = 5
Episode 6: Total Reward = 0.60, Steps taken = 5
Episode 7: Total Reward = 0.70, Steps taken = 4
Episode 8: Total Reward = 0.70, Steps taken = 4
Episode 9: Total Reward = 0.50, Steps taken = 6
Episode 10: Total Reward = 0.70, Steps taken = 4
Episode 11: Total Reward = 0.50, Steps taken = 6
Episode 12: Total Reward = 0.70, Steps taken = 4
Episode 13: Total Reward = 0.70, Steps taken = 4
Episode 14: Total Reward = 0.70, Steps taken = 4
Episode 15: Total Reward = 0.70, Steps taken = 4
Episode 16: Total Reward = 0.70, Steps taken = 4
Episode 17: Total Reward = 0.70, Steps taken = 4
Episode 18: Total Reward = 0.70, Steps taken = 4
Episode 19: Total Reward = 0.70, Steps taken = 4
Episode 20: Total Reward = 0.70, Steps taken = 4
Episode 21: Total Reward = 