In [1]:
import numpy as np

class GridWorld:
    """Square grid environment with a single agent and a single goal cell.

    States are ``(row, col)`` tuples. The agent starts in the top-left
    corner ``(0, 0)`` and the goal sits in the bottom-right corner
    ``(size - 1, size - 1)``.
    """

    # (row offset, column offset) applied for each recognised action name.
    _MOVES = {
        "up": (-1, 0),
        "down": (1, 0),
        "left": (0, -1),
        "right": (0, 1),
    }

    def __init__(self, size=5):
        """Create a ``size`` x ``size`` grid with the agent at the start.

        Args:
            size: Number of rows and columns; must be at least 1.

        Raises:
            ValueError: If ``size`` is smaller than 1.
        """
        if size < 1:
            raise ValueError(f"size must be at least 1, got {size}")
        self.size = size  # Grid is size x size
        self.start_state = (0, 0)  # The agent starts at the top-left corner
        # Derive the goal from the grid size so it is always the bottom-right
        # corner and always lies on the grid, whatever size was requested.
        self.goal_state = (size - 1, size - 1)
        self.state = self.start_state

    def reset(self):
        """Move the agent back to the start state and return that state."""
        self.state = self.start_state
        return self.state

    def step(self, action):
        """Apply one action and report the outcome.

        Args:
            action: One of ``"up"``, ``"down"``, ``"left"``, ``"right"``.
                Any other value leaves the agent where it is (the step
                cost is still charged).

        Returns:
            A ``(state, reward, done)`` tuple: the new ``(row, col)``
            state, ``10`` when the goal is reached and ``-1`` otherwise,
            and a flag that is ``True`` only when the goal is reached.
        """
        row, col = self.state
        d_row, d_col = self._MOVES.get(action, (0, 0))
        last = self.size - 1
        # Clamp to the grid so moving into a wall keeps the agent in place.
        row = min(last, max(0, row + d_row))
        col = min(last, max(0, col + d_col))
        self.state = (row, col)

        if self.state == self.goal_state:
            return self.state, 10, True  # Goal reached, reward +10
        return self.state, -1, False  # Step cost -1

    def render(self):
        """Print the grid: ``A`` is the agent, ``G`` the goal, ``.`` empty.

        The goal is drawn last, so when the agent stands on the goal cell
        only ``G`` is shown.
        """
        grid = np.full((self.size, self.size), ".")
        x, y = self.state
        gx, gy = self.goal_state
        grid[x, y] = "A"  # Agent
        grid[gx, gy] = "G"  # Goal
        print("\n".join(" ".join(row) for row in grid))
        print("\n")

# Smoke test: drive the environment with uniformly random actions until the
# goal is reached, printing every transition and the grid after each move.
actions = ["up", "down", "left", "right"]

env = GridWorld()
state = env.reset()

print("Initial Environment:")
env.render()

done = False
while True:
    # Sample one action name at random on every step.
    action = np.random.choice(actions)
    next_state, reward, done = env.step(action)
    print(f"Action: {action}, New State: {next_state}, Reward: {reward}")
    env.render()
    if done:
        # The episode ends as soon as the environment reports the goal.
        break

Initial Environment:
A . . . .
. . . . .
. . . . .
. . . . .
. . . . G


Action: left, New State: (0, 0), Reward: -1
A . . . .
. . . . .
. . . . .
. . . . .
. . . . G


Action: down, New State: (1, 0), Reward: -1
. . . . .
A . . . .
. . . . .
. . . . .
. . . . G


Action: right, New State: (1, 1), Reward: -1
. . . . .
. A . . .
. . . . .
. . . . .
. . . . G


Action: right, New State: (1, 2), Reward: -1
. . . . .
. . A . .
. . . . .
. . . . .
. . . . G


Action: up, New State: (0, 2), Reward: -1
. . A . .
. . . . .
. . . . .
. . . . .
. . . . G


Action: down, New State: (1, 2), Reward: -1
. . . . .
. . A . .
. . . . .
. . . . .
. . . . G


Action: left, New State: (1, 1), Reward: -1
. . . . .
. A . . .
. . . . .
. . . . .
. . . . G


Action: left, New State: (1, 0), Reward: -1
. . . . .
A . . . .
. . . . .
. . . . .
. . . . G


Action: up, New State: (0, 0), Reward: -1
A . . . .
. . . . .
. . . . .
. . . . .
. . . . G


Action: left, New State: (0, 0), Reward: -1
A . . . .
. . . . .
.