In [9]:
import numpy as np

# Define the grid size
grid_size = 4

# Define actions
actions = ['up', 'down', 'left', 'right']

# Define rewards
rewards = np.zeros((grid_size, grid_size))
rewards[3, 3] = 1  # Reward for reaching the goal state

# Define transition probabilities
transition_probs = {
    'up': (lambda x, y: (max(x-1, 0), y)),
    'down': (lambda x, y: (min(x+1, grid_size-1), y)),
    'left': (lambda x, y: (x, max(y-1, 0))),
    'right': (lambda x, y: (x, min(y+1, grid_size-1)))
}

def value_iteration(grid_size, rewards, transition_probs, actions, gamma=0.9, theta=1e-6):
    value_table = np.zeros((grid_size, grid_size))
    policy = np.zeros((grid_size, grid_size), dtype=int)

    while True:
        delta = 0
        for x in range(grid_size):
            for y in range(grid_size):
                if (x, y) == (3, 3):  # Skip the terminal state
                    continue
                v = value_table[x, y]
                q_values = []
                for i, action in enumerate(actions):
                    (next_x, next_y) = transition_probs[action](x, y)
                    q_value = rewards[x, y] + gamma * value_table[next_x, next_y]
                    q_values.append(q_value)
                value_table[x, y] = max(q_values)
                policy[x, y] = np.argmax(q_values)
                delta = max(delta, abs(v - value_table[x, y]))
        if delta < theta:
            break

    return policy, value_table

def print_policy(policy, actions):
    policy_arrows = np.full(policy.shape, ' ')
    for x in range(policy.shape[0]):
        for y in range(policy.shape[1]):
            if (x, y) == (3, 3):
                policy_arrows[x, y] = 'G'  # Goal state
            else:
                policy_arrows[x, y] = actions[policy[x, y]][0].upper()
    for row in policy_arrows:
        print(' '.join(row))

policy, value_table = value_iteration(grid_size, rewards, transition_probs, actions)
print("Optimal Policy:")
print_policy(policy, actions)


Optimal Policy:
U U U U
U U U U
U U U U
U U U G
