In [17]:
import gymnasium as gym
import numpy as np

# Single shared Cart-Pole environment instance for this script
env = gym.make("CartPole-v1")

def play_episode(w0, w1, w2, w3, environment=None):
    """
    Play one episode of Cart-Pole with a linear threshold policy.

    At every step the four observation features are combined into a
    weighted sum. A negative sum selects action 0; any other value
    (including exactly zero) selects action 1.

    Args:
        w0: Weight applied to observation[0] (cart position).
        w1: Weight applied to observation[1] (cart velocity).
        w2: Weight applied to observation[2] (pole angle).
        w3: Weight applied to observation[3] (pole angular velocity).
        environment: Optional object providing ``reset()`` (returning
            ``(observation, info)``) and ``step(action)`` (returning
            ``(observation, reward, terminated, truncated, info)``).
            When omitted, the module-level ``env`` is used, which keeps
            existing four-argument calls working unchanged.

    Returns:
        The total number of steps taken before the episode reported
        ``terminated`` or ``truncated``.
    """
    # Resolve the environment at call time so the global is only required
    # when the caller did not inject one.
    active_env = env if environment is None else environment

    observation, info = active_env.reset()
    total_steps = 0

    while True:
        # Extract the four features
        cart_position = observation[0]
        cart_velocity = observation[1]
        pole_angle = observation[2]
        pole_angular_velocity = observation[3]

        # Compute weighted sum (same left-to-right order on every step)
        weighted_sum = (w0 * cart_position +
                        w1 * cart_velocity +
                        w2 * pole_angle +
                        w3 * pole_angular_velocity)

        # Decide action based on sign: 0 = move left, 1 = move right
        action = 0 if weighted_sum < 0 else 1

        # Take action; reward is ignored because the score is the step count
        observation, reward, terminated, truncated, info = active_env.step(action)
        total_steps += 1

        if terminated or truncated:
            break

    return total_steps

# Monte Carlo algorithm: evaluate randomly drawn weight vectors, one episode
# each, and keep the one whose episode lasted longest.
NUM_ITERATIONS = 10000  # number of random weight vectors to try
MAX_STEPS = 500         # step count this script treats as the maximum score

best_weights = None
best_steps = 0
max_score_weights = []  # Track all weights that achieve maximum score

print(f"Running Monte Carlo algorithm for {NUM_ITERATIONS:,} iterations...")
print(f"Note: Maximum possible steps in Cart-Pole is {MAX_STEPS}\n")

try:
    for iteration in range(NUM_ITERATIONS):
        # Initialize random weights, each drawn uniformly from [-1, 1)
        w0 = np.random.uniform(-1, 1)
        w1 = np.random.uniform(-1, 1)
        w2 = np.random.uniform(-1, 1)
        w3 = np.random.uniform(-1, 1)

        # Play game
        steps = play_episode(w0, w1, w2, w3)

        # Record if this is the best (strict '>' keeps the earliest winner)
        if steps > best_steps:
            best_steps = steps
            best_weights = [w0, w1, w2, w3]
            print(f"New best at iteration {iteration + 1}: {steps} steps")

        # Track if we achieved maximum score
        if steps == MAX_STEPS:
            max_score_weights.append([w0, w1, w2, w3])

        # Progress update
        if (iteration + 1) % 1000 == 0:
            print(f"Iteration {iteration + 1}: Best = {best_steps}, "
                  f"Weights reaching max score: {len(max_score_weights)}")
finally:
    # Release the environment even if an episode raised part-way through
    env.close()

# Final results
print("\n" + "="*60)
print("MONTE CARLO ALGORITHM RESULTS")
print("="*60)
if best_weights is None:
    # Only reachable when no episode was played (NUM_ITERATIONS == 0)
    print("No episodes were played; no weights to report.")
else:
    print("Best weights found:")
    print(f"  w0 (Cart Position):        {best_weights[0]:8.4f}")
    print(f"  w1 (Cart Velocity):        {best_weights[1]:8.4f}")
    print(f"  w2 (Pole Angle):           {best_weights[2]:8.4f}")
    print(f"  w3 (Pole Angular Velocity): {best_weights[3]:8.4f}")
print(f"\nTotal steps achieved: {best_steps}")
print(f"Number of weight combinations that reached maximum ({MAX_STEPS} steps): {len(max_score_weights)}")

# Show statistics if we found weights that reached max
if max_score_weights:
    print(f"\nOut of {NUM_ITERATIONS:,} attempts, {len(max_score_weights)} reached the maximum score")
    # Percentage derived from the actual iteration count rather than a fixed /100
    print(f"Success rate: {100 * len(max_score_weights) / NUM_ITERATIONS:.2f}%")

Running Monte Carlo algorithm for 10,000 iterations...
Note: Maximum possible steps in Cart-Pole is 500

New best at iteration 1: 88 steps
New best at iteration 13: 102 steps
New best at iteration 14: 148 steps
New best at iteration 35: 280 steps
New best at iteration 39: 500 steps
Iteration 1000: Best = 500, Weights reaching max score: 23
Iteration 2000: Best = 500, Weights reaching max score: 52
Iteration 3000: Best = 500, Weights reaching max score: 86
Iteration 4000: Best = 500, Weights reaching max score: 117
Iteration 5000: Best = 500, Weights reaching max score: 145
Iteration 6000: Best = 500, Weights reaching max score: 178
Iteration 7000: Best = 500, Weights reaching max score: 212
Iteration 8000: Best = 500, Weights reaching max score: 242
Iteration 9000: Best = 500, Weights reaching max score: 279
Iteration 10000: Best = 500, Weights reaching max score: 316

MONTE CARLO ALGORITHM RESULTS
Best weights found:
  w0 (Cart Position):         -0.0410
  w1 (Cart Velocity):         