In [20]:
# =========================================================
# Lab 12: Reinforcement Learning - CartPole & MountainCar
# =========================================================

# Install dependencies
!pip install -q gymnasium pygame

# Imports
import gymnasium as gym
import pygame
import numpy as np

# =========================================================
# PART 1: CartPole-v1 Environment
# =========================================================

def run_cartpole(num_episodes=50, display_score=True, delay=20, rule_based=False):
    """Play CartPole-v1 in a render window and return the per-episode scores.

    Args:
        num_episodes: Number of episodes to play.
        display_score: If True, draw an "Episode | Score" overlay on the
            rendered frame after every step and pause for ``delay`` ms.
        delay: Pause after each overlay draw, in milliseconds. Only applied
            when ``display_score`` is True.
        rule_based: If True, push the cart toward the side the pole is
            leaning (sign of ``state[2]``); otherwise sample random actions.

    Returns:
        A list with the total reward of each episode, in play order.
    """
    env = gym.make("CartPole-v1", render_mode="human")
    font = None
    episode_scores = []

    # try/finally so the render window is torn down even if the loop is
    # interrupted (e.g. KeyboardInterrupt while running in a notebook).
    try:
        for episode in range(1, num_episodes + 1):
            state, info = env.reset()
            done = False
            score = 0

            while not done:
                # Choose action
                if rule_based:
                    # Rule-based: push in direction of pole angle
                    action = 1 if state[2] > 0 else 0
                else:
                    action = env.action_space.sample()  # random action

                state, reward, terminated, truncated, info = env.step(action)
                done = terminated or truncated
                score += reward

                # Display score using pygame
                if display_score:
                    if font is None:
                        # Lazily set up the font module on first use.
                        pygame.font.init()
                        font = pygame.font.SysFont("Arial", 24)

                    surface = pygame.display.get_surface()
                    # get_surface() returns None when no display is active;
                    # skip the overlay instead of crashing in that case.
                    if surface is not None:
                        text = font.render(
                            f"Episode: {episode} | Score: {int(score)}",
                            True,
                            (0, 255, 0),
                        )
                        # Draw on top of the frame the environment has just
                        # rendered. Do NOT fill the surface first: that would
                        # blank out the simulation. The next env.step()
                        # redraws the whole frame, which removes stale text.
                        surface.blit(text, (200, 20))
                        pygame.display.update()
                    pygame.time.delay(delay)

            print(f"Episode {episode} Score: {score}")
            episode_scores.append(score)
    finally:
        env.close()
        pygame.quit()

    # default= keeps the summary from raising ValueError when no episode ran
    # (num_episodes <= 0); -inf mirrors what run_mountaincar reports then.
    best_score = max(episode_scores, default=-float("inf"))
    print(f"\nCartPole: Maximum Score Achieved: {best_score}")
    return episode_scores

# Run CartPole with the function defaults (50 episodes, random actions,
# on-screen score overlay) and keep the per-episode scores.
cartpole_scores = run_cartpole()

# =========================================================
# PART 2: MountainCar-v0 Environment
# =========================================================

def run_mountaincar(num_episodes=20, display_score=True, delay=20, rule_based=True):
    """Play MountainCar-v0 in a render window and return the per-episode scores.

    Args:
        num_episodes: Number of episodes to play.
        display_score: If True, draw an "Episode | Score" overlay on the
            rendered frame after every step and pause for ``delay`` ms.
        delay: Pause after each overlay draw, in milliseconds. Only applied
            when ``display_score`` is True.
        rule_based: If True, push in the direction the car is already moving
            (action 2 when ``state[1] > 0``, else action 0); otherwise sample
            random actions.

    Returns:
        A list with the total reward of each episode, in play order.
    """
    env = gym.make("MountainCar-v0", render_mode="human")
    font = None
    episode_scores = []

    # try/finally so the render window is torn down even if the loop is
    # interrupted (e.g. KeyboardInterrupt while running in a notebook).
    try:
        for episode in range(1, num_episodes + 1):
            state, info = env.reset()
            done = False
            score = 0
            step_count = 0

            while not done:
                step_count += 1
                # Rule-based policy: use velocity to push car
                if rule_based:
                    action = 2 if state[1] > 0 else 0
                else:
                    action = env.action_space.sample()  # random action

                state, reward, terminated, truncated, info = env.step(action)
                done = terminated or truncated
                score += reward

                if display_score:
                    if font is None:
                        # Lazily set up the font module on first use.
                        pygame.font.init()
                        font = pygame.font.SysFont("Arial", 24)

                    surface = pygame.display.get_surface()
                    # get_surface() returns None when no display is active;
                    # skip the overlay instead of crashing in that case.
                    if surface is not None:
                        text = font.render(
                            f"Episode: {episode} | Score: {int(score)}",
                            True,
                            (0, 0, 255),
                        )
                        # Draw on top of the frame the environment has just
                        # rendered. Do NOT fill the surface first: that would
                        # blank out the simulation. The next env.step()
                        # redraws the whole frame, which removes stale text.
                        surface.blit(text, (200, 20))
                        pygame.display.update()
                    pygame.time.delay(delay)

            print(f"Episode {episode} Score: {score} | Steps: {step_count}")
            episode_scores.append(score)
    finally:
        env.close()
        pygame.quit()

    # Highest episode score; -inf when no episode was played.
    best_score = max(episode_scores, default=-float('inf'))
    print(f"\nMountainCar: Maximum Score Achieved: {best_score}")
    return episode_scores

# Run MountainCar for 10 episodes with the default rule-based policy
# (push in the direction of the current velocity) and keep the scores.
mountaincar_scores = run_mountaincar(num_episodes=10)

# =========================================================
# Notes / Observations:
# 1. CartPole: With random actions there is no learning, so scores vary from episode to episode with no upward trend.
# 2. MountainCar: Rule-based policy builds momentum to reach hilltop efficiently.
# 3. Delays can slow down visualization for clearer observation.
# 4. Text color and position are customizable via pygame font rendering.
# 5. Step counts indicate episode length and performance.
# =========================================================


Episode 1 Score: 21.0
Episode 2 Score: 19.0
Episode 3 Score: 16.0
Episode 4 Score: 14.0
Episode 5 Score: 50.0
Episode 6 Score: 22.0
Episode 7 Score: 32.0
Episode 8 Score: 23.0
Episode 9 Score: 28.0
Episode 10 Score: 16.0
Episode 11 Score: 49.0
Episode 12 Score: 39.0
Episode 13 Score: 11.0
Episode 14 Score: 10.0
Episode 15 Score: 16.0
Episode 16 Score: 46.0
Episode 17 Score: 29.0
Episode 18 Score: 17.0
Episode 19 Score: 15.0
Episode 20 Score: 27.0
Episode 21 Score: 37.0
Episode 22 Score: 13.0
Episode 23 Score: 43.0
Episode 24 Score: 10.0
Episode 25 Score: 28.0
Episode 26 Score: 15.0
Episode 27 Score: 23.0
Episode 28 Score: 13.0
Episode 29 Score: 39.0
Episode 30 Score: 33.0
Episode 31 Score: 29.0
Episode 32 Score: 16.0
Episode 33 Score: 14.0
Episode 34 Score: 51.0
Episode 35 Score: 16.0
Episode 36 Score: 34.0
Episode 37 Score: 18.0
Episode 38 Score: 20.0
Episode 39 Score: 11.0
Episode 40 Score: 12.0
Episode 41 Score: 29.0
Episode 42 Score: 19.0
Episode 43 Score: 8.0
Episode 44 Score: 15.