# CartPole Evolution Demo

This notebook demonstrates how to evolve Linear Genetic Programming (LGP) individuals that balance the pole in the CartPole environment, using the utilities in this project.


In [1]:
import numpy as np
import gymnasium as gym

from memory_system import MemoryConfig, MemoryBank
from instruction_set import InstructionSet
from operation import ALL_OPS, SCALAR_OPS
from individual import Individual
from population import Population, PopulationConfig
from operators import GeneticOperators
from evaluator import CartPoleEvaluator
from evolution_engine import EvolutionEngine, EvolutionConfig



In [2]:
# One seeded generator is shared by every stochastic component so a fresh
# Restart & Run All reproduces the same run.
rng = np.random.default_rng(seed=0)

# Scalar-only memory layout: 8 scalar registers and 4 scalar observation slots
# (one per CartPole observation component); no vector or matrix registers.
# vector_size / matrix_shape are set to minimal placeholder values.
memory_cfg = MemoryConfig(
    n_scalar=8,
    n_obs_scalar=4,
    n_vector=0,
    n_obs_vector=0,
    vector_size=1,
    n_matrix=0,
    n_obs_matrix=0,
    matrix_shape=(1, 1),
)

# Use only scalar operations for CartPole (actions derived from scalar register)
scalar_ops = [op_cls() for op_cls in SCALAR_OPS]
instruction_set = InstructionSet(scalar_ops, memory_cfg)
operators = GeneticOperators(instruction_set, rng)


In [3]:
# Population settings: 30 individuals, 2 elites, programs capped at 20
# instructions. program_length=(6, 12) is presumably the (min, max) length of
# the randomly generated initial programs -- confirm against PopulationConfig.
population_config = PopulationConfig(
    size=30,
    program_length=(6, 12),
    elitism=2,
    max_program_length=20,
)
population = Population(
    population_config,
    instruction_set,
    memory_cfg,
    operators=operators,
    rng=rng,
)
population.initialize_random(mutate_constants=True)

# Fitness evaluator: 5 episodes of at most 500 steps each, with the action
# read from scalar register 7.
cartpole_eval = CartPoleEvaluator(
    episodes=5,
    max_steps=500,
    output_register=7,
    rng=rng,
)

# Evolution hyper-parameters, named separately so they are easy to tweak.
evolution_config = EvolutionConfig(
    max_generations=10,
    mutation_threshold=0.2,
    constant_mutation_rate=0.2,
    verbose=False,
)
engine = EvolutionEngine(
    population=population,
    operators=operators,
    evaluator=cartpole_eval,
    config=evolution_config,
    rng=rng,
)


In [4]:
# Run the evolutionary loop and report statistics for the population it returns.
final_population = engine.run()
final_population.print_summary()

# Keep the population's `best_ever` individual; it is replayed further down.
best_individual = final_population.best_ever
best_fitness = best_individual.fitness
print("Best fitness:", best_fitness)



Generation 10 | Population size 30
Min: 500.000, Mean: 500.000, Max: 500.000, Std: 0.000
Length mean 10.9, std 2.0
Best ever fitness 500.000 at generation 5
Best fitness: 500.0


In [5]:
def run_episode(
    individual: Individual,
    render_mode: str = "rgb_array",
    max_steps: int = 500,
    output_register: int = 7,
    seed=None,
):
    """Roll out a single CartPole-v1 episode controlled by ``individual``.

    At every step the current observation is loaded into a copy of the
    individual's memory, its program is executed, and the scalar register
    ``output_register`` is thresholded at zero to choose the action
    (``1`` when the value is ``>= 0.0``, otherwise ``0``).

    Args:
        individual: Evolved individual providing ``memory`` and ``program``.
        render_mode: Gymnasium render mode. Frames are collected only when
            this is ``"rgb_array"``; in any other mode the returned frame
            list is empty.
        max_steps: Upper bound on the number of environment steps. The
            default matches the ``max_steps`` given to the evaluator in this
            notebook.
        output_register: Index of the scalar register holding the action
            value. The default matches the evaluator's ``output_register``.
        seed: Optional seed forwarded to ``env.reset`` for a reproducible
            initial state. ``None`` (default) leaves the reset unseeded.

    Returns:
        A ``(total_reward, frames)`` tuple: the summed reward over the
        episode and the list of rendered frames (one per step taken).
    """
    env = gym.make("CartPole-v1", render_mode=render_mode)
    frames = []
    total_reward = 0.0
    # try/finally guarantees the environment (and any render window) is
    # released even if the program or the environment raises mid-episode.
    try:
        observation, _ = env.reset(seed=seed)
        # Work on a copy so the replay never touches the individual's own memory.
        memory = individual.memory.copy()
        for _ in range(max_steps):
            observation = np.asarray(observation, dtype=np.float32)
            memory.load_observation({'scalar': observation.tolist()})
            individual.program.execute(memory)
            action = 1 if memory.read_scalar(output_register) >= 0.0 else 0
            observation, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            if render_mode == "rgb_array":
                frames.append(env.render())
            if terminated or truncated:
                break
    finally:
        env.close()
    return total_reward, frames



In [6]:
import matplotlib.pyplot as plt

# Replay in "rgb_array" mode so frames are actually captured; run_episode only
# collects frames in that mode, so any other mode leaves `frames` empty and the
# preview below would never be drawn.
reward, frames = run_episode(best_individual, render_mode="rgb_array")
print(f"Episode reward: {reward}")

if frames:
    fig, ax = plt.subplots()
    ax.imshow(frames[0])
    ax.set_title("First frame of best policy")
    ax.axis("off")
    plt.show()



Episode reward: 500.0


> Tip: pass `render_mode="human"` to `run_episode` to watch the agent live (requires a display). In that mode no frames are collected, so the returned `frames` list is empty.
