In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from collections import OrderedDict


class XoptEnv(gym.Env):
    """
    A custom Gymnasium environment for xopt-style optimization using RL.

    The agent attempts to maximize a target function of N variables.

    * Observation: the current value of every variable (x1, x2, ...), bounded
      by the per-variable [min, max] limits.
    * Action: a normalized step in [-1, 1] per variable; it is multiplied by
      ``action_scale`` to obtain the delta (dx1, dx2, ...) that is applied.
    * Reward: the value of the target function at the new (clipped) point.

    Episodes never end on their own: ``step`` always returns
    ``terminated=False`` and ``truncated=False``. Wrap the environment in a
    time-limit wrapper (e.g. ``gymnasium.wrappers.TimeLimit``) if finite
    episodes are required.
    """

    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self, target_function, variables, action_scale=0.1):
        """
        Initializes the environment.

        :param target_function: The callable function to maximize, which accepts
                                a NumPy array of variable values.
        :param variables: A dictionary mapping variable names to their [min, max] bounds.
                          Example: {"x1": [-5.0, 5.0], "x2": [0.0, 10.0]}
        :param action_scale: The factor by which the normalized action [-1, 1] is scaled.
        :raises ValueError: If ``variables`` is empty, a bound is not a
                            ``[min, max]`` pair, or a minimum exceeds its maximum.
        """
        super().__init__()

        self.variables = OrderedDict(variables)
        self.var_names = list(self.variables.keys())
        self.n_variables = len(self.variables)

        self.target_function = target_function
        self.action_scale = action_scale

        # The bounds never change after construction, so convert them once
        # here instead of rebuilding the array on every reset()/step() call.
        var_bounds = np.asarray(list(self.variables.values()), dtype=np.float32)
        if var_bounds.shape != (self.n_variables, 2):
            raise ValueError(
                "`variables` must map at least one name to a [min, max] pair."
            )
        self._low = var_bounds[:, 0]
        self._high = var_bounds[:, 1]
        if np.any(self._low > self._high):
            raise ValueError("Every variable needs min <= max bounds.")

        self.observation_space = spaces.Box(
            low=self._low,
            high=self._high,
            shape=(self.n_variables,),
            dtype=np.float32,
        )

        # --- Define Action Space (Deltas for each Variable) ---
        # The action is a vector of N dimensions, each in [-1.0, 1.0]
        self.action_space = spaces.Box(
            low=np.array([-1.0] * self.n_variables, dtype=np.float32),
            high=np.array([1.0] * self.n_variables, dtype=np.float32),
            shape=(self.n_variables,),
            dtype=np.float32,
        )

        # current_x is a NumPy array of shape (N,) once reset() has been called
        self.current_x = None

    def _get_obs(self):
        """Helper to get the observation (current variable values) as float32."""
        return self.current_x.astype(np.float32)

    def _calculate_reward(self, x_array):
        """
        The reward is the objective value we want to maximize.

        :param x_array: NumPy array of all variable values.
        :return: Whatever ``target_function`` returns for ``x_array``.
        """
        # The target function must be able to handle an N-dimensional array input
        return self.target_function(x_array)

    def reset(self, seed=None, options=None):
        """
        Initializes the environment by sampling a starting point for all variables.
        The starting point is sampled uniformly between the bounds for each variable.

        :param seed: Optional seed forwarded to ``gym.Env.reset`` to seed ``np_random``.
        :param options: Accepted for API compatibility; not used.
        :return: Tuple ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)

        # Sample a random starting point for ALL variables
        self.current_x = self.np_random.uniform(
            low=self._low, high=self._high, size=self.n_variables
        )

        return self._get_obs(), {}

    def step(self, action):
        """
        Applies the action (delta_x vector) to the current state, evaluates the function,
        and returns the reward.

        :param action: A NumPy array (or array-like) of shape (N,) from the action
                       space, where each element is in [-1.0, 1.0]. Values outside
                       that range are clipped to it.
        :return: Tuple ``(observation, reward, terminated, truncated, info)``;
                 ``info["objective_value"]`` repeats the reward.
        :raises RuntimeError: If called before ``reset()``.
        :raises ValueError: If ``action`` does not contain exactly N values.
        """
        if self.current_x is None:
            raise RuntimeError("step() called before reset(); call reset() first.")

        # Accept lists/tuples as well as arrays and enforce the declared shape
        # so a mis-shaped action cannot silently broadcast into the state.
        action = np.asarray(action, dtype=np.float64).reshape(-1)
        if action.size != self.n_variables:
            raise ValueError(
                f"Expected an action with {self.n_variables} values, got {action.size}."
            )

        # Keep the step inside the declared action space before scaling:
        # delta_x = clip(action, -1, 1) * action_scale
        delta_x = np.clip(action, -1.0, 1.0) * self.action_scale

        # Move, then clip ALL variables back into their [min, max] bounds
        self.current_x = np.clip(self.current_x + delta_x, self._low, self._high)

        # Calculate the reward using the N-dimensional state; cast so callers
        # always receive a plain Python float.
        reward = float(self._calculate_reward(self.current_x))

        # This environment defines no end-of-episode condition of its own.
        terminated = False
        truncated = False

        observation = self._get_obs()
        info = {"objective_value": reward}

        return observation, reward, terminated, truncated, info

In [None]:
from stable_baselines3 import PPO

# Presumably the number of parallel environments to train on.
# NOTE(review): this constant is not referenced anywhere in the visible code;
# the PPO model below is built on a single XoptEnv instance. Confirm whether
# vectorized training was intended.
N_ENVS = 4


def quadratic_n_dim(x):
    """Target function: f(x) = -sum((xi - 2)^2) + 4 * N_dim

    The optimal value is 4 * N_dim, reached when all xi = 2.

    :param x: Array-like of N variable values (NumPy array, list, tuple or a
              single scalar); it is converted to a float array first.
    :return: The objective value as a Python float.
    """
    # Coerce so plain sequences and scalars work, not only ndarrays.
    values = np.atleast_1d(np.asarray(x, dtype=float))
    n_dim = len(values)
    return float(-np.sum((values - 2.0) ** 2) + 4.0 * n_dim)


# Define variables for a 3D problem: name -> [min, max] bounds
three_d_variables = {"x1": [-5.0, 5.0], "x2": [-10.0, 10.0], "x3": [0.0, 5.0]}

# XoptEnv.step() always returns terminated=False and truncated=False, so
# without a time limit PPO would collect one endless trajectory from a single
# random start. Truncating episodes forces periodic resets, so training covers
# many starting points. With action_scale=0.5 the widest variable (x2, span 20)
# needs 40 full-size steps to cross, so 100 steps leaves ample headroom.
MAX_EPISODE_STEPS = 100

# Create the environment
env = gym.wrappers.TimeLimit(
    XoptEnv(
        target_function=quadratic_n_dim, variables=three_d_variables, action_scale=0.5
    ),
    max_episode_steps=MAX_EPISODE_STEPS,
)

# PPO with an MLP policy; the keyword values below are passed to the
# stable_baselines3 PPO constructor unchanged.
model = PPO(
    "MlpPolicy",
    env,
    verbose=0,
    gamma=0.99,
    n_steps=256,
    ent_coef=0.01,
    device="auto",
)

TIMESTEPS = 25000
print(f"Starting PPO training for {TIMESTEPS} timesteps...")
model.learn(total_timesteps=TIMESTEPS)
print("Training complete.")

In [None]:
from xopt.generators.rl import RLModelGenerator
from xopt import Xopt, Evaluator
from xopt.vocs import VOCS
import gymnasium as gym


def quadratic_n_dim(x):
    """
    Target function: f(x) = -sum((x_i - 2)^2) + 4 * N_dim

    The optimal value is 4 * N_dim, reached when all x_i = 2.

    :param x: Array-like of N variable values (NumPy array of shape (N,), list,
              tuple or a single scalar); it is converted to a float array first.
    :return: The objective value as a Python float.
    """
    # Coerce so plain sequences and scalars work, not only ndarrays.
    values = np.atleast_1d(np.asarray(x, dtype=float))
    n_dim = len(values)
    return float(-np.sum((values - 2.0) ** 2) + 4.0 * n_dim)


def objective_function(input_data: dict) -> dict:
    """
    Adapter that lets Xopt's Evaluator call the array-based target function.

    :param input_data: Dictionary holding one value under each of the keys
                       'x1', 'x2' and 'x3'.
    :return: Dictionary with the objective value stored under 'f'.
    """
    # Read the variables in a fixed order so the array layout is x1, x2, x3.
    coordinates = [input_data[key] for key in ("x1", "x2", "x3")]
    objective = quadratic_n_dim(np.array(coordinates))
    return {"f": objective}


# Problem definition handed to VOCS: three bounded variables and a single
# objective "f" that is to be maximized.
multi_variable_vocs_data = {
    "variables": {"x1": [-5.0, 5.0], "x2": [-10.0, 10.0], "x3": [0.0, 5.0]},
    "objectives": {"f": "MAXIMIZE"},
}
vocs = VOCS(**multi_variable_vocs_data)

# Build the RL-based generator and give it the PPO `model` created earlier in
# this file.
generator = RLModelGenerator(vocs=vocs)
generator.set_model(model)

# The evaluator calls objective_function, which returns {"f": ...}.
evaluator = Evaluator(function=objective_function)

X = Xopt(generator=generator, evaluator=evaluator, vocs=vocs)
# NOTE(review): the "more iterations" message below assumes this cap counts the
# random initial evaluations as well — confirm against Xopt's behavior.
X.max_evaluations = 10


print("\n--- Xopt Configuration Summary ---")
print(X)

# Seed the run with two randomly chosen points before the generator takes over.
print("\nPerforming 2 random initial evaluations...")
X.random_evaluate(2)

print("\n--- Initial Optimization Results ---")
print(X.data[["x1", "x2", "x3", "f"]])

# Reports the remaining budget as max_evaluations minus the rows already in X.data.
print(f"\nRunning Xopt for {X.max_evaluations - len(X.data)} more iterations...")
X.run()

print("\n--- Final Optimization Results ---")
print(X.data[["x1", "x2", "x3", "f"]])