In [3]:
import torch
from utils.es_utils import gradient_estimate

In [8]:
def artificial_reward(theta, theta_star):
    """Return the negative squared Euclidean distance between two tensors.

    The reward is 0.0 when ``theta`` equals ``theta_star`` and becomes more
    negative as the two move apart.

    Args:
        theta: Candidate parameter tensor.
        theta_star: Target parameter tensor (broadcast-compatible with
            ``theta``).

    Returns:
        The reward as a plain Python float.
    """
    offset = theta - theta_star
    squared_error = torch.sum(torch.pow(offset, 2))
    # .item() moves the scalar off the device and into a Python float.
    return -squared_error.item()

def test_es(dim=4, generations=200, sigma=0.1, lr=0.01, num_perturbations=64,
            theta_star=None):
    """Run an evolution-strategies loop on a toy quadratic reward and print progress.

    Starting from a zero vector, each generation draws ``num_perturbations``
    Gaussian perturbations, evaluates ``artificial_reward`` at ``theta + eps``
    and ``theta - eps`` (antithetic pairs), and hands the reward differences
    to ``gradient_estimate``, whose return value becomes the new ``theta``.
    The distance to the target is printed every 10 generations and the final
    ``theta`` and target are printed at the end.

    Args:
        dim: Number of parameters being optimized.
        generations: Number of update steps to run.
        sigma: Standard deviation used to scale the sampled noise.
        lr: Learning rate forwarded to ``gradient_estimate``.
        num_perturbations: Number of antithetic pairs sampled per generation.
        theta_star: Optional target (tensor or sequence of floats) of shape
            ``(dim,)``. Defaults to ``[1.0, -2.0, 0.5, 3.0]``, which requires
            ``dim == 4``.

    Raises:
        ValueError: If the target's shape is not ``(dim,)``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    theta = torch.zeros(dim, device=device)

    if theta_star is None:
        theta_star = [1.0, -2.0, 0.5, 3.0]
    theta_star = torch.as_tensor(theta_star, dtype=theta.dtype, device=device)
    # Fail early: a mismatched target would otherwise raise an opaque shape
    # error deep in the loop, or (for dim=1) silently broadcast.
    if theta_star.shape != theta.shape:
        raise ValueError(
            f"theta_star has shape {tuple(theta_star.shape)} but dim={dim}; "
            "pass a theta_star of matching length"
        )

    for gen in range(generations):
        eps_list = []
        diffs = []

        for _ in range(num_perturbations):
            # NOTE(review): eps is already scaled by sigma here and sigma is
            # also passed to gradient_estimate below; confirm the helper
            # expects pre-scaled noise.
            eps = torch.randn(dim, device=device) * sigma

            r_pos = artificial_reward(theta + eps, theta_star)
            r_neg = artificial_reward(theta - eps, theta_star)

            diffs.append(r_pos - r_neg)
            eps_list.append(eps)

        diffs = torch.tensor(diffs, device=device)

        # Normalize (optional). The sample standard deviation is undefined
        # for fewer than two values, so skip it rather than produce NaNs.
        if diffs.numel() > 1:
            diffs = (diffs - diffs.mean()) / (diffs.std() + 1e-8)

        theta = gradient_estimate(
            theta,
            diffs,
            dim,
            eps_list,
            sigma,
            lr,
            num_perturbations,
        )

        if gen % 10 == 0:
            dist = (theta - theta_star).norm().item()
            print(f"Gen {gen:03d}: distance to optimum = {dist:.4f}")

    print("Final theta:", theta)
    print("Theta★:", theta_star)

In [9]:
test_es()

Gen 000: distance to optimum = 3.7242
Gen 010: distance to optimum = 3.2227
Gen 020: distance to optimum = 2.7302
Gen 030: distance to optimum = 2.2219
Gen 040: distance to optimum = 1.7523
Gen 050: distance to optimum = 1.2617
Gen 060: distance to optimum = 0.7802
Gen 070: distance to optimum = 0.2719
Gen 080: distance to optimum = 0.0262
Gen 090: distance to optimum = 0.0157
Gen 100: distance to optimum = 0.0138
Gen 110: distance to optimum = 0.0342
Gen 120: distance to optimum = 0.0207
Gen 130: distance to optimum = 0.0317
Gen 140: distance to optimum = 0.0377
Gen 150: distance to optimum = 0.0192
Gen 160: distance to optimum = 0.0263
Gen 170: distance to optimum = 0.0293
Gen 180: distance to optimum = 0.0195
Gen 190: distance to optimum = 0.0164
Final theta: tensor([ 1.0154, -1.9911,  0.4936,  2.9928])
Theta★: tensor([ 1.0000, -2.0000,  0.5000,  3.0000])
