In [1]:
import sys

sys.path.append("megai_man")

from hyperparam_optimization import tune
import optuna

In [None]:
def sample_params(trial: optuna.Trial, n_envs: int, n_steps: int):
    """Draw one hyperparameter configuration from ``trial``.

    The rollout length (``"n_steps"``) and ``"batch_size"`` are pinned to
    1024 and 8192; every other entry is sampled. Suggestions are requested
    in the same order as the keys of the returned dict.

    Args:
        trial: Optuna trial used to draw every sampled value.
        n_envs: Accepted but not read by this function.
        n_steps: Accepted but not read; the returned ``"n_steps"`` is always
            the fixed value 1024.

    Returns:
        A dict of keyword arguments. The key names look like
        Stable-Baselines3 PPO parameters -- TODO confirm against ``tune``.
    """
    return {
        # Held constant for this study.
        "n_steps": 1024,
        "batch_size": 8192,
        # Log-uniform draws.
        "learning_rate": trial.suggest_float(
            "learning_rate", 1e-6, 1e-2, log=True
        ),
        # Discretised in increments of 0.1 between 0.1 and 0.3.
        "clip_range": trial.suggest_float("clip_range", 0.1, 0.3, step=0.1),
        "vf_coef": trial.suggest_float("vf_coef", 0.0, 1.0),
        "ent_coef": trial.suggest_float("ent_coef", 1e-8, 1e-1, log=True),
        "gae_lambda": trial.suggest_float("gae_lambda", 0.8, 1.0, log=True),
        "n_epochs": trial.suggest_int("n_epochs", 1, 10),
        "gamma": trial.suggest_categorical("gamma", [0.99, 0.995, 0.999]),
        "max_grad_norm": trial.suggest_float("max_grad_norm", 0.3, 1),
        "policy_kwargs": {
            "features_extractor_kwargs": {
                # The trial records the exponent ("features_dim_power");
                # the value passed on is the corresponding power of two.
                "features_dim": 2 ** trial.suggest_int(
                    "features_dim_power", 8, 10
                ),
            },
        },
    }


# Launch the search: each trial's configuration comes from `sample_params`
# as defined in this cell (fixed n_steps=1024, batch_size=8192).
tune(
    name="cutman_random_searcher_nsteps1024_1minibatch",
    sample_fn=sample_params,
    timesteps_per_trial=1_000_000,
    n_trials=100,
)

In [None]:
def sample_params(trial: optuna.Trial, n_envs: int, n_steps: int):
    """Draw one hyperparameter configuration from ``trial``.

    The rollout length (``"n_steps"``) and ``"batch_size"`` are pinned to
    128 and 256; every other entry is sampled. Suggestions are requested in
    the same order as the keys of the returned dict.

    Args:
        trial: Optuna trial used to draw every sampled value.
        n_envs: Accepted but not read by this function.
        n_steps: Accepted but not read; the returned ``"n_steps"`` is always
            the fixed value 128.

    Returns:
        A dict of keyword arguments. The key names look like
        Stable-Baselines3 PPO parameters -- TODO confirm against ``tune``.
    """
    return {
        # Held constant for this study.
        "n_steps": 128,
        "batch_size": 256,
        # Log-uniform draws.
        "learning_rate": trial.suggest_float(
            "learning_rate", 1e-6, 1e-2, log=True
        ),
        # Discretised in increments of 0.1 between 0.1 and 0.3.
        "clip_range": trial.suggest_float("clip_range", 0.1, 0.3, step=0.1),
        "vf_coef": trial.suggest_float("vf_coef", 0.0, 1.0),
        "ent_coef": trial.suggest_float("ent_coef", 1e-8, 1e-1, log=True),
        "gae_lambda": trial.suggest_float("gae_lambda", 0.8, 1.0, log=True),
        "n_epochs": trial.suggest_int("n_epochs", 1, 10),
        "gamma": trial.suggest_categorical("gamma", [0.99, 0.995, 0.999]),
        "max_grad_norm": trial.suggest_float("max_grad_norm", 0.3, 1),
        "policy_kwargs": {
            "features_extractor_kwargs": {
                # The trial records the exponent ("features_dim_power");
                # the value passed on is the corresponding power of two.
                "features_dim": 2 ** trial.suggest_int(
                    "features_dim_power", 8, 10
                ),
            },
        },
    }


# Launch the search: each trial's configuration comes from `sample_params`
# as defined in this cell, which fixes n_steps=128 and batch_size=256.
#
# The study name previously said "nsteps256", which did not match the
# n_steps value the sampler actually returns (256 is the batch size). The
# label now records the real rollout length so results are not attributed
# to the wrong configuration.
# NOTE(review): if a study was already persisted under the old
# "..._nsteps256_4minibatch" name, rename it in storage (or restore the old
# label) before resuming -- confirm how `tune` handles existing studies.
tune(
    sample_fn=sample_params,
    name="cutman_random_searcher_nsteps128_4minibatch",
    n_trials=100,
    timesteps_per_trial=1_000_000,
)