In [1]:
## Imports

import gym 
import numpy as np

from function_approximators.function_approximators import NeuralNetwork, LinearModel, DecisionTree, RandomForest, SupportVectorRegressor, KNeighboursRegressor, GaussianProcess, OnlineGaussianProcess
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics.pairwise import rbf_kernel

from utils.train_utils import train, solve, train_time

from agents.agents import DQNAgent, LinearAgent, FQIAgent, OnlineGaussianProccessAgent
import operator


In [2]:
## Environment

function_approximators = [NeuralNetwork, LinearModel, DecisionTree, RandomForest, SupportVectorRegressor, KNeighboursRegressor, GaussianProcess, OnlineGaussianProcess]

agents = [DQNAgent, LinearAgent, *[FQIAgent]*5, OnlineGaussianProccessAgent]

RENDER = False
env = gym.make("LunarLander-v2")
environment = "lunarlander"

In [5]:
# DQN Config
CONFIG_DQN = {
    "episode_length": 500,
    "max_timesteps": 200000,
    "max_time": 30 * 60,
    "eval_freq": 10000, 
    "eval_episodes": 10,
    "learning_rate": 0.0015,
    "hidden_size": (256,128),
    "target_update_freq": 100,
    "batch_size": 64,
    "gamma": 0.99,
    "buffer_capacity": int(1e6),
    "plot_loss": False,
    "epsilon": 1,
    "max_deduct": 0.95,
    "decay": 0.1,
    "lr_step_size": 1000,
    "lr_gamma": 0.99,
    "max_steps": 500,
    "non_param": False,
}

# Linear Config
CONFIG_LINEAR = {
    "episode_length": 500,
    "max_timesteps": 200000,
    "max_time": 30 * 60,
    "eval_freq": 10000, 
    "eval_episodes": 10,
    "learning_rate": 0.02,
    "target_update_freq": 50,
    "batch_size": 64,
    "gamma": 0.99,
    "buffer_capacity": int(1e5),
    "plot_loss": False,
    "epsilon": 1,
    "max_steps": 500,
    "poly_degree": 4,
    "max_deduct": 0.95,
    "decay": 0.1,
    "lr_step_size": 1000,
    "lr_gamma": 0.99,
    "non_param": False,
}

# Decision Tree Config
CONFIG_DT = {
    "episode_length": 500,
    "max_timesteps": 200000,
    "max_time": 30 * 60,
    "eval_freq": 10000, 
    "eval_episodes": 10,
    "model_save_freq": 0,
    "model_save_capacity": 0,
    "update_freq": 1,
    "batch_size": 128,
    "gamma": 0.99,
    "buffer_capacity": int(1e6),
    "epsilon": 1,
    "max_deduct": 0.9,
    "decay": 0.4,
    "max_steps": 500,
    "non_param": True,
    "model_params": {"criterion":"mse","max_depth": 20, "min_samples_split": 20, "min_samples_leaf": 5},
    "feature_names": ["Cart Position", "Cart Velocity", "Pole Angle", "Pole Angular Velocity", "Action: Push Left", "Action: Push Right"],
    "plot_name": "dt_depth=8",
}

# Random Forest Config
CONFIG_RF = {
    "episode_length": 500,
    "max_timesteps": 200000,
    "max_time": 30 * 1000,
    "eval_freq": 10000, 
    "eval_episodes": 10,
    "model_save_freq": 0,
    "model_save_capacity": 0,
    "update_freq": 10,
    "batch_size": 128,
    "gamma": 0.99,
    "buffer_capacity": int(1e6),
    "epsilon": 1,
    "max_deduct": 0.9,
    "decay": 0.4,
    "max_steps": 500,
    "non_param": True,
    "model_params": {"n_estimators": 10,"max_depth": 20, "min_samples_split": 20, "min_samples_leaf": 5},
}

# Support Vector Regressor Config
CONFIG_SVR = {
    "episode_length": 500,
    "max_timesteps": 200000,
    "max_time": 30 * 1000,
    "eval_freq": 10000, 
    "eval_episodes": 10,
    "model_save_freq": 0,
    "model_save_capacity": 0,
    "update_freq": 200,
    "batch_size": 128,
    "gamma": 0.99,
    "buffer_capacity": int(1e6),
    "epsilon": 1,
    "max_deduct": 0.95,
    "decay": 0.3,
    "max_steps": 500,
    "non_param": True,
    "model_params": {"kernel":"rbf", "degree": 2, "C": 1.2},
}


# K-Neighbors Regressor Config
CONFIG_KNR = {
    "episode_length": 500,
    "max_timesteps": 200000,
    "max_time": 30 * 1000,
    "eval_freq": 10000, 
    "eval_episodes": 10,
    "model_save_freq": 0,
    "model_save_capacity": 0,
    "update_freq": 100,
    "batch_size": 128,
    "gamma": 0.99,
    "buffer_capacity": int(1e6),
    "epsilon": 1,
    "max_deduct": 0.93,
    "decay": 0.4,
    "max_steps": 500,
    "non_param": True,
    "model_params": {"n_neighbors":10, "weights": "distance", "algorithm": "auto", "leaf_size": 30},
}

# Gaussian Process Config
CONFIG_GP = {
    "episode_length": 500,
    "max_timesteps": 200000,
    "max_time": 30 * 1000,
    "eval_freq": 10000, 
    "eval_episodes": 10,
    "model_save_freq": 0,
    "model_save_capacity": 0,
    "update_freq": 100,
    "batch_size": 128,
    "gamma": 0.99,
    "buffer_capacity": int(1e6),
    "epsilon": 1,
    "max_deduct": 0.9,
    "decay": 0.4,
    "max_steps": 500,
    "non_param": True,
    "model_params": {"alpha": 1e-10, "normalize_y": False, "kernel":  RBF(length_scale=0.3, length_scale_bounds="fixed")},
}

# Online Gaussian Process Config
CONFIG_GP_Online = {
    "episode_length": 500,
    "max_timesteps": 200000,
    "max_time": 30 * 1000,
    "eval_freq": 10000, 
    "eval_episodes": 10,
    "gamma": 0.99,
    "buffer_capacity": int(1e6),
    "batch_size": 32,
    "epsilon": 1,
    "max_deduct": 0.93,
    "decay": 0.4,
    "max_steps": 500,
    "non_param": True,
    "model_params": {"sigma_0": 0.3, "init": 0, "kernel":  rbf_kernel, "epsilon_tol": 0.075, "basis_limit": 1000},
}

CONFIGS = [CONFIG_DQN, CONFIG_LINEAR, CONFIG_DT, CONFIG_RF, CONFIG_SVR, CONFIG_KNR, CONFIG_GP, CONFIG_GP_Online]
onlines = [False, False, False, False, False, False, False, True]
models = ["Neural Network", "Linear Model", "Decision Tree", "Random Forest", "Support Vectors", "K-Neighbours", "Gaussian Process", "Gaussian Process Online"]

In [4]:
## Performance Evaluation

returns = []
train_returns = []
train_times = []
n_seeds=30

j=4
for i in range(n_seeds):
    print(f"\n Run: {i+1} \n")
    r, _, t, times = train(env, 
            CONFIGS[j], 
            fa=function_approximators[j], 
            agent = agents[j], 
            render=RENDER,
            online=onlines[j],
            threshold = 0.25)
    env.close()
    returns.append(r)
    train_returns.append(t)
    train_times.append(times)
    


  0%|          | 488/200000 [00:00<00:53, 3733.26it/s]


 Run: 1 



  5%|▌         | 10069/200000 [00:33<23:26, 135.00it/s]

Evaluation at timestep 10069 returned a mean returns of 28.83572672831072
Epsilon = 0.8421891666666667
Replay Buffer count: 2007


 10%|█         | 20111/200000 [02:49<57:39, 52.00it/s]

Evaluation at timestep 20111 returned a mean returns of 34.19422262921398
Epsilon = 0.6842041666666667
Replay Buffer count: 3325


 15%|█▌        | 30091/200000 [07:44<1:51:54, 25.31it/s]

Evaluation at timestep 30091 returned a mean returns of -60.453579735244475
Epsilon = 0.5257283333333334
Replay Buffer count: 4770


 20%|██        | 40239/200000 [16:23<2:33:52, 17.30it/s]

Evaluation at timestep 40239 returned a mean returns of 23.193655350341015
Epsilon = 0.36812333333333336
Replay Buffer count: 5819


 25%|██▌       | 50112/200000 [27:33<3:02:32, 13.69it/s]

Evaluation at timestep 50112 returned a mean returns of -0.6440524561405092
Epsilon = 0.21447666666666676
Replay Buffer count: 6461


 26%|██▋       | 52612/200000 [31:18<1:27:43, 28.00it/s]


KeyboardInterrupt: 

In [5]:
with open(f'{environment}_eval_{models[j]}.csv', 'ab') as eval:
    for i in range(n_seeds):
        np.savetxt(eval, [returns[i]], delimiter=',')

IndexError: list index out of range

In [6]:
with open(f'{environment}_train_{models[j]}.csv', 'ab') as train:
    for i in range(n_seeds):
        np.savetxt(train, [train_returns[i]], delimiter=',')
        np.savetxt(train, [train_times[i]], delimiter=',')

IndexError: list index out of range

In [10]:
## Sample Efficiency Evaluation

n_eps = []
n_steps = []
not_solved = []
n_seeds=10

j=4
for i in range(n_seeds):
    print(f"\n Run: {i+1} \n")
    s, e, n = solve(env, 
            CONFIGS[j], 
            fa=function_approximators[j], 
            agent = agents[j],
            target_return=100,
            op=operator.ge, 
            render=RENDER,
            online=onlines[j],
            threshold=0.25)
    env.close()
    n_eps.append(e)
    n_steps.append(s)
    not_solved.append(n)


 Run: 1 

Ep. timesteps: 500
Total timesteps: 10455
Total episodes: 78
Evaluation mean return: 114.72874752385272

 Run: 2 

Ep. timesteps: 500
Total timesteps: 6306
Total episodes: 54
Evaluation mean return: 143.54222720984495

 Run: 3 

Ep. timesteps: 494
Total timesteps: 11736
Total episodes: 72
Evaluation mean return: 159.6750638685724

 Run: 4 

Ep. timesteps: 500
Total timesteps: 16944
Total episodes: 93
Evaluation mean return: 175.46025593824072

 Run: 5 

Ep. timesteps: 387
Total timesteps: 9434
Total episodes: 74
Evaluation mean return: 279.08728930953407

 Run: 6 

Ep. timesteps: 244
Total timesteps: 14255
Total episodes: 87
Evaluation mean return: 302.6887018206717

 Run: 7 

Ep. timesteps: 500
Total timesteps: 14544
Total episodes: 81
Evaluation mean return: 130.73198234143658

 Run: 8 

Ep. timesteps: 384
Total timesteps: 15448
Total episodes: 89
Evaluation mean return: 200.9778050945896

 Run: 9 

Ep. timesteps: 452
Total timesteps: 9004
Total episodes: 68
Evaluation mea

In [11]:
with open(f'{environment}_se_{models[j]}.csv', 'ab') as se:
    np.savetxt(se, [n_eps], delimiter=',')
    np.savetxt(se, [n_steps], delimiter=',')
    np.savetxt(se, [not_solved], delimiter=',')

In [22]:
mean_eps = np.mean(n_eps)
std_eps = np.std(n_eps)
print(f"Average n_eps: {mean_eps}")
print(f"Std n_eps: {std_eps}")
print(f"St.error n_eps: {std_eps/np.sqrt(n_seeds)}")

mean_steps = np.mean(n_steps)
std_steps = np.std(n_steps)
print(f"Average n_steps: {mean_steps}0")
print(f"Std n_steps: {std_steps}")
print(f"St.error n_steps: {std_steps/np.sqrt(n_seeds)}")

print(f"Not solved: {np.sum(not_solved)} runs")

Average n_eps: 21.633333333333333
Std n_eps: 27.45721924902245
St.error n_eps: 5.0129794496848845
Average n_steps: 900.60
Std n_steps: 1383.9435826651315
St.error n_steps: 252.67237284673604
Not solved: 3 runs


In [16]:
## Training time

times = []
for j in range(2,8):
        time = train_time(env, 
                CONFIGS[j], 
                fa=function_approximators[j], 
                agent = agents[j],
                online=onlines[j],
                threshold=0.45)
        env.close()
        times.append(time)

print(time)

 17%|█▋        | 34080/200000 [05:33<53:35, 51.59it/s]

In [13]:
with open(f'{environment}_times.csv', 'ab') as t:
    np.savetxt(t, [times], delimiter=',')