In [1]:
import gym 
import numpy as np
from custom_envs.gridworlds import WindyGridworldEnv, SimpleGridworldEnv

from function_approximators.function_approximators import NeuralNetwork, LinearModel, DecisionTree, RandomForest, ExtraTrees, GradientBoostingTrees, SupportVectorRegressor, KNeighboursRegressor, GaussianProcess, eGaussianProcess, OnlineGaussianProcess
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, RationalQuadratic, Matern
from sklearn.metrics.pairwise import rbf_kernel, chi2_kernel, laplacian_kernel 

from utils.train_utils import train, solve
from utils.plot_utils import plot_returns

from agents.av_agents import DQNAgent, LinearAgent, NonParametricAgent, eGaussianProccessAgent, OnlineGaussianProccessAgent

import operator


In [2]:
# Function-approximator classes and the agent classes that drive them.
# The two lists are parallel: later cells pick a matching pair with one index
# (function_approximators[j] together with agents[j]).
function_approximators = [
    NeuralNetwork,           # 0
    LinearModel,             # 1
    DecisionTree,            # 2
    RandomForest,            # 3
    ExtraTrees,              # 4
    GradientBoostingTrees,   # 5
    SupportVectorRegressor,  # 6
    KNeighboursRegressor,    # 7
    GaussianProcess,         # 8
    eGaussianProcess,        # 9
    OnlineGaussianProcess,   # 10
]
agents = (
    [DQNAgent, LinearAgent]                                   # 0-1
    + [NonParametricAgent] * 7                                # 2-8
    + [eGaussianProccessAgent, OnlineGaussianProccessAgent]   # 9-10
)

# Forwarded as the `render` argument of train()/solve() in later cells.
RENDER = False

# Environment under test: exactly one assignment is active, the commented
# lines are the alternatives that can be switched in.
# env = gym.make("CartPole-v1")
# env = gym.make("Acrobot-v1")
env = gym.make("MountainCar-v0")
# env = WindyGridworldEnv()
# env = gym.make("LunarLander-v2")
# env = SimpleGridworldEnv()

In [5]:
# DQN Config
# Settings dictionary for the neural-network (DQN) agent.
CONFIG_DQN = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,  # presumably seconds, i.e. 30 minutes -- confirm in train()
    eval_freq=1,
    eval_episodes=1,
    learning_rate=0.00075,
    hidden_size=(32, 32),
    target_update_freq=200,
    batch_size=32,
    gamma=0.99,
    buffer_capacity=10_000_000,
    plot_loss=False,
    epsilon=1,
    max_deduct=0.97,
    decay=0.2,
    lr_step_size=1000,
    lr_gamma=0.95,
    max_steps=200,
    non_param=False,
)

# Linear Config
# Settings dictionary for the linear-model agent.
CONFIG_LINEAR = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,  # presumably seconds, i.e. 30 minutes -- confirm in train()
    eval_freq=1000,
    eval_episodes=10,
    learning_rate=0.02,
    target_update_freq=50,
    batch_size=32,
    gamma=0.99,
    buffer_capacity=10_000_000,
    plot_loss=False,
    epsilon=1,
    max_steps=200,
    poly_degree=2,
    max_deduct=0.97,
    decay=0.5,
    lr_step_size=1000,
    lr_gamma=0.99,
    non_param=False,
)

# Decision Tree Config
# Settings dictionary for the decision-tree function approximator.
# NOTE(review): if model_params is forwarded to scikit-learn's
# DecisionTreeRegressor, criterion "mse" was renamed to "squared_error" in
# scikit-learn 1.0 -- confirm against the installed version.
# NOTE(review): feature_names are CartPole labels; verify they match the
# environment that is actually selected.
CONFIG_DT = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,
    eval_freq=1000,
    eval_episodes=10,
    model_save_freq=1000,
    model_save_capacity=20,
    update_freq=1,
    batch_size=512,
    gamma=0.99,
    buffer_capacity=1_000_000,
    epsilon=1,
    max_deduct=0.95,
    decay=0.3,
    max_steps=200,
    non_param=True,
    model_params=dict(
        criterion="mse",
        max_depth=8,
        min_samples_split=20,
        min_samples_leaf=5,
    ),
    feature_names=[
        "Cart Position",
        "Cart Velocity",
        "Pole Angle",
        "Pole Angular Velocity",
        "Action: Push Left",
        "Action: Push Right",
    ],
    plot_name="dt_depth=8",
)

# Random Forest Config
# Settings dictionary for the random-forest function approximator.
CONFIG_RF = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,
    eval_freq=1000,
    eval_episodes=5,
    model_save_freq=1000,
    model_save_capacity=20,
    update_freq=1,
    batch_size=512,
    gamma=0.99,
    buffer_capacity=1_000_000,
    epsilon=1,
    max_deduct=0.95,
    decay=0.3,
    max_steps=200,
    non_param=True,
    model_params=dict(
        n_estimators=10,
        max_depth=10,
        min_samples_split=20,
        min_samples_leaf=5,
    ),
)

# Extra Trees Config
# Settings dictionary for the extra-trees function approximator.
CONFIG_ET = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,
    eval_freq=1000,
    eval_episodes=5,
    model_save_freq=1000,
    model_save_capacity=20,
    update_freq=5,
    batch_size=512,
    gamma=0.99,
    buffer_capacity=1_000_000,
    epsilon=1,
    max_deduct=0.95,
    decay=0.3,
    max_steps=200,
    non_param=True,
    model_params=dict(
        n_estimators=10,
        max_depth=10,
        min_samples_split=20,
        min_samples_leaf=5,
    ),
)

# Gradient Boosting Trees Config
# Settings dictionary for the gradient-boosted-trees function approximator.
# NOTE(review): if model_params is forwarded to scikit-learn's
# GradientBoostingRegressor, loss "ls" was renamed to "squared_error" in
# scikit-learn 1.0 -- confirm against the installed version.
CONFIG_GBT = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,
    eval_freq=1000,
    eval_episodes=10,
    model_save_freq=1000,
    model_save_capacity=20,
    update_freq=1,
    batch_size=512,
    gamma=0.99,
    buffer_capacity=100_000,
    epsilon=1,
    max_deduct=0.95,
    decay=0.3,
    max_steps=200,
    non_param=True,
    model_params=dict(
        loss="ls",
        learning_rate=0.1,
        n_estimators=10,
        max_depth=10,
        min_samples_split=10,
        min_samples_leaf=10,
    ),
)

# Support Vector Regressor Config
# Settings dictionary for the support-vector-regressor function approximator.
# NOTE(review): if model_params is forwarded to scikit-learn's SVR, "degree"
# only affects the polynomial kernel and has no effect with kernel="rbf".
CONFIG_SVR = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,
    eval_freq=1000,
    eval_episodes=5,
    model_save_freq=1000,
    model_save_capacity=20,
    update_freq=1,
    batch_size=512,
    gamma=0.99,
    buffer_capacity=1_000_000,
    epsilon=1,
    max_deduct=0.95,
    decay=0.3,
    max_steps=200,
    non_param=True,
    model_params=dict(kernel="rbf", degree=2, C=1.5),
)


# K-Neighbors Regressor Config
# Settings dictionary for the k-nearest-neighbours function approximator.
CONFIG_KNR = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,
    eval_freq=1000,
    eval_episodes=5,
    model_save_freq=2000,
    model_save_capacity=10,
    update_freq=10,
    batch_size=512,
    gamma=0.99,
    buffer_capacity=1_000_000,
    epsilon=1,
    max_deduct=0.95,
    decay=0.3,
    max_steps=200,
    non_param=True,
    model_params=dict(
        n_neighbors=5,
        weights="uniform",
        algorithm="auto",
        leaf_size=30,
    ),
)

# Gaussian Process Config
# Settings dictionary for the Gaussian-process function approximator.
# The RBF kernel is built with length_scale_bounds="fixed", which per the
# scikit-learn kernel API keeps the length scale out of hyperparameter tuning.
CONFIG_GP = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,
    eval_freq=1000,
    eval_episodes=5,
    model_save_freq=2000,
    model_save_capacity=10,
    update_freq=10,
    batch_size=512,
    gamma=0.99,
    buffer_capacity=1_000_000,
    epsilon=1,
    max_deduct=0.95,
    decay=0.4,
    max_steps=200,
    non_param=True,
    model_params=dict(
        alpha=1e-10,
        normalize_y=False,
        kernel=RBF(length_scale=0.05, length_scale_bounds="fixed"),
    ),
)

# eGaussianProcess Config
# Settings dictionary for the eGaussianProcess function approximator.
# Same values as the plain Gaussian-process settings except max_deduct, which
# is 0 here.
CONFIG_eGP = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,
    eval_freq=1000,
    eval_episodes=5,
    model_save_freq=2000,
    model_save_capacity=10,
    update_freq=10,
    batch_size=512,
    gamma=0.99,
    buffer_capacity=1_000_000,
    epsilon=1,
    max_deduct=0,
    decay=0.4,
    max_steps=200,
    non_param=True,
    model_params=dict(
        alpha=1e-10,
        normalize_y=False,
        kernel=RBF(length_scale=0.05, length_scale_bounds="fixed"),
    ),
)

# Online Gaussian Process Config
# Settings dictionary for the online Gaussian-process function approximator.
# The kernel entry holds the rbf_kernel function object itself, not the result
# of calling it.
CONFIG_GP_Online = dict(
    episode_length=200,
    max_timesteps=20_000,
    max_time=30 * 60,
    eval_freq=1000,
    eval_episodes=5,
    gamma=0.99,
    buffer_capacity=1_000_000,
    batch_size=32,
    epsilon=1,
    max_deduct=0.95,
    decay=0.4,
    max_steps=200,
    non_param=True,
    model_params=dict(
        sigma_0=0.5,
        kernel=rbf_kernel,
        epsilon_tol=0.05,
        basis_limit=1000,
    ),
)


In [4]:
# Repeat training n_seeds times with the function approximator / agent pair
# at index j (10 = OnlineGaussianProcess with OnlineGaussianProccessAgent)
# and keep the first value train() returns for every run.
returns = []
n_seeds = 5

j = 10
for run_number in range(1, n_seeds + 1):
    print(f"\n Run: {run_number} \n")
    run_returns, _ = train(
        env,
        CONFIG_GP_Online,
        fa=function_approximators[j],
        agent=agents[j],
        render=RENDER,
        online=True,
    )
    env.close()
    returns.append(run_returns)

  0%|          | 0/20000 [00:00<?, ?it/s]
 Run: 1 

  5%|▌         | 1000/20000 [00:09<02:40, 118.27it/s]Evaluation at timestep 1000 returned a mean returns of -200.0
Epsilon = 0.905
Support Points = 708
 10%|█         | 2000/20000 [00:17<02:08, 139.94it/s]Evaluation at timestep 2000 returned a mean returns of -200.0
Epsilon = 0.78625
Support Points = 723
 15%|█▌        | 3000/20000 [00:25<02:16, 124.76it/s]Evaluation at timestep 3000 returned a mean returns of -200.0
Epsilon = 0.6675
Support Points = 776
 20%|██        | 4000/20000 [00:36<02:44, 97.19it/s]Evaluation at timestep 4000 returned a mean returns of -200.0
Epsilon = 0.5487500000000001
Support Points = 873
 25%|██▌       | 5000/20000 [00:50<03:04, 81.38it/s]Evaluation at timestep 5000 returned a mean returns of -200.0
Epsilon = 0.43000000000000005
Support Points = 874
 30%|███       | 6000/20000 [01:06<03:29, 66.80it/s]Evaluation at timestep 6000 returned a mean returns of -200.0
Epsilon = 0.31125
Support Points = 878
 35%|██

KeyboardInterrupt: 

In [10]:
# Reduce the collected per-run returns along axis 0 (across runs).
# Each statistic is wrapped in a one-element list, so it is handled as a
# single row wherever the list is used afterwards.
returns_all = []
mean = [np.asarray(returns).mean(axis=0)]
std = [np.asarray(returns).std(axis=0)]
print(mean[0])
print(std[0])


[ 73.44 178.12 186.92 187.84 190.12 194.8  199.12 197.68 198.16 198.92
 198.2  198.88 199.92 199.24 198.32 199.04 198.52 197.88 196.8  198.  ]
[64.94889068 38.40574957 25.76186329 14.22471089 15.20926034  7.8291762
  1.76        4.64        3.38797875  1.55743379  2.27683992  2.14140141
  0.09797959  0.94994737  2.96540048  1.92        2.96        3.10766794
  3.44789791  3.80315658]


In [11]:
# Append the mean row followed by the std row to the results CSV.
# The file is opened in binary append mode, so every execution of this cell
# adds two more rows instead of overwriting the file.
with open('cartpole_Gaussian Process (Expl).csv', 'ab') as csv_file:
    for stat_rows in (mean, std):
        np.savetxt(csv_file, stat_rows, delimiter=',')
        

In [4]:
# Call solve() n_seeds times with the function approximator / agent pair at
# index j (7 = KNeighboursRegressor with NonParametricAgent) and record the
# three values it returns for every run.
# NOTE(review): target_return=195 with operator.ge looks like a CartPole
# threshold -- verify it suits the environment that is actually selected.
n_eps = []
n_steps = []
not_solved = []
n_seeds = 50

j = 7
for run_number in range(1, n_seeds + 1):
    print(f"\n Run: {run_number} \n")
    steps_taken, episodes_taken, unsolved = solve(
        env,
        CONFIG_KNR,
        fa=function_approximators[j],
        agent=agents[j],
        target_return=195,
        op=operator.ge,
        render=RENDER,
    )
    env.close()
    n_eps.append(episodes_taken)
    n_steps.append(steps_taken)
    not_solved.append(unsolved)


 Run: 1 

Ep. timesteps: 200
Total timesteps: 4557
Total episodes: 135
Evaluation mean return: 200.0

 Run: 2 

Ep. timesteps: 200
Total timesteps: 13216
Total episodes: 201
Evaluation mean return: 195.4

 Run: 3 


 Run: 4 

Ep. timesteps: 200
Total timesteps: 880
Total episodes: 39
Evaluation mean return: 197.8

 Run: 5 


 Run: 6 

Ep. timesteps: 200
Total timesteps: 6592
Total episodes: 172
Evaluation mean return: 200.0

 Run: 7 

Ep. timesteps: 180
Total timesteps: 5284
Total episodes: 168
Evaluation mean return: 196.0

 Run: 8 



KeyboardInterrupt: 

In [7]:
# Summarise the recorded solve() runs: mean, standard deviation and standard
# error of the episode counts and of the step counts, plus the sum of the
# not_solved entries.
#
# The standard error divides by the number of runs actually recorded, not by
# the configured n_seeds. The two differ whenever the run loop stops early
# (for example on a KeyboardInterrupt), and dividing by n_seeds would then
# understate the error.
n_runs_eps = len(n_eps)
mean_eps = np.mean(n_eps)
std_eps = np.std(n_eps)  # population standard deviation (numpy default ddof=0)
print(f"Average n_eps: {mean_eps}")
print(f"Std n_eps: {std_eps}")
print(f"St.error n_eps: {std_eps/np.sqrt(n_runs_eps)}")

n_runs_steps = len(n_steps)
mean_steps = np.mean(n_steps)
std_steps = np.std(n_steps)  # population standard deviation (numpy default ddof=0)
print(f"Average n_steps: {mean_steps}")
print(f"Std n_steps: {std_steps}")
print(f"St.error n_steps: {std_steps/np.sqrt(n_runs_steps)}")

print(f"Not solved: {np.sum(not_solved)} runs")

Average n_eps: 29.48
St.error n_eps: 0.70021139665104
Std n_eps7.0021139665104
Average n_steps: 682.0
St.error n_steps: 18.233299207768187
Std n_steps: 182.33299207768187
Not solved: 0 runs
