In [1]:
import gym
import numpy as np
import torch as th
import inventory_model
import pandas as pd
from evaluate import *
from ppo_evaluate import ppo_evaluate
import matplotlib.pyplot as plt


from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_util import make_vec_env

In [2]:
import os

# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort that can occur when several packages bundle their own OpenMP library
# -- confirm it is still needed. It is set after the heavy imports in the
# first cell; verify that this is early enough on the target machine.
os.environ.update({'KMP_DUPLICATE_LIB_OK': 'True'})

In [3]:
# Wrapper: train PPO on the discretised-action inventory environment in short
# bursts, evaluate after every burst, plot the cost curve and report the best model.
def ppo_eval_interval(p, L, t_t, n_iter, n_step, gae, learning_rate=0.0003, n_final_eval=50000):
    """Train PPO with periodic evaluation and return the evaluation history.

    The function alternates between a short training burst and an evaluation
    of the current policy. Whenever an evaluation yields the lowest cost seen
    so far (ties included), the model is checkpointed to
    ``"ppo_min_disc_model_<p>_<L>"``. After training, the cost curve is
    plotted on a log scale and the best checkpoint is re-evaluated.

    Parameters
    ----------
    p, L :
        Forwarded to the environment as the ``'p'`` and ``'L'`` entries of its
        config dict (presumably the penalty cost and lead time of the
        inventory model -- confirm against ``inventory_model``).
    t_t :
        Training budget. Bursts continue while the number of timesteps trained
        so far is ``<= t_t``, so one burst is still started when the counter
        equals ``t_t`` exactly.
    n_iter :
        Third argument passed to ``ppo_evaluate`` for the interim evaluations
        (presumably the number of evaluation iterations -- confirm).
    n_step :
        PPO ``n_steps`` (rollout length per environment).
    gae :
        PPO ``gae_lambda``.
    learning_rate :
        PPO learning rate.
    n_final_eval :
        Third argument passed to ``ppo_evaluate`` when re-evaluating the best
        checkpoint. Defaults to the previously hard-coded 50000.

    Returns
    -------
    tuple
        ``(res_mean_arr, res_std_arr, mean_min, std_min)`` where
        ``res_mean_arr`` holds the negated interim means (plotted as costs),
        ``res_std_arr`` the interim standard deviations, and ``mean_min`` /
        ``std_min`` are the *un-negated* values returned by ``ppo_evaluate``
        for the best checkpoint.

    Side effects
    ------------
    Prints progress, writes/overwrites the checkpoint file in the current
    working directory and displays a matplotlib figure.
    """
    env_id = 'inventory_cont_disc_action_config_fix_model-v0'
    n_envs = 4             # parallel training environments
    rollouts_per_eval = 2  # PPO rollouts between two evaluations
    # Transitions gathered per training burst: rollouts * envs * rollout length.
    steps_per_eval = rollouts_per_eval * n_envs * n_step
    best_model_path = "ppo_min_disc_model_"+str(p)+"_"+str(L)

    ContCONFIG = {'h': 1, 'p': p, 'L': L, 'lambda': 1, 'action': 400}
    PolicyCONFIG = dict(activation_fn=th.nn.Tanh,
                     net_arch=[dict(pi=[64,64], vf=[64, 64])])

    cont_env = make_vec_env(env_id, n_envs=n_envs, env_kwargs=ContCONFIG)
    env_eval = None
    try:
        print("Running PPO w/: p=", p, ", L=",L)
        cont_model = PPO(MlpPolicy, cont_env, verbose=1, gamma = 1, gae_lambda=gae, n_epochs = 16,
                         learning_rate = learning_rate,use_sde = False, n_steps = n_step, policy_kwargs = PolicyCONFIG)
        env_eval = make_vec_env(env_id, n_envs=1, env_kwargs=ContCONFIG)

        timesteps = 0
        res_mean_arr = []
        res_std_arr = []
        best_cost = float('inf')

        while timesteps <= t_t:
            # The "- 1" is kept from the original call.
            # NOTE(review): presumably learn() rounds up to whole rollouts, so
            # this still runs `rollouts_per_eval` rollouts -- confirm.
            cont_model.learn(total_timesteps=steps_per_eval-1)
            timesteps = timesteps + steps_per_eval

            res_mean, res_std = ppo_evaluate(cont_model, env_eval, n_iter)
            cost = -res_mean  # the evaluation mean is negated to obtain a cost
            res_mean_arr.append(cost)
            res_std_arr.append(res_std)

            # Checkpoint whenever the current policy is at least as good as
            # the best one so far (running minimum instead of rescanning the list).
            if cost <= best_cost:
                best_cost = cost
                cont_model.save(best_model_path)

        fig, ax = plt.subplots()
        ax.plot(res_mean_arr)
        ax.set_xlabel("Iteration")
        ax.set_ylabel("Average cost")
        ax.set_title("L="+str(L)+", p="+str(p))
        ax.set_yscale('log')
        plt.show()
        plt.close(fig)  # avoid accumulating figures when called in a sweep

        min_model = PPO.load(best_model_path)
        mean_min, std_min = ppo_evaluate(min_model, env_eval, n_final_eval)
        print("p="+str(p)+"， L="+str(L)+": mean "+str(-mean_min)+", std_dev: "+str(std_min))

        return res_mean_arr, res_std_arr, mean_min, std_min
    finally:
        # Release the vectorised environments even if training is interrupted.
        cont_env.close()
        if env_eval is not None:
            env_eval.close()

In [None]:
# Hyper-parameter sweep over p and L.
# Other grids used previously: listp = [0.25,1,4,9,39,99], listL = [1,4,70,100].
listp = [4,9,39,99]
listL = [1]
n_iter = 500
gae_lambda = 0.95
learning_rate = 0.0003

ppo_res_columns = ['p','L','res_mean', 'res_std']
# Collect one record per (p, L) and build the frame once: DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, and copies the frame on every call.
ppo_records = []

try:
    for p in listp:
        for L in listL:
            n_step = 32*L            # rollout length grows with L
            t_t = 1200*4*n_step      # budget: 1200 rollouts of 4 envs * n_step steps
            res_mean, res_std, mean_min, std_min = ppo_eval_interval(p,L,t_t, n_iter, n_step, gae_lambda, learning_rate)
            # ppo_eval_interval returns the un-negated mean; store it as a cost.
            ppo_records.append({'p': p, 'L':L, 'res_mean':-mean_min, 'res_std': std_min})
finally:
    # Built in `finally` so that results gathered before an interruption
    # (the sweep is long-running) are still available in `ppo_res`.
    ppo_res = pd.DataFrame(ppo_records, columns=ppo_res_columns)

Running PPO w/: p= 4 , L= 1
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 6493 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 128  |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 1802          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | 0.00019500032 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.99         |
|    explained_variance   | 0.00131       |
|    learning_rate        | 0.0003        |
|    loss                 | 3.31e+04      |
|    n_updates            | 16            |
|    policy_gradient_loss | -0.0194       |
|    value_loss           | 6.35e+04     

-------------------------------------------
| time/                   |               |
|    fps                  | 1892          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | -0.0022371933 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.99         |
|    explained_variance   | -0.00241      |
|    learning_rate        | 0.0003        |
|    loss                 | 9.2e+03       |
|    n_updates            | 176           |
|    policy_gradient_loss | -0.0243       |
|    value_loss           | 1.83e+04      |
-------------------------------------------
mean:  -244.10943498470783
standard deviation: 20.883430691327643
------------------------------------------
| time/                   |              |
|    fps                  | 7209         |
|    iteratio

mean:  -246.67077899101076
standard deviation: 23.9783492886509
-------------------------------------------
| time/                   |               |
|    fps                  | 7554          |
|    iterations           | 1             |
|    time_elapsed         | 0             |
|    total_timesteps      | 128           |
| train/                  |               |
|    approx_kl            | -0.0008751154 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.98         |
|    explained_variance   | 9.54e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.69e+05      |
|    n_updates            | 352           |
|    policy_gradient_loss | -0.00802      |
|    value_loss           | 3.46e+05      |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 1678          |
|    iterati

-------------------------------------------
| time/                   |               |
|    fps                  | 1727          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | -0.0032295175 |
|    clip_fraction        | 0.000977      |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.98         |
|    explained_variance   | 0.00224       |
|    learning_rate        | 0.0003        |
|    loss                 | 2.63e+04      |
|    n_updates            | 528           |
|    policy_gradient_loss | -0.0177       |
|    value_loss           | 4.98e+04      |
-------------------------------------------
mean:  -241.5909350435674
standard deviation: 6.102009588317591
-------------------------------------------
| time/                   |               |
|    fps                  | 6589          |
|    iterati

-------------------------------------------
| time/                   |               |
|    fps                  | 1919          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | -0.0028403737 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.98         |
|    explained_variance   | 0.00149       |
|    learning_rate        | 0.0003        |
|    loss                 | 2.52e+04      |
|    n_updates            | 688           |
|    policy_gradient_loss | -0.0177       |
|    value_loss           | 4.93e+04      |
-------------------------------------------
mean:  -237.49250622695982
standard deviation: 19.366080094555432
------------------------------------------
| time/                   |              |
|    fps                  | 7395         |
|    iteratio

--------------------------------------------
| time/                   |                |
|    fps                  | 1954           |
|    iterations           | 2              |
|    time_elapsed         | 0              |
|    total_timesteps      | 256            |
| train/                  |                |
|    approx_kl            | -0.00050519034 |
|    clip_fraction        | 0.00195        |
|    clip_range           | 0.2            |
|    entropy_loss         | -5.98          |
|    explained_variance   | 0.000526       |
|    learning_rate        | 0.0003         |
|    loss                 | 2.53e+04       |
|    n_updates            | 848            |
|    policy_gradient_loss | -0.0191        |
|    value_loss           | 4.86e+04       |
--------------------------------------------
mean:  -247.93660285879372
standard deviation: 16.47833319319099
-------------------------------------------
| time/                   |               |
|    fps                  | 6733     

--------------------------------------------
| time/                   |                |
|    fps                  | 1834           |
|    iterations           | 2              |
|    time_elapsed         | 0              |
|    total_timesteps      | 256            |
| train/                  |                |
|    approx_kl            | -0.00015964732 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -5.97          |
|    explained_variance   | 0.000133       |
|    learning_rate        | 0.0003         |
|    loss                 | 2.87e+04       |
|    n_updates            | 1008           |
|    policy_gradient_loss | -0.0199        |
|    value_loss           | 5.36e+04       |
--------------------------------------------
mean:  -213.1023174061317
standard deviation: 10.357006647695254
--------------------------------------------
| time/                   |                |
|    fps                  | 8084   

------------------------------------------
| time/                   |              |
|    fps                  | 1905         |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    total_timesteps      | 256          |
| train/                  |              |
|    approx_kl            | 0.0017058589 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.97        |
|    explained_variance   | 0.00032      |
|    learning_rate        | 0.0003       |
|    loss                 | 2.9e+04      |
|    n_updates            | 1168         |
|    policy_gradient_loss | -0.0212      |
|    value_loss           | 5.31e+04     |
------------------------------------------
mean:  -233.1648508446753
standard deviation: 20.447829925041802
-------------------------------------------
| time/                   |               |
|    fps                  | 7105          |
|    iterations           | 1

-------------------------------------------
| time/                   |               |
|    fps                  | 1736          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | -0.0021816045 |
|    clip_fraction        | 0.00195       |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.96         |
|    explained_variance   | -1.88e-05     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.49e+04      |
|    n_updates            | 1328          |
|    policy_gradient_loss | -0.0212       |
|    value_loss           | 4.53e+04      |
-------------------------------------------
mean:  -205.97585740362405
standard deviation: 13.351427009108079
-------------------------------------------
| time/                   |               |
|    fps                  | 8487          |
|    itera

-------------------------------------------
| time/                   |               |
|    fps                  | 1868          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | -0.0015933774 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.95         |
|    explained_variance   | -1.14e-05     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.11e+04      |
|    n_updates            | 1488          |
|    policy_gradient_loss | -0.0204       |
|    value_loss           | 5e+04         |
-------------------------------------------
mean:  -205.9389004246801
standard deviation: 14.252079544061019
------------------------------------------
| time/                   |              |
|    fps                  | 8522         |
|    iteration

mean:  -214.8859018131137
standard deviation: 18.487617723270507
------------------------------------------
| time/                   |              |
|    fps                  | 7398         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 128          |
| train/                  |              |
|    approx_kl            | 0.0021486245 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.93        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 7.61e+04     |
|    n_updates            | 1664         |
|    policy_gradient_loss | -0.0114      |
|    value_loss           | 1.76e+05     |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1940        |
|    iterations           | 2      

----------------------------------------
| time/                   |            |
|    fps                  | 1900       |
|    iterations           | 2          |
|    time_elapsed         | 0          |
|    total_timesteps      | 256        |
| train/                  |            |
|    approx_kl            | 0.00650391 |
|    clip_fraction        | 0          |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.94      |
|    explained_variance   | 4.34e-05   |
|    learning_rate        | 0.0003     |
|    loss                 | 2.3e+04    |
|    n_updates            | 1840       |
|    policy_gradient_loss | -0.0196    |
|    value_loss           | 4.4e+04    |
----------------------------------------
mean:  -193.4971506940007
standard deviation: 25.04779774280585
-------------------------------------------
| time/                   |               |
|    fps                  | 7314          |
|    iterations           | 1             |
|    time_elapsed     

mean:  -199.02682092005907
standard deviation: 21.059687911841262
-----------------------------------------
| time/                   |             |
|    fps                  | 7409        |
|    iterations           | 1           |
|    time_elapsed         | 0           |
|    total_timesteps      | 128         |
| train/                  |             |
|    approx_kl            | 0.002629578 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.9        |
|    explained_variance   | -2.38e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 1.31e+05    |
|    n_updates            | 2016        |
|    policy_gradient_loss | -0.011      |
|    value_loss           | 2.49e+05    |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1816         |
|    iterations           | 2            |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 1878         |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    total_timesteps      | 256          |
| train/                  |              |
|    approx_kl            | -0.004646413 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.91        |
|    explained_variance   | 4.99e-05     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.51e+04     |
|    n_updates            | 2192         |
|    policy_gradient_loss | -0.0266      |
|    value_loss           | 3.09e+04     |
------------------------------------------
mean:  -171.23223453786073
standard deviation: 17.714837679752065
-------------------------------------------
| time/                   |               |
|    fps                  | 8660          |
|    iterations           | 

mean:  -200.8096678934537
standard deviation: 8.911594620158944
--------------------------------------------
| time/                   |                |
|    fps                  | 7579           |
|    iterations           | 1              |
|    time_elapsed         | 0              |
|    total_timesteps      | 128            |
| train/                  |                |
|    approx_kl            | -0.00048055127 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -5.86          |
|    explained_variance   | 0              |
|    learning_rate        | 0.0003         |
|    loss                 | 1.14e+05       |
|    n_updates            | 2368           |
|    policy_gradient_loss | -0.0133        |
|    value_loss           | 2.08e+05       |
--------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1820        

mean:  -171.68550564328433
standard deviation: 10.55582107331277
------------------------------------------
| time/                   |              |
|    fps                  | 7989         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 128          |
| train/                  |              |
|    approx_kl            | 0.0007448308 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.82        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 8.32e+04     |
|    n_updates            | 2528         |
|    policy_gradient_loss | -0.0124      |
|    value_loss           | 1.85e+05     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1725         |
|    iterations           | 2   

-------------------------------------------
| time/                   |               |
|    fps                  | 1884          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | -0.0015305821 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.83         |
|    explained_variance   | 2.56e-06      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.85e+04      |
|    n_updates            | 2704          |
|    policy_gradient_loss | -0.0231       |
|    value_loss           | 4.42e+04      |
-------------------------------------------
mean:  -152.8461335518718
standard deviation: 17.374323209509498
------------------------------------------
| time/                   |              |
|    fps                  | 8246         |
|    iteration

mean:  -137.40048108471038
standard deviation: 12.596023803265545
------------------------------------------
| time/                   |              |
|    fps                  | 7246         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 128          |
| train/                  |              |
|    approx_kl            | 0.0023747943 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.66        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 6.33e+04     |
|    n_updates            | 2880         |
|    policy_gradient_loss | -0.02        |
|    value_loss           | 1.27e+05     |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 1906          |
|    iterations           | 

-----------------------------------------
| time/                   |             |
|    fps                  | 1909        |
|    iterations           | 2           |
|    time_elapsed         | 0           |
|    total_timesteps      | 256         |
| train/                  |             |
|    approx_kl            | 0.002162328 |
|    clip_fraction        | 0.000488    |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.79       |
|    explained_variance   | 1.91e-06    |
|    learning_rate        | 0.0003      |
|    loss                 | 1.05e+04    |
|    n_updates            | 3056        |
|    policy_gradient_loss | -0.0275     |
|    value_loss           | 2.01e+04    |
-----------------------------------------
mean:  -126.34056690486668
standard deviation: 11.490278491220911
------------------------------------------
| time/                   |              |
|    fps                  | 8371         |
|    iterations           | 1            |
|    t

mean:  -122.71197513267994
standard deviation: 9.774978293736563
------------------------------------------
| time/                   |              |
|    fps                  | 7504         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 128          |
| train/                  |              |
|    approx_kl            | -0.002161689 |
|    clip_fraction        | 0.0112       |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.67        |
|    explained_variance   | 1.79e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 5.49e+04     |
|    n_updates            | 3232         |
|    policy_gradient_loss | -0.0231      |
|    value_loss           | 1.19e+05     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1840         |
|    iterations           | 2   

------------------------------------------
| time/                   |              |
|    fps                  | 1964         |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    total_timesteps      | 256          |
| train/                  |              |
|    approx_kl            | -0.003364779 |
|    clip_fraction        | 0.00342      |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.68        |
|    explained_variance   | -3.7e-06     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.65e+04     |
|    n_updates            | 3408         |
|    policy_gradient_loss | -0.0275      |
|    value_loss           | 3.26e+04     |
------------------------------------------
mean:  -108.56531589568854
standard deviation: 3.554052977186678
-------------------------------------------
| time/                   |               |
|    fps                  | 7107          |
|    iterations           | 1

mean:  -84.88574510669316
standard deviation: 23.053297194478148
-------------------------------------------
| time/                   |               |
|    fps                  | 7327          |
|    iterations           | 1             |
|    time_elapsed         | 0             |
|    total_timesteps      | 128           |
| train/                  |               |
|    approx_kl            | -0.0067501385 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.52         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 4.57e+04      |
|    n_updates            | 3584          |
|    policy_gradient_loss | -0.0207       |
|    value_loss           | 9.38e+04      |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1861         |
|    iteration

-------------------------------------------
| time/                   |               |
|    fps                  | 1884          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | -0.0047602467 |
|    clip_fraction        | 0.0894        |
|    clip_range           | 0.2           |
|    entropy_loss         | -5.55         |
|    explained_variance   | -5.48e-06     |
|    learning_rate        | 0.0003        |
|    loss                 | 5.17e+03      |
|    n_updates            | 3760          |
|    policy_gradient_loss | -0.0488       |
|    value_loss           | 1.3e+04       |
-------------------------------------------
mean:  -76.3168610932529
standard deviation: 12.212540958444315
---------------------------------------
| time/                   |           |
|    fps                  | 7184      |
|    iterations         

mean:  -15.929313342262617
standard deviation: 3.8039407017741866
-----------------------------------------
| time/                   |             |
|    fps                  | 8021        |
|    iterations           | 1           |
|    time_elapsed         | 0           |
|    total_timesteps      | 128         |
| train/                  |             |
|    approx_kl            | 0.031397957 |
|    clip_fraction        | 0.0405      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.8        |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0003      |
|    loss                 | 8.78e+03    |
|    n_updates            | 3936        |
|    policy_gradient_loss | -0.0392     |
|    value_loss           | 2.38e+04    |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 1939          |
|    iterations           | 2             |
| 

-----------------------------------------
| time/                   |             |
|    fps                  | 1884        |
|    iterations           | 2           |
|    time_elapsed         | 0           |
|    total_timesteps      | 256         |
| train/                  |             |
|    approx_kl            | 0.022483598 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.72       |
|    explained_variance   | 1.25e-06    |
|    learning_rate        | 0.0003      |
|    loss                 | 1.75e+03    |
|    n_updates            | 4112        |
|    policy_gradient_loss | -0.0533     |
|    value_loss           | 3.32e+03    |
-----------------------------------------
mean:  -5.2328237613320585
standard deviation: 0.5849472902094209
-----------------------------------------
| time/                   |             |
|    fps                  | 8263        |
|    iterations           | 1           |
|    time_

mean:  -5.144777278729819
standard deviation: 0.7264647322322825
-----------------------------------------
| time/                   |             |
|    fps                  | 6793        |
|    iterations           | 1           |
|    time_elapsed         | 0           |
|    total_timesteps      | 128         |
| train/                  |             |
|    approx_kl            | 0.011669725 |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.79       |
|    explained_variance   | 2.5e-06     |
|    learning_rate        | 0.0003      |
|    loss                 | 1.02e+03    |
|    n_updates            | 4288        |
|    policy_gradient_loss | -0.0416     |
|    value_loss           | 2.14e+03    |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 1900          |
|    iterations           | 2             |
|  

------------------------------------------
| time/                   |              |
|    fps                  | 1808         |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    total_timesteps      | 256          |
| train/                  |              |
|    approx_kl            | 0.0028776005 |
|    clip_fraction        | 0.21         |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.46        |
|    explained_variance   | -1.74e-05    |
|    learning_rate        | 0.0003       |
|    loss                 | 430          |
|    n_updates            | 4464         |
|    policy_gradient_loss | -0.0598      |
|    value_loss           | 917          |
------------------------------------------
mean:  -2.9579454101040494
standard deviation: 0.19277723092539684
------------------------------------------
| time/                   |              |
|    fps                  | 6780         |
|    iterations           | 1 

mean:  -2.6845919023506344
standard deviation: 0.15561542733871458
-----------------------------------------
| time/                   |             |
|    fps                  | 7994        |
|    iterations           | 1           |
|    time_elapsed         | 0           |
|    total_timesteps      | 128         |
| train/                  |             |
|    approx_kl            | 0.020302033 |
|    clip_fraction        | 0.139       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.75       |
|    explained_variance   | -2.09e-05   |
|    learning_rate        | 0.0003      |
|    loss                 | 777         |
|    n_updates            | 4640        |
|    policy_gradient_loss | -0.0386     |
|    value_loss           | 1.4e+03     |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 1814       |
|    iterations           | 2          |
|    time_ela

-------------------------------------------
| time/                   |               |
|    fps                  | 1813          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | -0.0018339064 |
|    clip_fraction        | 0.0264        |
|    clip_range           | 0.2           |
|    entropy_loss         | -4.43         |
|    explained_variance   | -2.18e-05     |
|    learning_rate        | 0.0003        |
|    loss                 | 505           |
|    n_updates            | 4816          |
|    policy_gradient_loss | -0.0256       |
|    value_loss           | 1.04e+03      |
-------------------------------------------
mean:  -2.4592735613556407
standard deviation: 0.16312913943635607
------------------------------------------
| time/                   |              |
|    fps                  | 7511         |
|    iterati

mean:  -2.3296303945628694
standard deviation: 0.13566548316244245
-----------------------------------------
| time/                   |             |
|    fps                  | 5019        |
|    iterations           | 1           |
|    time_elapsed         | 0           |
|    total_timesteps      | 128         |
| train/                  |             |
|    approx_kl            | 0.022047587 |
|    clip_fraction        | 0.0688      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.88       |
|    explained_variance   | -8.34e-06   |
|    learning_rate        | 0.0003      |
|    loss                 | 374         |
|    n_updates            | 4992        |
|    policy_gradient_loss | -0.0272     |
|    value_loss           | 800         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1429        |
|    iterations           | 2           |
|    time

----------------------------------------
| time/                   |            |
|    fps                  | 1767       |
|    iterations           | 2          |
|    time_elapsed         | 0          |
|    total_timesteps      | 256        |
| train/                  |            |
|    approx_kl            | 0.00645994 |
|    clip_fraction        | 0.11       |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.04      |
|    explained_variance   | -4.05e-06  |
|    learning_rate        | 0.0003     |
|    loss                 | 459        |
|    n_updates            | 5168       |
|    policy_gradient_loss | -0.0281    |
|    value_loss           | 823        |
----------------------------------------
mean:  -2.3496781276592142
standard deviation: 0.11816282746030989
-----------------------------------------
| time/                   |             |
|    fps                  | 5824        |
|    iterations           | 1           |
|    time_elapsed         |

mean:  -2.352118303633574
standard deviation: 0.10164466891228727
-------------------------------------------
| time/                   |               |
|    fps                  | 7355          |
|    iterations           | 1             |
|    time_elapsed         | 0             |
|    total_timesteps      | 128           |
| train/                  |               |
|    approx_kl            | -0.0055353437 |
|    clip_fraction        | 0.0132        |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.96         |
|    explained_variance   | -1.43e-06     |
|    learning_rate        | 0.0003        |
|    loss                 | 272           |
|    n_updates            | 5344          |
|    policy_gradient_loss | -0.014        |
|    value_loss           | 535           |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1955        |
|    iterations 

------------------------------------------
| time/                   |              |
|    fps                  | 1732         |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    total_timesteps      | 256          |
| train/                  |              |
|    approx_kl            | -0.011130864 |
|    clip_fraction        | 0.0537       |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.03        |
|    explained_variance   | -5.13e-06    |
|    learning_rate        | 0.0003       |
|    loss                 | 462          |
|    n_updates            | 5520         |
|    policy_gradient_loss | -0.0229      |
|    value_loss           | 945          |
------------------------------------------
mean:  -2.3520589964385494
standard deviation: 0.07590107309129265
------------------------------------------
| time/                   |              |
|    fps                  | 7426         |
|    iterations           | 1 

mean:  -2.2561768750761635
standard deviation: 0.09171884205940709
------------------------------------------
| time/                   |              |
|    fps                  | 7800         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 128          |
| train/                  |              |
|    approx_kl            | 0.0029759444 |
|    clip_fraction        | 0.042        |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.94        |
|    explained_variance   | -4.77e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 339          |
|    n_updates            | 5696         |
|    policy_gradient_loss | -0.0199      |
|    value_loss           | 602          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1751        |
|    iterations           | 2    

------------------------------------------
| time/                   |              |
|    fps                  | 1835         |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    total_timesteps      | 256          |
| train/                  |              |
|    approx_kl            | 0.0043512527 |
|    clip_fraction        | 0.0381       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.52        |
|    explained_variance   | -4.77e-06    |
|    learning_rate        | 0.0003       |
|    loss                 | 351          |
|    n_updates            | 5872         |
|    policy_gradient_loss | -0.0235      |
|    value_loss           | 704          |
------------------------------------------
mean:  -2.3335376696665735
standard deviation: 0.0695553379658068
------------------------------------------
| time/                   |              |
|    fps                  | 5833         |
|    iterations           | 1  

mean:  -2.205784764196992
standard deviation: 0.07403835257742718
-----------------------------------------
| time/                   |             |
|    fps                  | 8347        |
|    iterations           | 1           |
|    time_elapsed         | 0           |
|    total_timesteps      | 128         |
| train/                  |             |
|    approx_kl            | 0.022225702 |
|    clip_fraction        | 0.0537      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.85       |
|    explained_variance   | -5.6e-06    |
|    learning_rate        | 0.0003      |
|    loss                 | 334         |
|    n_updates            | 6048        |
|    policy_gradient_loss | -0.0215     |
|    value_loss           | 633         |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 1810          |
|    iterations           | 2             |
| 

-----------------------------------------
| time/                   |             |
|    fps                  | 1953        |
|    iterations           | 2           |
|    time_elapsed         | 0           |
|    total_timesteps      | 256         |
| train/                  |             |
|    approx_kl            | 0.006301863 |
|    clip_fraction        | 0.0273      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.66       |
|    explained_variance   | -3.22e-06   |
|    learning_rate        | 0.0003      |
|    loss                 | 370         |
|    n_updates            | 6224        |
|    policy_gradient_loss | -0.0153     |
|    value_loss           | 822         |
-----------------------------------------
mean:  -2.2029012062232005
standard deviation: 0.07432423088273805
-------------------------------------------
| time/                   |               |
|    fps                  | 7987          |
|    iterations           | 1             |
|

mean:  -2.227903057606239
standard deviation: 0.10227120611531959
-------------------------------------------
| time/                   |               |
|    fps                  | 7331          |
|    iterations           | 1             |
|    time_elapsed         | 0             |
|    total_timesteps      | 128           |
| train/                  |               |
|    approx_kl            | -0.0045167655 |
|    clip_fraction        | 0.0127        |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.55         |
|    explained_variance   | -1.55e-06     |
|    learning_rate        | 0.0003        |
|    loss                 | 248           |
|    n_updates            | 6400          |
|    policy_gradient_loss | -0.0116       |
|    value_loss           | 520           |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1796        |
|    iterations 

-----------------------------------------
| time/                   |             |
|    fps                  | 1811        |
|    iterations           | 2           |
|    time_elapsed         | 0           |
|    total_timesteps      | 256         |
| train/                  |             |
|    approx_kl            | 0.017198853 |
|    clip_fraction        | 0.0273      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.78       |
|    explained_variance   | -2.62e-06   |
|    learning_rate        | 0.0003      |
|    loss                 | 345         |
|    n_updates            | 6576        |
|    policy_gradient_loss | -0.0185     |
|    value_loss           | 762         |
-----------------------------------------
mean:  -2.169446729545461
standard deviation: 0.10285726814401337
-----------------------------------------
| time/                   |             |
|    fps                  | 7261        |
|    iterations           | 1           |
|    time_

mean:  -2.26515999440779
standard deviation: 0.11289111104277641
------------------------------------------
| time/                   |              |
|    fps                  | 7176         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 128          |
| train/                  |              |
|    approx_kl            | 0.0026369486 |
|    clip_fraction        | 0.0186       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.2         |
|    explained_variance   | 1.01e-06     |
|    learning_rate        | 0.0003       |
|    loss                 | 321          |
|    n_updates            | 6752         |
|    policy_gradient_loss | -0.0149      |
|    value_loss           | 628          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1827         |
|    iterations           | 2   

----------------------------------------
| time/                   |            |
|    fps                  | 1875       |
|    iterations           | 2          |
|    time_elapsed         | 0          |
|    total_timesteps      | 256        |
| train/                  |            |
|    approx_kl            | 0.02890752 |
|    clip_fraction        | 0.0967     |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.26      |
|    explained_variance   | -3.34e-06  |
|    learning_rate        | 0.0003     |
|    loss                 | 473        |
|    n_updates            | 6928       |
|    policy_gradient_loss | -0.0189    |
|    value_loss           | 888        |
----------------------------------------
mean:  -2.21902361264894
standard deviation: 0.0623175148369886
------------------------------------------
| time/                   |              |
|    fps                  | 6698         |
|    iterations           | 1            |
|    time_elapsed         

mean:  -2.2133784155451806
standard deviation: 0.04965490966222607
-----------------------------------------
| time/                   |             |
|    fps                  | 7313        |
|    iterations           | 1           |
|    time_elapsed         | 0           |
|    total_timesteps      | 128         |
| train/                  |             |
|    approx_kl            | 0.019478485 |
|    clip_fraction        | 0.082       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.25       |
|    explained_variance   | -1.31e-06   |
|    learning_rate        | 0.0003      |
|    loss                 | 430         |
|    n_updates            | 7104        |
|    policy_gradient_loss | -0.0193     |
|    value_loss           | 860         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1744        |
|    iterations           | 2           |
|    time

------------------------------------------
| time/                   |              |
|    fps                  | 1826         |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    total_timesteps      | 256          |
| train/                  |              |
|    approx_kl            | 0.0033714212 |
|    clip_fraction        | 0.0146       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.32        |
|    explained_variance   | -2.26e-06    |
|    learning_rate        | 0.0003       |
|    loss                 | 185          |
|    n_updates            | 7280         |
|    policy_gradient_loss | -0.015       |
|    value_loss           | 377          |
------------------------------------------
mean:  -2.246652659400925
standard deviation: 0.0720666563283262
-----------------------------------------
| time/                   |             |
|    fps                  | 7375        |
|    iterations           | 1      

mean:  -2.296119243934378
standard deviation: 0.09010265538517835
------------------------------------------
| time/                   |              |
|    fps                  | 7244         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 128          |
| train/                  |              |
|    approx_kl            | -0.004553305 |
|    clip_fraction        | 0.0449       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.19        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 314          |
|    n_updates            | 7456         |
|    policy_gradient_loss | -0.0187      |
|    value_loss           | 618          |
------------------------------------------
--------------------------------------------
| time/                   |                |
|    fps                  | 1906           |
|    iterations          

-------------------------------------------
| time/                   |               |
|    fps                  | 1815          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | -0.0038661603 |
|    clip_fraction        | 0.0493        |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.91         |
|    explained_variance   | -1.31e-06     |
|    learning_rate        | 0.0003        |
|    loss                 | 465           |
|    n_updates            | 7632          |
|    policy_gradient_loss | -0.0226       |
|    value_loss           | 874           |
-------------------------------------------
mean:  -2.2352288460095413
standard deviation: 0.1072179343389503
-----------------------------------------
| time/                   |             |
|    fps                  | 7394        |
|    iterations 

mean:  -2.2811894310371486
standard deviation: 0.11307462491286137
------------------------------------------
| time/                   |              |
|    fps                  | 7321         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 128          |
| train/                  |              |
|    approx_kl            | 0.0053322166 |
|    clip_fraction        | 0.0933       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.51        |
|    explained_variance   | -1.79e-06    |
|    learning_rate        | 0.0003       |
|    loss                 | 401          |
|    n_updates            | 7808         |
|    policy_gradient_loss | -0.029       |
|    value_loss           | 731          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1822         |
|    iterations           | 2 

-------------------------------------------
| time/                   |               |
|    fps                  | 1800          |
|    iterations           | 2             |
|    time_elapsed         | 0             |
|    total_timesteps      | 256           |
| train/                  |               |
|    approx_kl            | -0.0023752134 |
|    clip_fraction        | 0.0527        |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.42         |
|    explained_variance   | -1.19e-06     |
|    learning_rate        | 0.0003        |
|    loss                 | 337           |
|    n_updates            | 7984          |
|    policy_gradient_loss | -0.0241       |
|    value_loss           | 626           |
-------------------------------------------
mean:  -2.251653433894669
standard deviation: 0.03913379555478033
------------------------------------------
| time/                   |              |
|    fps                  | 7120         |
|    iteratio

mean:  -2.286216167667945
standard deviation: 0.11049154349059262
-------------------------------------------
| time/                   |               |
|    fps                  | 7372          |
|    iterations           | 1             |
|    time_elapsed         | 0             |
|    total_timesteps      | 128           |
| train/                  |               |
|    approx_kl            | 0.00053903833 |
|    clip_fraction        | 0.0288        |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.34         |
|    explained_variance   | -9.54e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 318           |
|    n_updates            | 8160          |
|    policy_gradient_loss | -0.0218       |
|    value_loss           | 639           |
-------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 1708       |
|    iterations    

---------------------------------------
| time/                   |           |
|    fps                  | 1738      |
|    iterations           | 2         |
|    time_elapsed         | 0         |
|    total_timesteps      | 256       |
| train/                  |           |
|    approx_kl            | 0.0221385 |
|    clip_fraction        | 0.112     |
|    clip_range           | 0.2       |
|    entropy_loss         | -2.91     |
|    explained_variance   | -1.07e-06 |
|    learning_rate        | 0.0003    |
|    loss                 | 345       |
|    n_updates            | 8336      |
|    policy_gradient_loss | -0.0305   |
|    value_loss           | 755       |
---------------------------------------
mean:  -2.2175674328602852
standard deviation: 0.10729214777076843
-----------------------------------------
| time/                   |             |
|    fps                  | 7225        |
|    iterations           | 1           |
|    time_elapsed         | 0           |
|  

mean:  -2.159787892683595
standard deviation: 0.106825756204504
------------------------------------------
| time/                   |              |
|    fps                  | 7204         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 128          |
| train/                  |              |
|    approx_kl            | 0.0086155385 |
|    clip_fraction        | 0.0972       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.83        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 271          |
|    n_updates            | 8512         |
|    policy_gradient_loss | -0.0285      |
|    value_loss           | 540          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 1770          |
|    iterations           | 2 

----------------------------------------
| time/                   |            |
|    fps                  | 1772       |
|    iterations           | 2          |
|    time_elapsed         | 0          |
|    total_timesteps      | 256        |
| train/                  |            |
|    approx_kl            | 0.02436984 |
|    clip_fraction        | 0.0757     |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.73      |
|    explained_variance   | -2.98e-06  |
|    learning_rate        | 0.0003     |
|    loss                 | 310        |
|    n_updates            | 8688       |
|    policy_gradient_loss | -0.0274    |
|    value_loss           | 624        |
----------------------------------------
mean:  -2.255357710182224
standard deviation: 0.0924128894055764
----------------------------------------
| time/                   |            |
|    fps                  | 7313       |
|    iterations           | 1          |
|    time_elapsed         | 0    

In [None]:
# Echo `ppo_res` as this cell's output.
# NOTE(review): `ppo_res` is not assigned in any cell visible to this review --
# confirm an earlier cell defines it, otherwise this raises NameError on
# Restart Kernel -> Run All.
ppo_res

In [None]:
# Plot the values held in `res_mean`.
# NOTE(review): inside `ppo_eval_interval`, `res_mean` is a local that is rebound
# on every evaluation (its negation is what gets appended to `res_mean_arr`).
# Confirm that a notebook-level `res_mean` sequence is defined by an earlier
# cell; otherwise this cell depends on hidden kernel state.
plt.plot(res_mean)
plt.show()  # render the figure rather than echoing the list of Line2D objects

## Below is obsolete.